From 9b6d5690b543b8fbf3daf8fc2d7ec7b59b3a3467 Mon Sep 17 00:00:00 2001 From: sachin agarwal Date: Sun, 9 Feb 2020 16:46:20 +0530 Subject: [PATCH 001/562] gpio: ich: fix a typo We had written "Mangagment" rather than "Management". Signed-off-by: Sachin Agarwal Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-ich.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index 2f086d0aa1f4..9960bb8b0f5b 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -89,7 +89,7 @@ static struct { struct device *dev; struct gpio_chip chip; struct resource *gpio_base; /* GPIO IO base */ - struct resource *pm_base; /* Power Mangagment IO base */ + struct resource *pm_base; /* Power Management IO base */ struct ichx_desc *desc; /* Pointer to chipset-specific description */ u32 orig_gpio_ctrl; /* Orig CTRL value, used to restore on exit */ u8 use_gpio; /* Which GPIO groups are usable */ From 4e2dec66e2ae4f3b44e0b16311391eaf2e95bb95 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 8 Apr 2020 17:46:15 +0200 Subject: [PATCH 002/562] scsi: docs: fusion: get rid of a doc build warning Use a table for the enum list, to avoid this warning: ./drivers/message/fusion/mptbase.c:5058: WARNING: Definition list ends without a blank line; unexpected unindent. Link: https://lore.kernel.org/r/9ca049c2d25689c56448afddf4f0d1e619fa87f7.1586359676.git.mchehab+huawei@kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptbase.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index c2dd322691d1..68aea22f2b89 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -5052,9 +5052,11 @@ GetLanConfigPages(MPT_ADAPTER *ioc) * @ioc: Pointer to MPT_ADAPTER structure * @persist_opcode: see below * - * MPI_SAS_OP_CLEAR_NOT_PRESENT - Free all persist TargetID mappings for - * devices not currently present. - * MPI_SAS_OP_CLEAR_ALL_PERSISTENT - Clear al persist TargetID mappings + * =============================== ====================================== + * MPI_SAS_OP_CLEAR_NOT_PRESENT Free all persist TargetID mappings for + * devices not currently present. + * MPI_SAS_OP_CLEAR_ALL_PERSISTENT Clear al persist TargetID mappings + * =============================== ====================================== * * NOTE: Don't use not this function during interrupt time. * From fbbef0dad79f31baca4fdc09779918c70c1b26ca Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 3 Apr 2020 11:27:17 +0200 Subject: [PATCH 003/562] scsi: qedf: Simplify mutex_unlock() usage The commit 6d1368e8f987 ("scsi: qedf: fixup locking in qedf_restart_rport()") introduced the lock. Though the lock protects only the fc_rport_create() call. Thus, we can move the mutex unlock up before the if statement and drop the else body. Link: https://lore.kernel.org/r/20200403092717.19779-1-dwagner@suse.de Cc: Hannes Reinecke Cc: Saurav Kashyap Signed-off-by: Daniel Wagner Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_els.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c index 87e169dcebdb..542ba9454257 100644 --- a/drivers/scsi/qedf/qedf_els.c +++ b/drivers/scsi/qedf/qedf_els.c @@ -388,14 +388,10 @@ void qedf_restart_rport(struct qedf_rport *fcport) mutex_lock(&lport->disc.disc_mutex); /* Recreate the rport and log back in */ rdata = fc_rport_create(lport, port_id); - if (rdata) { - mutex_unlock(&lport->disc.disc_mutex); + mutex_unlock(&lport->disc.disc_mutex); + if (rdata) fc_rport_login(rdata); - fcport->rdata = rdata; - } else { - mutex_unlock(&lport->disc.disc_mutex); - fcport->rdata = NULL; - } + fcport->rdata = rdata; } clear_bit(QEDF_RPORT_IN_RESET, &fcport->flags); } From 2f1ea39870c95aa9fff6a0b48757625a0b22f551 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Tue, 7 Apr 2020 23:43:27 -0700 Subject: [PATCH 004/562] scsi: qedi: Remove additional char from boot target iqnname While parsing the iSCSI TLV data to MFW request, a newline was added to the firmware boot target iqnname string. Because of this, we were getting the following error even after the boot target was successfully logged in: "[qedi_get_protocol_tlv_data:1197]:1: Boot target not set" Remove the trailing newline. [mkp: clarified commit desc] Link: https://lore.kernel.org/r/20200408064332.19377-2-mrangankar@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index b995b19865ca..2fe114074455 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -921,7 +921,7 @@ static void qedi_get_boot_tgt_info(struct nvm_iscsi_block *block, ipv6_en = !!(block->generic.ctrl_flags & NVM_ISCSI_CFG_GEN_IPV6_ENABLED); - snprintf(tgt->iscsi_name, sizeof(tgt->iscsi_name), "%s\n", + snprintf(tgt->iscsi_name, sizeof(tgt->iscsi_name), "%s", block->target[index].target_name.byte); tgt->ipv6_en = ipv6_en; From 2e612fab05112d9e40add3c3cb75a0e86967d53c Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Tue, 7 Apr 2020 23:43:28 -0700 Subject: [PATCH 005/562] scsi: qedi: Avoid unnecessary endpoint allocation on link down No need to allocate and deallocate endpoint memory if the physical link is down. Link: https://lore.kernel.org/r/20200408064332.19377-3-mrangankar@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_iscsi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 1f4a5fb00a05..26b11515f43e 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -836,6 +836,11 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, return ERR_PTR(ret); } + if (atomic_read(&qedi->link_state) != QEDI_LINK_UP) { + QEDI_WARN(&qedi->dbg_ctx, "qedi link down\n"); + return ERR_PTR(-ENXIO); + } + ep = iscsi_create_endpoint(sizeof(struct qedi_endpoint)); if (!ep) { QEDI_ERR(&qedi->dbg_ctx, "endpoint create fail\n"); @@ -870,12 +875,6 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, QEDI_ERR(&qedi->dbg_ctx, "Invalid endpoint\n"); } - if (atomic_read(&qedi->link_state) != QEDI_LINK_UP) { - QEDI_WARN(&qedi->dbg_ctx, "qedi link down\n"); - ret = -ENXIO; - goto ep_conn_exit; - } - ret = qedi_alloc_sq(qedi, qedi_ep); if (ret) goto ep_conn_exit; From cf9e672dd85561a0fb7e5595b4e40efa291fc59b Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Tue, 7 Apr 2020 23:43:29 -0700 Subject: [PATCH 006/562] scsi: qedi: Use correct msix count for fastpath vectors Use MSI-X count provided by qed. Link: https://lore.kernel.org/r/20200408064332.19377-4-mrangankar@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 2fe114074455..f1d998c5f540 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1360,7 +1360,7 @@ static int qedi_request_msix_irq(struct qedi_ctx *qedi) u16 idx; cpu = cpumask_first(cpu_online_mask); - for (i = 0; i < MIN_NUM_CPUS_MSIX(qedi); i++) { + for (i = 0; i < qedi->int_info.msix_cnt; i++) { idx = i * qedi->dev_info.common.num_hwfns + qedi_ops->common->get_affin_hwfn_idx(qedi->cdev); From 927527aea0e2a9c1d336c7d33f77f1911481d008 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Tue, 7 Apr 2020 23:43:30 -0700 Subject: [PATCH 007/562] scsi: qedi: Do not flush offload work if ARP not resolved For an unreachable target, offload_work is not initialized and the endpoint state is set to OFLDCONN_NONE. This results in a WARN_ON due to the check of the work function field being set to zero. ------------[ cut here ]------------ WARNING: CPU: 24 PID: 18587 at ../kernel/workqueue.c:3037 __flush_work+0x1c1/0x1d0 : Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 02/01/2020 RIP: 0010:__flush_work+0x1c1/0x1d0 Code: ba 6d 00 03 80 c9 f0 eb b6 48 c7 c7 20 ee 6c a4 e8 52 d3 04 00 0f 0b 31 c0 e9 d1 fe ff ff 48 c7 c7 20 ee 6c a4 e8 3d d3 04 00 <0f> 0b 31 c0 e9 bc fe ff ff e8 11 f3 f 00 31 f6 RSP: 0018:ffffac5a8cd47a80 EFLAGS: 00010282 RAX: 0000000000000024 RBX: ffff98d68c1fcaf0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff98ce9fd99898 RDI: ffff98ce9fd99898 RBP: ffff98d68c1fcbc0 R08: 00000000000006fa R09: 0000000000000001 R10: ffffac5a8cd47b50 R11: 0000000000000001 R12: 0000000000000000 R13: 000000000000489b R14: ffff98d68c1fc800 R15: ffff98d692132c00 FS: 00007f65f7f62280(0000) GS:ffff98ce9fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd2435e880 CR3: 0000000809334003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? class_create_release+0x40/0x40 ? klist_put+0x2c/0x80 qedi_ep_disconnect+0xdd/0x400 [qedi] iscsi_if_ep_disconnect.isra.20+0x59/0x70 [scsi_transport_iscsi] iscsi_if_rx+0x129b/0x1670 [scsi_transport_iscsi] ? __netlink_lookup+0xe7/0x160 netlink_unicast+0x21d/0x300 netlink_sendmsg+0x30f/0x430 sock_sendmsg+0x5b/0x60 ____sys_sendmsg+0x1e2/0x240 ? copy_msghdr_from_user+0xd9/0x160 ___sys_sendmsg+0x88/0xd0 ? ___sys_recvmsg+0xa2/0xe0 ? hrtimer_try_to_cancel+0x25/0x100 ? do_nanosleep+0x9c/0x170 ? __sys_sendmsg+0x5e/0xa0 __sys_sendmsg+0x5e/0xa0 do_syscall_64+0x60/0x1f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f65f6f16107 Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 aa d2 2b 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 8 0 00 00 00 00 53 48 89 f3 48 RSP: 002b:00007ffd24367ca8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000055a7aeaaf110 RCX: 00007f65f6f16107 RDX: 0000000000000000 RSI: 00007ffd24367cc0 RDI: 0000000000000003 RBP: 0000000000000070 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000075c R11: 0000000000000246 R12: 00007ffd24367cc0 R13: 000055a7ae560008 R14: 00007ffd24367db0 R15: 0000000000000000 ---[ end trace 54f499c05d41f8bb ]--- Only flush if the connection endpoint state if different from OFLDCONN_NONE. [mkp: clarified commit desc] Link: https://lore.kernel.org/r/20200408064332.19377-5-mrangankar@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Nilesh Javali Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_iscsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 26b11515f43e..80c724bf16d7 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -1000,7 +1000,8 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) if (qedi_ep->state == EP_STATE_OFLDCONN_START) goto ep_exit_recover; - flush_work(&qedi_ep->offload_work); + if (qedi_ep->state != EP_STATE_OFLDCONN_NONE) + flush_work(&qedi_ep->offload_work); if (qedi_ep->conn) { qedi_conn = qedi_ep->conn; From c6bfa707207c34674d45235e44e63f9a5801232f Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Tue, 7 Apr 2020 23:43:31 -0700 Subject: [PATCH 008/562] scsi: qedi: Add modules param to enable qed iSCSI debug Add module parameter to enable debug messages specific to iSCSI functions. Link: https://lore.kernel.org/r/20200408064332.19377-6-mrangankar@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index f1d998c5f540..b9a5c842a76e 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -28,6 +28,10 @@ #include "qedi_gbl.h" #include "qedi_iscsi.h" +static uint qedi_qed_debug; +module_param(qedi_qed_debug, uint, 0644); +MODULE_PARM_DESC(qedi_qed_debug, " QED debug level 0 (default)"); + static uint qedi_fw_debug; module_param(qedi_fw_debug, uint, 0644); MODULE_PARM_DESC(qedi_fw_debug, " Firmware debug level 0(default) to 3"); @@ -2422,7 +2426,6 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) { struct qedi_ctx *qedi; struct qed_ll2_params params; - u32 dp_module = 0; u8 dp_level = 0; bool is_vf = false; char host_buf[16]; @@ -2445,7 +2448,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) memset(&qed_params, 0, sizeof(qed_params)); qed_params.protocol = QED_PROTOCOL_ISCSI; - qed_params.dp_module = dp_module; + qed_params.dp_module = qedi_qed_debug; qed_params.dp_level = dp_level; qed_params.is_vf = is_vf; qedi->cdev = qedi_ops->common->probe(pdev, &qed_params); From b9b97e6903032ec56e6dcbe137a9819b74a17fea Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Tue, 7 Apr 2020 23:43:32 -0700 Subject: [PATCH 009/562] scsi: qedi: Fix termination timeouts in session logout The destroy connection ramrod timed out during session logout. Fix the wait delay for graceful vs abortive termination as per the FW requirements. Link: https://lore.kernel.org/r/20200408064332.19377-7-mrangankar@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Nilesh Javali Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_iscsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 80c724bf16d7..b867a143d263 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -1065,6 +1065,9 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) break; } + if (!abrt_conn) + wait_delay += qedi->pf_params.iscsi_pf_params.two_msl_timer; + qedi_ep->state = EP_STATE_DISCONN_START; ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn); if (ret) { From 21e855cdfeb91cf2fe92cd608c3f1d657fc73c5c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 5 Apr 2020 15:59:05 -0700 Subject: [PATCH 010/562] scsi: qla2xxx: Split qla2x00_configure_local_loop() The size of the function qla2x00_configure_local_loop() hurts its readability. Hence split that function. This patch does not change any functionality. Link: https://lore.kernel.org/r/20200405225905.17171-1-bvanassche@acm.org Cc: Nilesh Javali Cc: Himanshu Madhani Cc: Quinn Tran Cc: Martin Wilck Cc: Daniel Wagner Cc: Roman Bolshakov Reviewed-by: Roman Bolshakov Reviewed-by: Himanshu Madhani Reviewed-by: Daniel Wagner Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 92 ++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5b2deaa730bf..80390d3f3236 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5081,6 +5081,54 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) return (rval); } +static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha) +{ + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + fc_port_t *fcport; + int rval; + + if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { + /* borrowing */ + u32 *bp, sz; + + memset(ha->init_cb, 0, ha->init_cb_size); + sz = min_t(int, sizeof(struct els_plogi_payload), + ha->init_cb_size); + rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma, + ha->init_cb, sz); + if (rval == QLA_SUCCESS) { + __be32 *q = &ha->plogi_els_payld.data[0]; + + bp = (uint32_t *)ha->init_cb; + cpu_to_be32_array(q, bp, sz / 4); + memcpy(bp, q, sizeof(ha->plogi_els_payld.data)); + } else { + ql_dbg(ql_dbg_init, vha, 0x00d1, + "PLOGI ELS param read fail.\n"); + goto skip_login; + } + } + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->n2n_flag) { + qla24xx_fcport_handle_login(vha, fcport); + return QLA_SUCCESS; + } + } + +skip_login: + spin_lock_irqsave(&vha->work_lock, flags); + vha->scan.scan_retry++; + spin_unlock_irqrestore(&vha->work_lock, flags); + + if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + } + return QLA_FUNCTION_FAILED; +} + /* * qla2x00_configure_local_loop * Updates Fibre Channel Device Database with local loop devices. @@ -5098,7 +5146,6 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) int found_devs; int found; fc_port_t *fcport, *new_fcport; - uint16_t index; uint16_t entries; struct gid_list_info *gid; @@ -5108,47 +5155,8 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) unsigned long flags; /* Inititae N2N login. */ - if (N2N_TOPO(ha)) { - if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { - /* borrowing */ - u32 *bp, sz; - - memset(ha->init_cb, 0, ha->init_cb_size); - sz = min_t(int, sizeof(struct els_plogi_payload), - ha->init_cb_size); - rval = qla24xx_get_port_login_templ(vha, - ha->init_cb_dma, (void *)ha->init_cb, sz); - if (rval == QLA_SUCCESS) { - __be32 *q = &ha->plogi_els_payld.data[0]; - - bp = (uint32_t *)ha->init_cb; - cpu_to_be32_array(q, bp, sz / 4); - - memcpy(bp, q, sizeof(ha->plogi_els_payld.data)); - } else { - ql_dbg(ql_dbg_init, vha, 0x00d1, - "PLOGI ELS param read fail.\n"); - goto skip_login; - } - } - - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->n2n_flag) { - qla24xx_fcport_handle_login(vha, fcport); - return QLA_SUCCESS; - } - } -skip_login: - spin_lock_irqsave(&vha->work_lock, flags); - vha->scan.scan_retry++; - spin_unlock_irqrestore(&vha->work_lock, flags); - - if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { - set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - } - return QLA_FUNCTION_FAILED; - } + if (N2N_TOPO(ha)) + return qla2x00_configure_n2n_loop(vha); found_devs = 0; new_fcport = NULL; From 7fc504b993cee79e38c0f018bf3c38940873bebd Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Wed, 1 Apr 2020 13:49:37 -0400 Subject: [PATCH 011/562] gpio: xgene-sb: set valid IRQ type in to_irq() xgene-sb is setup to be a hierarchical IRQ chip with the GIC as the parent chip. xgene_gpio_sb_to_irq() currently sets the default IRQ type to IRQ_TYPE_NONE, which the GIC loudly complains about with a WARN_ON(). Let's set the initial default to a sane value (IRQ_TYPE_EDGE_RISING) that was determined by decoding the ACPI tables on affected hardware: Device (_SB.GPSB) { Name (_HID, "APMC0D15") // _HID: Hardware ID Name (_CID, "APMC0D15") // _CID: Compatible ID Name (_UID, "GPIOSB") // _UID: Unique ID ... Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings { ... Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, ) { 0x00000048, } ... } } This can be overridden later as needed with irq_set_irq_type(). Signed-off-by: Brian Masney Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-xgene-sb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 25d86441666e..b45bfa9baa26 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -122,7 +122,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio) fwspec.fwnode = gc->parent->fwnode; fwspec.param_count = 2; fwspec.param[0] = GPIO_TO_HWIRQ(priv, gpio); - fwspec.param[1] = IRQ_TYPE_NONE; + fwspec.param[1] = IRQ_TYPE_EDGE_RISING; return irq_create_fwspec_mapping(&fwspec); } From 85a94ff8fb14511c88b8d21f82937197d76b82a2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 2 Apr 2020 22:21:45 +0300 Subject: [PATCH 012/562] gpio: Extend TODO to cover code duplication avoidance It appears at least two drivers has a lot of duplication code in GPIO subsystem. To avoid adding more and get rid of existing duplication extend TODO. Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/TODO | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO index 3a44e6ae52bd..b989c9352da2 100644 --- a/drivers/gpio/TODO +++ b/drivers/gpio/TODO @@ -99,6 +99,10 @@ similar and probe a proper driver in the gpiolib subsystem. In some cases it makes sense to create a GPIO chip from the local driver for a few GPIOs. Those should stay where they are. +At the same time it makes sense to get rid of code duplication in existing or +new coming drivers. For example, gpio-ml-ioh should be incorporated into +gpio-pch. In similar way gpio-intel-mid into gpio-pxa. + Generic MMIO GPIO From 616844408de7f21546c3c2a71ea7f8d364f45e0d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 8 Apr 2020 19:41:10 -0600 Subject: [PATCH 013/562] gpio: pl061: Support building as module Enable building the PL061 GPIO driver as a module. This does change the initcall level when built-in. This shouldn't be a problem as any user should support deferred probe by now. A scan of DT based platforms at least didn't reveal any users that would be a problem. Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: linux-gpio@vger.kernel.org Signed-off-by: Rob Herring Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-pl061.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 1b96169d84f7..8ef2179fb999 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -422,7 +422,7 @@ config GPIO_OMAP Say yes here to enable GPIO support for TI OMAP SoCs. config GPIO_PL061 - bool "PrimeCell PL061 GPIO support" + tristate "PrimeCell PL061 GPIO support" depends on ARM_AMBA select IRQ_DOMAIN select GPIOLIB_IRQCHIP diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index e241fb884c12..f1b53dd1df1a 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -408,6 +409,7 @@ static const struct amba_id pl061_ids[] = { }, { 0, 0 }, }; +MODULE_DEVICE_TABLE(amba, pl061_ids); static struct amba_driver pl061_gpio_driver = { .drv = { @@ -419,9 +421,6 @@ static struct amba_driver pl061_gpio_driver = { .id_table = pl061_ids, .probe = pl061_probe, }; +module_amba_driver(pl061_gpio_driver); -static int __init pl061_gpio_init(void) -{ - return amba_driver_register(&pl061_gpio_driver); -} -device_initcall(pl061_gpio_init); +MODULE_LICENSE("GPL v2"); From 0184afd15a141d7ce24c32c0d86a1e3ba6bc0eb3 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 6 Apr 2020 20:35:01 +0300 Subject: [PATCH 014/562] RDMA/rxe: Set default vendor ID The RXE driver doesn't set vendor_id and user space applications see zeros. This causes to pyverbs tests to fail with the following traceback, because the expectation is to have valid vendor_id. Traceback (most recent call last): File "tests/test_device.py", line 51, in test_query_device self.verify_device_attr(attr) File "tests/test_device.py", line 77, in verify_device_attr assert attr.vendor_id != 0 In order to fix it, we will set vendor_id 0XFFFFFF, according to the IBTA v1.4 A3.3.1 VENDOR INFORMATION section. """ A vendor that produces a generic controller (i.e., one that supports a standard I/O protocol such as SRP), which does not have vendor specific device drivers, may use the value of 0xFFFFFF in the VendorID field. """ Before: hca_id: rxe0 transport: InfiniBand (0) fw_ver: 0.0.0 node_guid: 5054:00ff:feaa:5363 sys_image_guid: 5054:00ff:feaa:5363 vendor_id: 0x0000 After: hca_id: rxe0 transport: InfiniBand (0) fw_ver: 0.0.0 node_guid: 5054:00ff:feaa:5363 sys_image_guid: 5054:00ff:feaa:5363 vendor_id: 0xffffff Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200406173501.1466273-1-leon@kernel.org Signed-off-by: Zhu Yanjun Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 1 + drivers/infiniband/sw/rxe/rxe_param.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 4afdd2e20883..5642eefb4ba1 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -77,6 +77,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; + rxe->attr.vendor_id = RXE_VENDOR_ID; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; rxe->attr.max_qp = RXE_MAX_QP; diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f59616b02477..99e9d8ba9767 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -127,6 +127,9 @@ enum rxe_device_param { /* Delay before calling arbiter timer */ RXE_NSEC_ARB_TIMER_DELAY = 200, + + /* IBTA v1.4 A3.3.1 VENDOR INFORMATION section */ + RXE_VENDOR_ID = 0XFFFFFF, }; /* default/initial rxe port parameters */ From cf26deff9036cd3270af562dbec545239e5c7f07 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 6 Apr 2020 20:35:40 +0300 Subject: [PATCH 015/562] RDMA/mlx5: Fix udata response upon SRQ creation Fix udata response upon SRQ creation to use the UAPI structure (i.e. mlx5_ib_create_srq_resp). It did not zero the reserved field in userspace. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20200406173540.1466477-1-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/srq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index b1a8a9175040..6d1ff13d2283 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -310,12 +310,18 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (udata) - if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { + if (udata) { + struct mlx5_ib_create_srq_resp resp = { + .srqn = srq->msrq.srqn, + }; + + if (ib_copy_to_udata(udata, &resp, min(udata->outlen, + sizeof(resp)))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; goto err_core; } + } init_attr->attr.max_wr = srq->msrq.max - 1; From eb356e6dc15a30af604f052cd0e170450193c254 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 6 Apr 2020 21:44:26 -0300 Subject: [PATCH 016/562] RDMA/uverbs: Make the event_queue fds return POLLERR when disassociated If is_closed is set, and the event list is empty, then read() will return -EIO without blocking. After setting is_closed in ib_uverbs_free_event_queue(), we do trigger a wake_up on the poll_wait, but the fops->poll() function does not check it, so poll will continue to sleep on an empty list. Fixes: 14e23bd6d221 ("RDMA/core: Fix locking in ib_uverbs_event_read") Link: https://lore.kernel.org/r/0-v1-ace813388969+48859-uverbs_poll_fix%25jgg@mellanox.com Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2d4083bf4a04..8710a3427146 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -296,6 +296,8 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, spin_lock_irq(&ev_queue->lock); if (!list_empty(&ev_queue->event_list)) pollflags = EPOLLIN | EPOLLRDNORM; + else if (ev_queue->is_closed) + pollflags = EPOLLERR; spin_unlock_irq(&ev_queue->lock); return pollflags; From 1587982e705db1ac090b05a7006771c78d0e8417 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 7 Apr 2020 20:20:09 -0300 Subject: [PATCH 017/562] RDMA: Remove a few extra calls to ib_get_client_data() These four places already have easy access to the client data, just use that instead. Link: https://lore.kernel.org/r/0-v1-fae83f600b4a+68-less_get_client_data%25jgg@mellanox.com Acked-by: Ursula Braun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 15 ++++++--------- drivers/infiniband/ulp/srpt/ib_srpt.c | 7 ++----- net/smc/smc_ib.c | 3 +-- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 74e0058fcf9e..2dd326f2beed 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1412,17 +1412,13 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute) EXPORT_SYMBOL(ib_sa_pack_path); static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client, - struct ib_device *device, + struct ib_sa_device *sa_dev, u8 port_num) { - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; unsigned long flags; bool ret = false; - if (!sa_dev) - return ret; - port = &sa_dev->port[port_num - sa_dev->start_port]; spin_lock_irqsave(&port->classport_lock, flags); if (!port->classport_info.valid) @@ -1450,8 +1446,8 @@ enum opa_pr_supported { * query is possible. */ static int opa_pr_query_possible(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num, + struct ib_sa_device *sa_dev, + struct ib_device *device, u8 port_num, struct sa_path_rec *rec) { struct ib_port_attr port_attr; @@ -1459,7 +1455,7 @@ static int opa_pr_query_possible(struct ib_sa_client *client, if (ib_query_port(device, port_num, &port_attr)) return PR_NOT_SUPPORTED; - if (ib_sa_opa_pathrecord_support(client, device, port_num)) + if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num)) return PR_OPA_SUPPORTED; if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) @@ -1574,7 +1570,8 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, query->sa_query.port = port; if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { - status = opa_pr_query_possible(client, device, port_num, rec); + status = opa_pr_query_possible(client, sa_dev, device, port_num, + rec); if (status == PR_NOT_SUPPORTED) { ret = -EINVAL; goto err1; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 98552749d71c..9d02d8088f1c 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -135,14 +135,11 @@ static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new) static void srpt_event_handler(struct ib_event_handler *handler, struct ib_event *event) { - struct srpt_device *sdev; + struct srpt_device *sdev = + container_of(handler, struct srpt_device, event_handler); struct srpt_port *sport; u8 port_num; - sdev = ib_get_client_data(event->device, &srpt_client); - if (!sdev || sdev->device != event->device) - return; - pr_debug("ASYNC event= %d on device= %s\n", event->event, dev_name(&sdev->device->dev)); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 04b6fefb8bce..e7e7c3c6e94a 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -588,9 +588,8 @@ static void smc_ib_add_dev(struct ib_device *ibdev) /* callback function for ib_unregister_client() */ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { - struct smc_ib_device *smcibdev; + struct smc_ib_device *smcibdev = client_data; - smcibdev = ib_get_client_data(ibdev, &smc_ib_client); if (!smcibdev || smcibdev->ibdev != ibdev) return; ib_set_client_data(ibdev, &smc_ib_client, NULL); From 255e636df4133507254da13137e8d8524ef0794f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:43 +0200 Subject: [PATCH 018/562] IB: Fix some documentation warnings Parsing verbs.c with kernel-doc produce some warnings: ./drivers/infiniband/core/verbs.c:2579: WARNING: Unexpected indentation. ./drivers/infiniband/core/verbs.c:2581: WARNING: Block quote ends without a blank line; unexpected unindent. ./drivers/infiniband/core/verbs.c:2613: WARNING: Unexpected indentation. ./drivers/infiniband/core/verbs.c:2579: WARNING: Unexpected indentation. ./drivers/infiniband/core/verbs.c:2581: WARNING: Block quote ends without a blank line; unexpected unindent. ./drivers/infiniband/core/verbs.c:2613: WARNING: Unexpected indentation. Address them by adding an extra blank line and converting the parameters on one of the arguments to a table. Link: https://lore.kernel.org/r/4c5466d0f450c5a9952138150c3485740b37f9c5.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 56a71337112c..3bfadd8effcc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2574,6 +2574,7 @@ EXPORT_SYMBOL(ib_map_mr_sg_pi); * @page_size: page vector desired page size * * Constraints: + * * - The first sg element is allowed to have an offset. * - Each sg element must either be aligned to page_size or virtually * contiguous to the previous element. In case an sg element has a @@ -2607,10 +2608,12 @@ EXPORT_SYMBOL(ib_map_mr_sg); * @mr: memory region * @sgl: dma mapped scatterlist * @sg_nents: number of entries in sg - * @sg_offset_p: IN: start offset in bytes into sg - * OUT: offset in bytes for element n of the sg of the first + * @sg_offset_p: ==== ======================================================= + * IN start offset in bytes into sg + * OUT offset in bytes for element n of the sg of the first * byte that has not been processed where n is the return * value of this function. + * ==== ======================================================= * @set_page: driver page assignment function pointer * * Core service helper for drivers to convert the largest From 4f953089111d45d0e654080a1f2edec39f197c93 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Mon, 13 Apr 2020 10:42:04 +0800 Subject: [PATCH 019/562] IB/qib: Remove unused variable ret This patch fixes below warnings reported by coccicheck drivers/infiniband/hw/qib/qib_iba7322.c:6878:8-11: Unneeded variable: "ret". Return "0" on line 6907 drivers/infiniband/hw/qib/qib_iba7322.c:2378:5-8: Unneeded variable: "ret". Return "0" on line 2513 Link: https://lore.kernel.org/r/1586745724-107477-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_iba7322.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 91d64dd71a8a..8bcbc884e5b6 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2375,7 +2375,6 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) struct qib_devdata *dd = ppd->dd; u64 val, guid, ibc; unsigned long flags; - int ret = 0; /* * SerDes model not in Pd, but still need to @@ -2510,7 +2509,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) val | ERR_MASK_N(IBStatusChanged)); /* Always zero until we start messing with SerDes for real */ - return ret; + return 0; } /** @@ -6875,7 +6874,7 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) struct qib_devdata *dd = ppd->dd; unsigned lastbuf, erstbuf; u64 senddmabufmask[3] = { 0 }; - int n, ret = 0; + int n; qib_write_kreg_port(ppd, krp_senddmabase, ppd->sdma_descq_phys); qib_sdma_7322_setlengen(ppd); @@ -6904,7 +6903,7 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); qib_write_kreg_port(ppd, krp_senddmabufmask1, senddmabufmask[1]); qib_write_kreg_port(ppd, krp_senddmabufmask2, senddmabufmask[2]); - return ret; + return 0; } /* sdma_lock must be held */ From 99bf84e24eb83d1612598cee1807732bd194c23c Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 2 Apr 2020 14:12:12 -0400 Subject: [PATCH 020/562] RDMA/bnxt_re: Reduce device page size detection code Getting rid of the repeated code in the driver when deciding on the page size of the hardware ring memory. A new common function would translate the ring page size into device specific page size. Link: https://lore.kernel.org/r/1585851136-2316-2-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 138 ++++++--------------- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 72 ++++------- drivers/infiniband/hw/bnxt_re/qplib_res.h | 40 ++++++ 3 files changed, 103 insertions(+), 147 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 899a5d2c100e..d3bf9f665982 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -612,6 +612,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, struct cmdq_create_srq req; struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; + u16 pg_sz_lvl; int rc, idx; hwq_attr.res = res; @@ -638,22 +639,11 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements); pbl = &srq->hwq.pbl[PBL_LVL_0]; - req.pg_size_lvl = cpu_to_le16((((u16)srq->hwq.level & - CMDQ_CREATE_SRQ_LVL_MASK) << - CMDQ_CREATE_SRQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_1G : - CMDQ_CREATE_SRQ_PG_SIZE_PG_4K)); + pg_sz_lvl = ((u16)bnxt_qplib_base_pg_size(&srq->hwq) << + CMDQ_CREATE_SRQ_PG_SIZE_SFT); + pg_sz_lvl |= (srq->hwq.level & CMDQ_CREATE_SRQ_LVL_MASK) << + CMDQ_CREATE_SRQ_LVL_SFT; + req.pg_size_lvl = cpu_to_le16(pg_sz_lvl); req.pbl = cpu_to_le64(pbl->pg_map_arr[0]); req.pd_id = cpu_to_le32(srq->pd->id); req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id); @@ -809,6 +799,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; u32 qp_flags = 0; + u8 pg_sz_lvl; int rc; RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags); @@ -835,28 +826,13 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.sq_pg_size_sq_lvl = - ((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK) - << CMDQ_CREATE_QP1_SQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << + CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT); + pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK); + req.sq_pg_size_sq_lvl = pg_sz_lvl; if (qp->scq) req.scq_cid = cpu_to_le32(qp->scq->id); - - qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; - /* RQ */ if (rq->max_wqe) { hwq_attr.res = res; @@ -876,32 +852,20 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.rq_pg_size_rq_lvl = - ((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) << - CMDQ_CREATE_QP1_RQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << + CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT); + pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK); + req.rq_pg_size_rq_lvl = pg_sz_lvl; if (qp->rcq) req.rcq_cid = cpu_to_le32(qp->rcq->id); } - /* Header buffer - allow hdr_buf pass in */ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); if (rc) { rc = -ENOMEM; goto fail; } + qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; req.qp_flags = cpu_to_le32(qp_flags); req.sq_size = cpu_to_le32(sq->hwq.max_elements); req.rq_size = cpu_to_le32(rq->hwq.max_elements); @@ -965,6 +929,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct cmdq_create_qp req; struct bnxt_qplib_pbl *pbl; u32 qp_flags = 0; + u8 pg_sz_lvl; u16 max_rsge; RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); @@ -1025,31 +990,14 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.sq_pg_size_sq_lvl = - ((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK) - << CMDQ_CREATE_QP_SQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << + CMDQ_CREATE_QP_SQ_PG_SIZE_SFT); + pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK); + req.sq_pg_size_sq_lvl = pg_sz_lvl; if (qp->scq) req.scq_cid = cpu_to_le32(qp->scq->id); - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; - if (qp->sig_type) - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; - /* RQ */ if (rq->max_wqe) { hwq_attr.res = res; @@ -1071,22 +1019,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.rq_pg_size_rq_lvl = - ((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) << - CMDQ_CREATE_QP_RQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << + CMDQ_CREATE_QP_RQ_PG_SIZE_SFT); + pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK); + req.rq_pg_size_rq_lvl = pg_sz_lvl; } else { /* SRQ */ if (qp->srq) { @@ -1097,7 +1033,13 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (qp->rcq) req.rcq_cid = cpu_to_le32(qp->rcq->id); + + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; + if (qp->sig_type) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; req.qp_flags = cpu_to_le32(qp_flags); + req.sq_size = cpu_to_le32(sq->hwq.max_elements); req.rq_size = cpu_to_le32(rq->hwq.max_elements); qp->sq_hdr_buf = NULL; @@ -2000,6 +1942,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) struct cmdq_create_cq req; struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; + u32 pg_sz_lvl; int rc; hwq_attr.res = res; @@ -2020,22 +1963,13 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) } req.dpi = cpu_to_le32(cq->dpi->dpi); req.cq_handle = cpu_to_le64(cq->cq_handle); - req.cq_size = cpu_to_le32(cq->hwq.max_elements); pbl = &cq->hwq.pbl[PBL_LVL_0]; - req.pg_size_lvl = cpu_to_le32( - ((cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK) << - CMDQ_CREATE_CQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? CMDQ_CREATE_CQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? CMDQ_CREATE_CQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? CMDQ_CREATE_CQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? CMDQ_CREATE_CQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? CMDQ_CREATE_CQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? CMDQ_CREATE_CQ_PG_SIZE_PG_1G : - CMDQ_CREATE_CQ_PG_SIZE_PG_4K)); - + pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) << + CMDQ_CREATE_CQ_PG_SIZE_SFT); + pg_sz_lvl |= (cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK); + req.pg_size_lvl = cpu_to_le32(pg_sz_lvl); req.pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.cq_fco_cnq_id = cpu_to_le32( (cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) << CMDQ_CREATE_CQ_CNQ_ID_SFT); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index f01e864bb611..fe5e06f85ffc 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -468,29 +468,13 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw) return 0; } -static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl) -{ - return (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G : - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K); -} - int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn) { - struct cmdq_initialize_fw req; struct creq_initialize_fw_resp resp; - u16 cmd_flags = 0, level; + struct cmdq_initialize_fw req; + u16 cmd_flags = 0; + u8 pgsz, lvl; int rc; RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags); @@ -511,32 +495,30 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) goto config_vf_res; - level = ctx->qpc_tbl.level; - req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) | - __get_pbl_pg_idx(&ctx->qpc_tbl.pbl[level]); - level = ctx->mrw_tbl.level; - req.mrw_pg_size_mrw_lvl = (level << CMDQ_INITIALIZE_FW_MRW_LVL_SFT) | - __get_pbl_pg_idx(&ctx->mrw_tbl.pbl[level]); - level = ctx->srqc_tbl.level; - req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]); - level = ctx->cq_tbl.level; - req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]); - level = ctx->srqc_tbl.level; - req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]); - level = ctx->cq_tbl.level; - req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]); - level = ctx->tim_tbl.level; - req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) | - __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]); - level = ctx->tqm_ctx.pde.level; - req.tqm_pg_size_tqm_lvl = - (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) | - __get_pbl_pg_idx(&ctx->tqm_ctx.pde.pbl[level]); - + lvl = ctx->qpc_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl); + req.qpc_pg_size_qpc_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->mrw_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->mrw_tbl); + req.mrw_pg_size_mrw_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->srqc_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->srqc_tbl); + req.srq_pg_size_srq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->cq_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->cq_tbl); + req.cq_pg_size_cq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->tim_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->tim_tbl); + req.tim_pg_size_tim_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->tqm_ctx.pde.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->tqm_ctx.pde); + req.tqm_pg_size_tqm_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; req.qpc_page_dir = cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]); req.mrw_page_dir = diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 95b645dbbc2d..79109ef6c70c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -80,6 +80,15 @@ enum bnxt_qplib_pbl_lvl { #define ROCE_PG_SIZE_8M (8 * 1024 * 1024) #define ROCE_PG_SIZE_1G (1024 * 1024 * 1024) +enum bnxt_qplib_hwrm_pg_size { + BNXT_QPLIB_HWRM_PG_SIZE_4K = 0, + BNXT_QPLIB_HWRM_PG_SIZE_8K = 1, + BNXT_QPLIB_HWRM_PG_SIZE_64K = 2, + BNXT_QPLIB_HWRM_PG_SIZE_2M = 3, + BNXT_QPLIB_HWRM_PG_SIZE_8M = 4, + BNXT_QPLIB_HWRM_PG_SIZE_1G = 5, +}; + struct bnxt_qplib_reg_desc { u8 bar_id; resource_size_t bar_base; @@ -263,6 +272,37 @@ static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } +static inline u8 bnxt_qplib_base_pg_size(struct bnxt_qplib_hwq *hwq) +{ + u8 pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K; + struct bnxt_qplib_pbl *pbl; + + pbl = &hwq->pbl[PBL_LVL_0]; + switch (pbl->pg_size) { + case ROCE_PG_SIZE_4K: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K; + break; + case ROCE_PG_SIZE_8K: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8K; + break; + case ROCE_PG_SIZE_64K: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_64K; + break; + case ROCE_PG_SIZE_2M: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_2M; + break; + case ROCE_PG_SIZE_8M: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8M; + break; + case ROCE_PG_SIZE_1G: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_1G; + break; + default: + break; + } + + return pg_size; +} #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) From c78671a4e65ae0b2e639ea61b4c65842c4200f2d Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 2 Apr 2020 14:12:13 -0400 Subject: [PATCH 021/562] RDMA/bnxt_re: Update missing hsi data structures Adding fast path support data structure into hardware HSI. These structures are header only definition of RQE/SRQE/SQE. This is to help calculating the size of hardware wqe size. Link: https://lore.kernel.org/r/1585851136-2316-3-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/roce_hsi.h | 106 +++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index e4b09e7c2175..6f00f07420b7 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -210,6 +210,20 @@ struct sq_send { __le32 data[24]; }; +/* sq_send_hdr (size:256b/32B) */ +struct sq_send_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8_1; + __le32 inv_key_or_imm_data; + __le32 length; + __le32 q_key; + __le32 dst_qp; + __le32 avid; + __le64 reserved64; +}; + /* Send Raw Ethernet and QP1 SQ WQE (40 bytes) */ struct sq_send_raweth_qp1 { u8 wqe_type; @@ -265,6 +279,21 @@ struct sq_send_raweth_qp1 { __le32 data[24]; }; +/* sq_send_raweth_qp1_hdr (size:256b/32B) */ +struct sq_send_raweth_qp1_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8; + __le16 lflags; + __le16 cfa_action; + __le32 length; + __le32 reserved32_1; + __le32 cfa_meta; + __le32 reserved32_2; + __le64 reserved64; +}; + /* RDMA SQ WQE (40 bytes) */ struct sq_rdma { u8 wqe_type; @@ -288,6 +317,20 @@ struct sq_rdma { __le32 data[24]; }; +/* sq_rdma_hdr (size:256b/32B) */ +struct sq_rdma_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8; + __le32 imm_data; + __le32 length; + __le32 reserved32_1; + __le64 remote_va; + __le32 remote_key; + __le32 reserved32_2; +}; + /* Atomic SQ WQE (40 bytes) */ struct sq_atomic { u8 wqe_type; @@ -307,6 +350,17 @@ struct sq_atomic { __le32 data[24]; }; +/* sq_atomic_hdr (size:256b/32B) */ +struct sq_atomic_hdr { + u8 wqe_type; + u8 flags; + __le16 reserved16; + __le32 remote_key; + __le64 remote_va; + __le64 swap_data; + __le64 cmp_data; +}; + /* Local Invalidate SQ WQE (40 bytes) */ struct sq_localinvalidate { u8 wqe_type; @@ -324,6 +378,16 @@ struct sq_localinvalidate { __le32 data[24]; }; +/* sq_localinvalidate_hdr (size:256b/32B) */ +struct sq_localinvalidate_hdr { + u8 wqe_type; + u8 flags; + __le16 reserved16; + __le32 inv_l_key; + __le64 reserved64; + u8 reserved128[16]; +}; + /* FR-PMR SQ WQE (40 bytes) */ struct sq_fr_pmr { u8 wqe_type; @@ -380,6 +444,21 @@ struct sq_fr_pmr { __le32 data[24]; }; +/* sq_fr_pmr_hdr (size:256b/32B) */ +struct sq_fr_pmr_hdr { + u8 wqe_type; + u8 flags; + u8 access_cntl; + u8 zero_based_page_size_log; + __le32 l_key; + u8 length[5]; + u8 reserved8_1; + u8 reserved8_2; + u8 numlevels_pbl_page_size_log; + __le64 pblptr; + __le64 va; +}; + /* Bind SQ WQE (40 bytes) */ struct sq_bind { u8 wqe_type; @@ -417,6 +496,22 @@ struct sq_bind { #define SQ_BIND_DATA_SFT 0 }; +/* sq_bind_hdr (size:256b/32B) */ +struct sq_bind_hdr { + u8 wqe_type; + u8 flags; + u8 access_cntl; + u8 reserved8_1; + u8 mw_type_zero_based; + u8 reserved8_2; + __le16 reserved16; + __le32 parent_l_key; + __le32 l_key; + __le64 va; + u8 length[5]; + u8 reserved24[3]; +}; + /* RQ/SRQ WQE Structures */ /* RQ/SRQ WQE (40 bytes) */ struct rq_wqe { @@ -435,6 +530,17 @@ struct rq_wqe { __le32 data[24]; }; +/* rq_wqe_hdr (size:256b/32B) */ +struct rq_wqe_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8; + __le32 reserved32; + __le32 wr_id[2]; + u8 reserved128[16]; +}; + /* CQ CQE Structures */ /* Base CQE (32 bytes) */ struct cq_base { From fddcbbb02af42a5d6ec0c6ed38f823cc9dba1414 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 2 Apr 2020 14:12:14 -0400 Subject: [PATCH 022/562] RDMA/bnxt_re: Simplify obtaining queue entry from hw ring Restructring the data path and control path queue management code to simplify the way a queue element is extracted from the hardware ring. Introduced a new function which will give a pointer to the next ring item depending upon the current cons/prod index in the hardware queue. Further, there are hardcoding when size of queue entry is calculated, replacing it with an inline function. This function would be easier to expand if need going forward. The code section to initialize the PSN search areas has also been restructured and couple of functions has been added there. Link: https://lore.kernel.org/r/1585851136-2316-4-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 65 +++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 10 + drivers/infiniband/hw/bnxt_re/qplib_fp.c | 220 ++++++++++----------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 42 +--- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 16 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 41 ---- drivers/infiniband/hw/bnxt_re/qplib_res.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_res.h | 13 ++ 8 files changed, 178 insertions(+), 230 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 95f6d493d1b9..d98348e82422 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -856,7 +856,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; - bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE); + bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size); /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? @@ -879,7 +879,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, qplib_qp->qp_handle = ureq.qp_handle; if (!qp->qplib_qp.srq) { - bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.qprva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -976,6 +976,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.sig_type = true; /* Shadow QP SQ depth should be same as QP1 RQ depth */ + qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ @@ -986,6 +987,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ @@ -1021,10 +1023,12 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *rq; int entries; rdev = qp->rdev; qplqp = &qp->qplib_qp; + rq = &qplqp->rq; dev_attr = &rdev->dev_attr; if (init_attr->srq) { @@ -1036,23 +1040,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, return -EINVAL; } qplqp->srq = &srq->qplib_srq; - qplqp->rq.max_wqe = 0; + rq->max_wqe = 0; } else { + rq->wqe_size = bnxt_re_get_rwqe_size(); /* Allocate 1 more than what's provided so posting max doesn't * mean empty. */ entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); - qplqp->rq.max_wqe = min_t(u32, entries, - dev_attr->max_qp_wqes + 1); - - qplqp->rq.q_full_delta = qplqp->rq.max_wqe - - init_attr->cap.max_recv_wr; - qplqp->rq.max_sge = init_attr->cap.max_recv_sge; - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) - qplqp->rq.max_sge = dev_attr->max_qp_sges; + rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); + rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; + rq->max_sge = init_attr->cap.max_recv_sge; + if (rq->max_sge > dev_attr->max_qp_sges) + rq->max_sge = dev_attr->max_qp_sges; } - qplqp->rq.sg_info.pgsize = PAGE_SIZE; - qplqp->rq.sg_info.pgshft = PAGE_SHIFT; + rq->sg_info.pgsize = PAGE_SIZE; + rq->sg_info.pgshft = PAGE_SHIFT; return 0; } @@ -1080,15 +1082,18 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *sq; int entries; rdev = qp->rdev; qplqp = &qp->qplib_qp; + sq = &qplqp->sq; dev_attr = &rdev->dev_attr; - qplqp->sq.max_sge = init_attr->cap.max_send_sge; - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) - qplqp->sq.max_sge = dev_attr->max_qp_sges; + sq->wqe_size = bnxt_re_get_swqe_size(); + sq->max_sge = init_attr->cap.max_send_sge; + if (sq->max_sge > dev_attr->max_qp_sges) + sq->max_sge = dev_attr->max_qp_sges; /* * Change the SQ depth if user has requested minimum using * configfs. Only supported for kernel consumers @@ -1096,9 +1101,9 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, entries = init_attr->cap.max_send_wr; /* Allocate 128 + 1 more than what's provided */ entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - qplqp->sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + + BNXT_QPLIB_RESERVED_QP_WRS + 1); + sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; /* * Reserving one slot for Phantom WQE. Application can * post one extra entry in this case. But allowing this to avoid @@ -1511,7 +1516,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; - bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + bytes = (qplib_srq->max_wqe * qplib_srq->wqe_size); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -1534,15 +1539,20 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { - struct ib_pd *ib_pd = ib_srq->pd; - struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); - struct bnxt_re_dev *rdev = pd->rdev; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - struct bnxt_re_srq *srq = - container_of(ib_srq, struct bnxt_re_srq, ib_srq); + struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_nq *nq = NULL; + struct bnxt_re_dev *rdev; + struct bnxt_re_srq *srq; + struct bnxt_re_pd *pd; + struct ib_pd *ib_pd; int rc, entries; + ib_pd = ib_srq->pd; + pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + rdev = pd->rdev; + dev_attr = &rdev->dev_attr; + srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); + if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) { ibdev_err(&rdev->ibdev, "Create CQ failed - max exceeded"); rc = -EINVAL; @@ -1563,8 +1573,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); if (entries > dev_attr->max_srq_wqes + 1) entries = dev_attr->max_srq_wqes + 1; - srq->qplib_srq.max_wqe = entries; + + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 23d972da5652..18dd46f46cf4 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -142,6 +142,16 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; +static inline u16 bnxt_re_get_swqe_size(void) +{ + return sizeof(struct sq_send); +} + +static inline u16 bnxt_re_get_rwqe_size(void) +{ + return sizeof(struct rq_wqe); +} + int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index d3bf9f665982..a4de56bdd6e8 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -301,9 +301,9 @@ static void bnxt_qplib_service_nq(unsigned long data) struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; struct bnxt_qplib_hwq *hwq = &nq->hwq; struct nq_base *nqe, **nq_ptr; - struct bnxt_qplib_cq *cq; - int num_cqne_processed = 0; int num_srqne_processed = 0; + int num_cqne_processed = 0; + struct bnxt_qplib_cq *cq; int budget = nq->budget; u32 sw_cons, raw_cons; uintptr_t q_handle; @@ -315,7 +315,7 @@ static void bnxt_qplib_service_nq(unsigned long data) while (budget--) { sw_cons = HWQ_CMP(raw_cons, hwq); nq_ptr = (struct nq_base **)hwq->pbl_ptr; - nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]; + nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) break; @@ -392,13 +392,11 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) { struct bnxt_qplib_nq *nq = dev_instance; struct bnxt_qplib_hwq *hwq = &nq->hwq; - struct nq_base **nq_ptr; u32 sw_cons; /* Prefetch the NQ element */ sw_cons = HWQ_CMP(hwq->cons, hwq); - nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr; - prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]); + prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL)); /* Fan out to CPU affinitized kthreads? */ tasklet_schedule(&nq->nq_tasklet); @@ -618,7 +616,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, hwq_attr.res = res; hwq_attr.sginfo = &srq->sg_info; hwq_attr.depth = srq->max_wqe; - hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.stride = srq->wqe_size; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr); if (rc) @@ -730,7 +728,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_swqe *wqe) { struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - struct rq_wqe *srqe, **srqe_ptr; + struct rq_wqe *srqe; struct sq_sge *hw_sge; u32 sw_prod, sw_cons, count = 0; int i, rc = 0, next; @@ -748,9 +746,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, spin_unlock(&srq_hwq->lock); sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - srqe_ptr = (struct rq_wqe **)srq_hwq->pbl_ptr; - srqe = &srqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)]; - memset(srqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL); + memset(srqe, 0, srq->wqe_size); /* Calculate wqe_size16 and data_len */ for (i = 0, hw_sge = (struct sq_sge *)srqe->data; i < wqe->num_sge; i++, hw_sge++) { @@ -813,7 +810,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; hwq_attr.depth = sq->max_wqe; - hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE; + hwq_attr.stride = sq->wqe_size; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -837,7 +834,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.stride = rq->wqe_size; hwq_attr.depth = qp->rq.max_wqe; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); @@ -912,22 +909,45 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) return rc; } +static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size) +{ + struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_q *sq; + u64 fpsne, psne, psn_pg; + u16 indx_pad = 0, indx; + u16 pg_num, pg_indx; + u64 *page; + + sq = &qp->sq; + hwq = &sq->hwq; + + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg); + if (!IS_ALIGNED(fpsne, PAGE_SIZE)) + indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; + + page = (u64 *)psn_pg; + for (indx = 0; indx < hwq->max_elements; indx++) { + pg_num = (indx + indx_pad) / (PAGE_SIZE / size); + pg_indx = (indx + indx_pad) % (PAGE_SIZE / size); + psne = page[pg_num] + pg_indx * size; + sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne; + sq->swq[indx].psn_search = (struct sq_psn_search *)psne; + } +} + int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; - unsigned long int psn_search, poff = 0; struct bnxt_qplib_sg_info sginfo = {}; - struct sq_psn_search **psn_search_ptr; struct bnxt_qplib_q *sq = &qp->sq; struct bnxt_qplib_q *rq = &qp->rq; - int i, rc, req_size, psn_sz = 0; - struct sq_send **hw_sq_send_ptr; struct creq_create_qp_resp resp; + int rc, req_size, psn_sz = 0; struct bnxt_qplib_hwq *xrrq; u16 cmd_flags = 0, max_ssge; - struct cmdq_create_qp req; struct bnxt_qplib_pbl *pbl; + struct cmdq_create_qp req; u32 qp_flags = 0; u8 pg_sz_lvl; u16 max_rsge; @@ -948,7 +968,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE; + hwq_attr.stride = sq->wqe_size; hwq_attr.depth = sq->max_wqe; hwq_attr.aux_stride = psn_sz; hwq_attr.aux_depth = hwq_attr.depth; @@ -962,32 +982,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rc = -ENOMEM; goto fail_sq; } - hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr; - if (psn_sz) { - psn_search_ptr = (struct sq_psn_search **) - &hw_sq_send_ptr[get_sqe_pg - (sq->hwq.max_elements)]; - psn_search = (unsigned long int) - &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)] - [get_sqe_idx(sq->hwq.max_elements)]; - if (psn_search & ~PAGE_MASK) { - /* If the psn_search does not start on a page boundary, - * then calculate the offset - */ - poff = (psn_search & ~PAGE_MASK) / - BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE; - } - for (i = 0; i < sq->hwq.max_elements; i++) { - sq->swq[i].psn_search = - &psn_search_ptr[get_psne_pg(i + poff)] - [get_psne_idx(i + poff)]; - /*psns_ext will be used only for P5 chips. */ - sq->swq[i].psn_ext = - (struct sq_psn_search_ext *) - &psn_search_ptr[get_psne_pg(i + poff)] - [get_psne_idx(i + poff)]; - } - } + + if (psn_sz) + bnxt_qplib_init_psn_ptr(qp, psn_sz); + pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << @@ -1002,7 +1000,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.stride = rq->wqe_size; hwq_attr.depth = rq->max_wqe; hwq_attr.aux_stride = 0; hwq_attr.aux_depth = 0; @@ -1425,12 +1423,11 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) { struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; - struct cq_base *hw_cqe, **hw_cqe_ptr; + struct cq_base *hw_cqe; int i; for (i = 0; i < cq_hwq->max_elements; i++) { - hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)]; + hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL); if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements)) continue; /* @@ -1557,6 +1554,34 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, return NULL; } +static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + struct bnxt_qplib_swq *swq) +{ + struct sq_psn_search_ext *psns_ext; + struct sq_psn_search *psns; + u32 flg_npsn; + u32 op_spsn; + + psns = swq->psn_search; + psns_ext = swq->psn_ext; + + op_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & + SQ_PSN_SEARCH_START_PSN_MASK); + op_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & + SQ_PSN_SEARCH_OPCODE_MASK); + flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & + SQ_PSN_SEARCH_NEXT_PSN_MASK); + + if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { + psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); + psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); + } else { + psns->opcode_start_psn = cpu_to_le32(op_spsn); + psns->flags_next_psn = cpu_to_le32(flg_npsn); + } +} + void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; @@ -1567,16 +1592,16 @@ void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe) { - struct bnxt_qplib_q *sq = &qp->sq; - struct bnxt_qplib_swq *swq; - struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr; - struct sq_sge *hw_sge; struct bnxt_qplib_nq_work *nq_work = NULL; - bool sch_handler = false; - u32 sw_prod; - u8 wqe_size16; int i, rc = 0, data_len = 0, pkt_num = 0; + struct bnxt_qplib_q *sq = &qp->sq; + struct sq_send *hw_sq_send_hdr; + struct bnxt_qplib_swq *swq; + bool sch_handler = false; + struct sq_sge *hw_sge; + u8 wqe_size16; __le32 temp32; + u32 sw_prod; if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { @@ -1605,11 +1630,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; swq->start_psn = sq->psn & BTH_PSN_MASK; - hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr; - hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)] - [get_sqe_idx(sw_prod)]; - - memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE); + hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL); + memset(hw_sq_send_hdr, 0, sq->wqe_size); if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { /* Copy the inline data */ @@ -1796,28 +1818,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto done; } swq->next_psn = sq->psn & BTH_PSN_MASK; - if (swq->psn_search) { - u32 opcd_spsn; - u32 flg_npsn; - - opcd_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & - SQ_PSN_SEARCH_START_PSN_MASK); - opcd_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & - SQ_PSN_SEARCH_OPCODE_MASK); - flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & - SQ_PSN_SEARCH_NEXT_PSN_MASK); - if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { - swq->psn_ext->opcode_start_psn = - cpu_to_le32(opcd_spsn); - swq->psn_ext->flags_next_psn = - cpu_to_le32(flg_npsn); - } else { - swq->psn_search->opcode_start_psn = - cpu_to_le32(opcd_spsn); - swq->psn_search->flags_next_psn = - cpu_to_le32(flg_npsn); - } - } + if (qp->type == CMDQ_CREATE_QP_TYPE_RC) + bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: if (sch_handler) { /* Store the ULP info in the software structures */ @@ -1860,13 +1862,13 @@ void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp) int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe) { - struct bnxt_qplib_q *rq = &qp->rq; - struct rq_wqe *rqe, **rqe_ptr; - struct sq_sge *hw_sge; struct bnxt_qplib_nq_work *nq_work = NULL; + struct bnxt_qplib_q *rq = &qp->rq; bool sch_handler = false; - u32 sw_prod; + struct sq_sge *hw_sge; + struct rq_wqe *rqe; int i, rc = 0; + u32 sw_prod; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { sch_handler = true; @@ -1883,10 +1885,8 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); rq->swq[sw_prod].wr_id = wqe->wr_id; - rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr; - rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)]; - - memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); + memset(rqe, 0, rq->wqe_size); /* Calculate wqe_size16 and data_len */ for (i = 0, hw_sge = (struct sq_sge *)rqe->data; @@ -1939,8 +1939,8 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; struct creq_create_cq_resp resp; - struct cmdq_create_cq req; struct bnxt_qplib_pbl *pbl; + struct cmdq_create_cq req; u16 cmd_flags = 0; u32 pg_sz_lvl; int rc; @@ -2128,13 +2128,13 @@ void bnxt_qplib_mark_qp_error(void *qp_handle) static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons) { - struct bnxt_qplib_q *sq = &qp->sq; - struct bnxt_qplib_swq *swq; u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; - struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr; + struct bnxt_qplib_q *sq = &qp->sq; struct cq_req *peek_req_hwcqe; struct bnxt_qplib_qp *peek_qp; struct bnxt_qplib_q *peek_sq; + struct bnxt_qplib_swq *swq; + struct cq_base *peek_hwcqe; int i, rc = 0; /* Normal mode */ @@ -2164,9 +2164,8 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, i = cq->hwq.max_elements; while (i--) { peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq); - peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)] - [CQE_IDX(peek_sw_cq_cons)]; + peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq, + peek_sw_cq_cons, NULL); /* If the next hwcqe is VALID */ if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons, cq->hwq.max_elements)) { @@ -2228,11 +2227,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget, u32 cq_cons, struct bnxt_qplib_qp **lib_qp) { - struct bnxt_qplib_qp *qp; - struct bnxt_qplib_q *sq; - struct bnxt_qplib_cqe *cqe; u32 sw_sq_cons, cqe_sq_cons; struct bnxt_qplib_swq *swq; + struct bnxt_qplib_cqe *cqe; + struct bnxt_qplib_qp *qp; + struct bnxt_qplib_q *sq; int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) @@ -2342,10 +2341,10 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget) { - struct bnxt_qplib_qp *qp; - struct bnxt_qplib_q *rq; struct bnxt_qplib_srq *srq; struct bnxt_qplib_cqe *cqe; + struct bnxt_qplib_qp *qp; + struct bnxt_qplib_q *rq; u32 wr_id_idx; int rc = 0; @@ -2417,10 +2416,10 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget) { - struct bnxt_qplib_qp *qp; - struct bnxt_qplib_q *rq; struct bnxt_qplib_srq *srq; struct bnxt_qplib_cqe *cqe; + struct bnxt_qplib_qp *qp; + struct bnxt_qplib_q *rq; u32 wr_id_idx; int rc = 0; @@ -2495,15 +2494,13 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) { - struct cq_base *hw_cqe, **hw_cqe_ptr; + struct cq_base *hw_cqe; u32 sw_cons, raw_cons; bool rc = true; raw_cons = cq->hwq.cons; sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)]; - + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); /* Check for Valid bit. If the CQE is valid, return false */ rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements); return rc; @@ -2747,7 +2744,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes, struct bnxt_qplib_qp **lib_qp) { - struct cq_base *hw_cqe, **hw_cqe_ptr; + struct cq_base *hw_cqe; u32 sw_cons, raw_cons; int budget, rc = 0; @@ -2756,8 +2753,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, while (budget) { sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)]; + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); /* Check for Valid bit */ if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements)) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 7edb70b6bb16..568ca390322c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -45,6 +45,7 @@ struct bnxt_qplib_srq { struct bnxt_qplib_db_info dbinfo; u64 srq_handle; u32 id; + u16 wqe_size; u32 max_wqe; u32 max_sge; u32 threshold; @@ -65,38 +66,7 @@ struct bnxt_qplib_sge { u32 size; }; -#define BNXT_QPLIB_MAX_SQE_ENTRY_SIZE sizeof(struct sq_send) - -#define SQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_SQE_ENTRY_SIZE) -#define SQE_MAX_IDX_PER_PG (SQE_CNT_PER_PG - 1) - -static inline u32 get_sqe_pg(u32 val) -{ - return ((val & ~SQE_MAX_IDX_PER_PG) / SQE_CNT_PER_PG); -} - -static inline u32 get_sqe_idx(u32 val) -{ - return (val & SQE_MAX_IDX_PER_PG); -} - -#define BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE sizeof(struct sq_psn_search) - -#define PSNE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE) -#define PSNE_MAX_IDX_PER_PG (PSNE_CNT_PER_PG - 1) - -static inline u32 get_psne_pg(u32 val) -{ - return ((val & ~PSNE_MAX_IDX_PER_PG) / PSNE_CNT_PER_PG); -} - -static inline u32 get_psne_idx(u32 val) -{ - return (val & PSNE_MAX_IDX_PER_PG); -} - #define BNXT_QPLIB_QP_MAX_SGL 6 - struct bnxt_qplib_swq { u64 wr_id; int next_idx; @@ -226,19 +196,13 @@ struct bnxt_qplib_swqe { }; }; -#define BNXT_QPLIB_MAX_RQE_ENTRY_SIZE sizeof(struct rq_wqe) - -#define RQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_RQE_ENTRY_SIZE) -#define RQE_MAX_IDX_PER_PG (RQE_CNT_PER_PG - 1) -#define RQE_PG(x) (((x) & ~RQE_MAX_IDX_PER_PG) / RQE_CNT_PER_PG) -#define RQE_IDX(x) ((x) & RQE_MAX_IDX_PER_PG) - struct bnxt_qplib_q { struct bnxt_qplib_hwq hwq; struct bnxt_qplib_swq *swq; struct bnxt_qplib_db_info dbinfo; struct bnxt_qplib_sg_info sg_info; u32 max_wqe; + u16 wqe_size; u16 q_full_delta; u16 max_sge; u32 psn; @@ -256,7 +220,7 @@ struct bnxt_qplib_qp { struct bnxt_qplib_dpi *dpi; struct bnxt_qplib_chip_ctx *cctx; u64 qp_handle; -#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF +#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF u32 id; u8 type; u8 sig_type; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index fe5e06f85ffc..4e211162acee 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -89,10 +89,9 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, struct creq_base *resp, void *sb, u8 is_block) { struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq; - struct bnxt_qplib_cmdqe *cmdqe, **hwq_ptr; struct bnxt_qplib_hwq *hwq = &cmdq->hwq; struct bnxt_qplib_crsqe *crsqe; - u32 cmdq_depth = rcfw->cmdq_depth; + struct bnxt_qplib_cmdqe *cmdqe; u32 sw_prod, cmdq_prod; struct pci_dev *pdev; unsigned long flags; @@ -163,13 +162,11 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, BNXT_QPLIB_CMDQE_UNITS; } - hwq_ptr = (struct bnxt_qplib_cmdqe **)hwq->pbl_ptr; preq = (u8 *)req; do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(hwq->prod, hwq); - cmdqe = &hwq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] - [get_cmdq_idx(sw_prod, cmdq_depth)]; + cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); if (!cmdqe) { dev_err(&pdev->dev, "RCFW request failed with no cmdqe!\n"); @@ -378,7 +375,7 @@ static void bnxt_qplib_service_creq(unsigned long data) struct bnxt_qplib_creq_ctx *creq = &rcfw->creq; u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct bnxt_qplib_hwq *hwq = &creq->hwq; - struct creq_base *creqe, **hwq_ptr; + struct creq_base *creqe; u32 sw_cons, raw_cons; unsigned long flags; @@ -387,8 +384,7 @@ static void bnxt_qplib_service_creq(unsigned long data) raw_cons = hwq->cons; while (budget > 0) { sw_cons = HWQ_CMP(raw_cons, hwq); - hwq_ptr = (struct creq_base **)hwq->pbl_ptr; - creqe = &hwq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]; + creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements)) break; /* The valid test of the entry must be done first before @@ -434,7 +430,6 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance) { struct bnxt_qplib_rcfw *rcfw = dev_instance; struct bnxt_qplib_creq_ctx *creq; - struct creq_base **creq_ptr; struct bnxt_qplib_hwq *hwq; u32 sw_cons; @@ -442,8 +437,7 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance) hwq = &creq->hwq; /* Prefetch the CREQ element */ sw_cons = HWQ_CMP(hwq->cons, hwq); - creq_ptr = (struct creq_base **)creq->hwq.pbl_ptr; - prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]); + prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL)); tasklet_schedule(&creq->creq_tasklet); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 411fce3493b6..bf384098f4b2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -87,12 +87,6 @@ static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth) return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE); } -static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) -{ - return (bnxt_qplib_cmdqe_page_size(depth) / - BNXT_QPLIB_CMDQE_UNITS); -} - /* Set the cmd_size to a factor of CMDQE unit */ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) { @@ -100,30 +94,12 @@ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) BNXT_QPLIB_CMDQE_UNITS; } -#define MAX_CMDQ_IDX(depth) ((depth) - 1) - -static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) -{ - return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1); -} - #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 #define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 #define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL -static inline u32 get_cmdq_pg(u32 val, u32 depth) -{ - return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) / - (bnxt_qplib_cmdqe_cnt_per_pg(depth)); -} - -static inline u32 get_cmdq_idx(u32 val, u32 depth) -{ - return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth)); -} - /* Crsq buf is 1024-Byte */ struct bnxt_qplib_crsbe { u8 data[1024]; @@ -133,23 +109,6 @@ struct bnxt_qplib_crsbe { /* Allocate 1 per QP for async error notification for now */ #define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024) #define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */ -#define BNXT_QPLIB_CREQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CREQE_UNITS) - -#define MAX_CREQ_IDX (BNXT_QPLIB_CREQE_MAX_CNT - 1) -#define MAX_CREQ_IDX_PER_PG (BNXT_QPLIB_CREQE_CNT_PER_PG - 1) - -static inline u32 get_creq_pg(u32 val) -{ - return (val & ~MAX_CREQ_IDX_PER_PG) / BNXT_QPLIB_CREQE_CNT_PER_PG; -} - -static inline u32 get_creq_idx(u32 val) -{ - return val & MAX_CREQ_IDX_PER_PG; -} - -#define BNXT_QPLIB_CREQE_PER_PG (PAGE_SIZE / sizeof(struct creq_base)) - #define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \ (!!((hdr)->v & CREQ_BASE_V) == \ !((raw_cons) & (cp_bit))) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index cab1adf1fed9..7efa6e5dce62 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -347,6 +347,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, hwq->depth = hwq_attr->depth; hwq->max_elements = depth; hwq->element_size = stride; + hwq->qe_ppg = pg_size / stride; /* For direct access to the elements */ lvl = hwq->level; if (hwq_attr->sginfo->nopte && hwq->level) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 79109ef6c70c..c29cbd3a2d7b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -135,6 +135,7 @@ struct bnxt_qplib_hwq { u32 max_elements; u32 depth; u16 element_size; /* Size of each entry */ + u16 qe_ppg; /* queue entry per page */ u32 prod; /* raw */ u32 cons; /* raw */ @@ -304,6 +305,18 @@ static inline u8 bnxt_qplib_base_pg_size(struct bnxt_qplib_hwq *hwq) return pg_size; } +static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq, + u32 indx, u64 *pg) +{ + u32 pg_num, pg_idx; + + pg_num = (indx / hwq->qe_ppg); + pg_idx = (indx % hwq->qe_ppg); + if (pg) + *pg = (u64)&hwq->pbl_ptr[pg_num]; + return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); +} + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) From 8ce111d00e64b52e7c77a5956434384e906f7394 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 2 Apr 2020 14:12:15 -0400 Subject: [PATCH 023/562] RDMA/bnxt_re: Remove dead code from rcfw In the previous refactoring serise there were few leftover functions which are not is use anymore. Removed them as it is a dead code. Fixes: 6f53196bc5e7 ("RDMA/bnxt_re: Refactor doorbell management functions") Link: https://lore.kernel.org/r/1585851136-2316-5-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 50 ---------------------- 1 file changed, 50 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index bf384098f4b2..157387636d00 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -112,56 +112,6 @@ struct bnxt_qplib_crsbe { #define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \ (!!((hdr)->v & CREQ_BASE_V) == \ !((raw_cons) & (cp_bit))) - -#define CREQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT) -#define CREQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID -#define CREQ_DB_IRQ_DIS CMPL_DOORBELL_MASK -#define CREQ_DB_CP_FLAGS_REARM (CREQ_DB_KEY_CP | \ - CREQ_DB_IDX_VALID) -#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \ - CREQ_DB_IDX_VALID | \ - CREQ_DB_IRQ_DIS) - -static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index, - u32 xid, bool arm) -{ - u64 val = 0; - - val = xid & DBC_DBC_XID_MASK; - val |= DBC_DBC_PATH_ROCE; - val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; - val <<= 32; - val |= index & DBC_DBC_INDEX_MASK; - - writeq(val, db); -} - -static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons, - u32 max_elements, u32 xid, - bool gen_p5) -{ - u32 index = raw_cons & (max_elements - 1); - - if (gen_p5) - bnxt_qplib_ring_creq_db64(db, index, xid, true); - else - writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), - db); -} - -static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons, - u32 max_elements, u32 xid, - bool gen_p5) -{ - u32 index = raw_cons & (max_elements - 1); - - if (gen_p5) - bnxt_qplib_ring_creq_db64(db, index, xid, true); - else - writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), - db); -} - #define CREQ_ENTRY_POLL_BUDGET 0x100 /* HWQ */ From dd302ee41e6ed204f8d9534d511edc72b5ce5e53 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 13 Apr 2020 16:23:23 +0300 Subject: [PATCH 024/562] RDMA/cma: Limit the scope of rdma_is_consumer_reject function The function is local to cma.c, so let's limit its scope. Link: https://lore.kernel.org/r/20200413132323.930869-1-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 9 +++++++-- include/rdma/rdma_cm.h | 8 -------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 26e6f7df247b..6406a597dfb6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -91,7 +91,13 @@ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, } EXPORT_SYMBOL(rdma_reject_msg); -bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) +/** + * rdma_is_consumer_reject - return true if the consumer rejected the connect + * request. + * @id: Communication identifier that received the REJECT event. + * @reason: Value returned in the REJECT event status field. + */ +static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) { if (rdma_ib_or_roce(id->device, id->port_num)) return reason == IB_CM_REJ_CONSUMER_DEFINED; @@ -102,7 +108,6 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason) WARN_ON_ONCE(1); return false; } -EXPORT_SYMBOL(rdma_is_consumer_reject); const void *rdma_consumer_reject_data(struct rdma_cm_id *id, struct rdma_cm_event *ev, u8 *data_len) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 71f48cfdc24c..ea8e794785ed 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -389,14 +389,6 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr); */ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, int reason); -/** - * rdma_is_consumer_reject - return true if the consumer rejected the connect - * request. - * @id: Communication identifier that received the REJECT event. - * @reason: Value returned in the REJECT event status field. - */ -bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason); - /** * rdma_consumer_reject_data - return the consumer reject private data and * length, if any. From d221aed19c80d83c2de2fa5e8a5ba045065c6d4b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 12 Apr 2020 19:13:59 -0700 Subject: [PATCH 025/562] scsi: qla2xxx: Use ARRAY_SIZE() instead of open-coding it This patch does not change any functionality. Link: https://lore.kernel.org/r/20200413021359.21725-1-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Himanshu Madhani Cc: Martin Wilck Cc: Daniel Wagner Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Roman Bolshakov Reviewed-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index f301a8048b2f..8b7d0e476773 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -778,16 +778,16 @@ qla2300_fw_dump(scsi_qla_host_t *vha, int hardware_locked) if (rval == QLA_SUCCESS) { dmp_reg = ®->flash_address; - for (cnt = 0; cnt < sizeof(fw->pbiu_reg) / 2; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->pbiu_reg); cnt++, dmp_reg++) fw->pbiu_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); dmp_reg = ®->u.isp2300.req_q_in; - for (cnt = 0; cnt < sizeof(fw->risc_host_reg) / 2; + for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_host_reg); cnt++, dmp_reg++) fw->risc_host_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); dmp_reg = ®->u.isp2300.mailbox0; - for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; + for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, dmp_reg++) fw->mailbox_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); @@ -799,7 +799,7 @@ qla2300_fw_dump(scsi_qla_host_t *vha, int hardware_locked) WRT_REG_WORD(®->ctrl_status, 0x00); dmp_reg = ®->risc_hw; - for (cnt = 0; cnt < sizeof(fw->risc_hdw_reg) / 2; + for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_hdw_reg); cnt++, dmp_reg++) fw->risc_hdw_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); @@ -860,12 +860,12 @@ qla2300_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Get RISC SRAM. */ if (rval == QLA_SUCCESS) rval = qla2xxx_dump_ram(ha, 0x800, fw->risc_ram, - sizeof(fw->risc_ram) / 2, &nxt); + ARRAY_SIZE(fw->risc_ram), &nxt); /* Get stack SRAM. */ if (rval == QLA_SUCCESS) rval = qla2xxx_dump_ram(ha, 0x10000, fw->stack_ram, - sizeof(fw->stack_ram) / 2, &nxt); + ARRAY_SIZE(fw->stack_ram), &nxt); /* Get data SRAM. */ if (rval == QLA_SUCCESS) @@ -944,7 +944,7 @@ qla2100_fw_dump(scsi_qla_host_t *vha, int hardware_locked) } if (rval == QLA_SUCCESS) { dmp_reg = ®->flash_address; - for (cnt = 0; cnt < sizeof(fw->pbiu_reg) / 2; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->pbiu_reg); cnt++, dmp_reg++) fw->pbiu_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); dmp_reg = ®->u.isp2100.mailbox0; @@ -956,12 +956,12 @@ qla2100_fw_dump(scsi_qla_host_t *vha, int hardware_locked) } dmp_reg = ®->u.isp2100.unused_2[0]; - for (cnt = 0; cnt < sizeof(fw->dma_reg) / 2; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->dma_reg); cnt++, dmp_reg++) fw->dma_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); WRT_REG_WORD(®->ctrl_status, 0x00); dmp_reg = ®->risc_hw; - for (cnt = 0; cnt < sizeof(fw->risc_hdw_reg) / 2; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_hdw_reg); cnt++, dmp_reg++) fw->risc_hdw_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); WRT_REG_WORD(®->pcr, 0x2000); @@ -1041,7 +1041,7 @@ qla2100_fw_dump(scsi_qla_host_t *vha, int hardware_locked) WRT_MAILBOX_REG(ha, reg, 0, MBC_READ_RAM_WORD); clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); } - for (cnt = 0; cnt < sizeof(fw->risc_ram) / 2 && rval == QLA_SUCCESS; + for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_ram) && rval == QLA_SUCCESS; cnt++, risc_address++) { WRT_MAILBOX_REG(ha, reg, 1, risc_address); WRT_REG_WORD(®->hccr, HCCR_SET_HOST_INT); @@ -1145,7 +1145,7 @@ qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Host interface registers. */ dmp_reg = ®->flash_addr; - for (cnt = 0; cnt < sizeof(fw->host_reg) / 4; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); /* Disable interrupts. */ @@ -1178,7 +1178,7 @@ qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Mailbox registers. */ mbx_reg = ®->mailbox0; - for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); /* Transfer sequence registers. */ @@ -1421,7 +1421,7 @@ qla25xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Host interface registers. */ dmp_reg = ®->flash_addr; - for (cnt = 0; cnt < sizeof(fw->host_reg) / 4; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); /* Disable interrupts. */ @@ -1470,7 +1470,7 @@ qla25xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Mailbox registers. */ mbx_reg = ®->mailbox0; - for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); /* Transfer sequence registers. */ @@ -1745,7 +1745,7 @@ qla81xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Host interface registers. */ dmp_reg = ®->flash_addr; - for (cnt = 0; cnt < sizeof(fw->host_reg) / 4; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); /* Disable interrupts. */ @@ -1794,7 +1794,7 @@ qla81xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Mailbox registers. */ mbx_reg = ®->mailbox0; - for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); /* Transfer sequence registers. */ @@ -2093,7 +2093,7 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Host interface registers. */ dmp_reg = ®->flash_addr; - for (cnt = 0; cnt < sizeof(fw->host_reg) / 4; cnt++, dmp_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); /* Disable interrupts. */ @@ -2142,7 +2142,7 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Mailbox registers. */ mbx_reg = ®->mailbox0; - for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++) + for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); /* Transfer sequence registers. */ From 78e8df28c5c3063a14669c5e029de8aa176a8389 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 7 Apr 2020 12:21:14 -0400 Subject: [PATCH 026/562] scsi: gdth: Make __gdth_execute static Fix sparse warning: drivers/scsi/gdth.c:332:5: warning: symbol '__gdth_execute' was not declared. Should it be static? Link: https://lore.kernel.org/r/1586276474-34480-1-git-send-email-wanghai38@huawei.com Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Martin K. Petersen --- drivers/scsi/gdth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index fe03410268e6..7f150d52b4a6 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -329,8 +329,8 @@ static void gdth_scsi_done(struct scsi_cmnd *scp) scp->scsi_done(scp); } -int __gdth_execute(struct scsi_device *sdev, gdth_cmd_str *gdtcmd, char *cmnd, - int timeout, u32 *info) +static int __gdth_execute(struct scsi_device *sdev, gdth_cmd_str *gdtcmd, + char *cmnd, int timeout, u32 *info) { gdth_ha_str *ha = shost_priv(sdev->host); struct scsi_cmnd *scp; From 037773f3b6dd9559a02315047f61eee7b7c96011 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 11:21:56 +0800 Subject: [PATCH 027/562] scsi: bfa: bfa_svc.c: make two functions static Fix the following sparse warning: drivers/scsi/bfa/bfa_svc.c:4288:1: warning: symbol 'bfa_fcport_ddportenable' was not declared. Should it be static? drivers/scsi/bfa/bfa_svc.c:4297:1: warning: symbol 'bfa_fcport_ddportdisable' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407032202.36789-2-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_svc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bfa/bfa_svc.c b/drivers/scsi/bfa/bfa_svc.c index 6d2131441f0a..d8a9e40fa257 100644 --- a/drivers/scsi/bfa/bfa_svc.c +++ b/drivers/scsi/bfa/bfa_svc.c @@ -4284,7 +4284,7 @@ bfa_fcport_dportdisable(struct bfa_s *bfa) bfa_port_set_dportenabled(&bfa->modules.port, BFA_FALSE); } -void +static void bfa_fcport_ddportenable(struct bfa_s *bfa) { /* @@ -4293,7 +4293,7 @@ bfa_fcport_ddportenable(struct bfa_s *bfa) bfa_sm_send_event(BFA_FCPORT_MOD(bfa), BFA_FCPORT_SM_DDPORTENABLE); } -void +static void bfa_fcport_ddportdisable(struct bfa_s *bfa) { /* From b9ddee1ee15f0dc17cdb919cadf837cd6429d7a1 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 11:21:57 +0800 Subject: [PATCH 028/562] scsi: bfa: bfa_core.c: make bfa_isr_rspq() static Fix the following sparse warning: drivers/scsi/bfa/bfa_core.c:712:1: warning: symbol 'bfa_isr_rspq' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407032202.36789-3-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfa_core.c b/drivers/scsi/bfa/bfa_core.c index 0f554ebb8f2c..fb4c469bd89f 100644 --- a/drivers/scsi/bfa/bfa_core.c +++ b/drivers/scsi/bfa/bfa_core.c @@ -708,7 +708,7 @@ bfa_reqq_resume(struct bfa_s *bfa, int qid) } } -bfa_boolean_t +static bfa_boolean_t bfa_isr_rspq(struct bfa_s *bfa, int qid) { struct bfi_msg_s *m; From 84a2fd236d75bb72d44d56192f21f627f0421853 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 11:21:58 +0800 Subject: [PATCH 029/562] scsi: bfa: bfa_fcpim.c: make two functions static Fix the following sparse warning: drivers/scsi/bfa/bfa_fcpim.c:440:1: warning: symbol 'bfa_ioim_profile_comp' was not declared. Should it be static? drivers/scsi/bfa/bfa_fcpim.c:457:1: warning: symbol 'bfa_ioim_profile_start' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407032202.36789-4-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_fcpim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bfa/bfa_fcpim.c b/drivers/scsi/bfa/bfa_fcpim.c index 284baa3b0c8e..766f2b5ed2ab 100644 --- a/drivers/scsi/bfa/bfa_fcpim.c +++ b/drivers/scsi/bfa/bfa_fcpim.c @@ -436,7 +436,7 @@ bfa_fcpim_port_iostats(struct bfa_s *bfa, return BFA_STATUS_OK; } -void +static void bfa_ioim_profile_comp(struct bfa_ioim_s *ioim) { struct bfa_itnim_latency_s *io_lat = @@ -453,7 +453,7 @@ bfa_ioim_profile_comp(struct bfa_ioim_s *ioim) io_lat->avg[idx] += val; } -void +static void bfa_ioim_profile_start(struct bfa_ioim_s *ioim) { ioim->start_time = jiffies; From 901ad27c6998817b2b6abeb55f9b24cf189cbcfe Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 11:21:59 +0800 Subject: [PATCH 030/562] scsi: bfa: bfa_fcs_lport.c: make bfa_fcport_get_loop_attr() static Fix the following sparse warning: drivers/scsi/bfa/bfa_fcs_lport.c:1287:1: warning: symbol 'bfa_fcport_get_loop_attr' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407032202.36789-5-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_fcs_lport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfa_fcs_lport.c b/drivers/scsi/bfa/bfa_fcs_lport.c index 7c3eadc58b98..e09bf0729deb 100644 --- a/drivers/scsi/bfa/bfa_fcs_lport.c +++ b/drivers/scsi/bfa/bfa_fcs_lport.c @@ -1283,7 +1283,7 @@ bfa_fcs_lport_n2n_offline(struct bfa_fcs_lport_s *port) n2n_port->reply_oxid = 0; } -void +static void bfa_fcport_get_loop_attr(struct bfa_fcs_lport_s *port) { int i = 0, j = 0, bit = 0, alpa_bit = 0; From eae9b1788ab2418a96466e096a2fdb1c40848e11 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 11:22:00 +0800 Subject: [PATCH 031/562] scsi: bfa: bfa_ioc_ct.c: make two funcitons static Fix the following sparse warning: drivers/scsi/bfa/bfa_ioc_ct.c:368:1: warning: symbol 'bfa_ioc_ct2_lpu_read_stat' was not declared. Should it be static? drivers/scsi/bfa/bfa_ioc_ct.c:748:1: warning: symbol 'bfa_ioc_ct2_mac_reset' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407032202.36789-6-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_ioc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bfa/bfa_ioc_ct.c b/drivers/scsi/bfa/bfa_ioc_ct.c index 18b58b2f304f..6fd3383ee538 100644 --- a/drivers/scsi/bfa/bfa_ioc_ct.c +++ b/drivers/scsi/bfa/bfa_ioc_ct.c @@ -364,7 +364,7 @@ bfa_ioc_ct_isr_mode_set(struct bfa_ioc_s *ioc, bfa_boolean_t msix) writel(r32, rb + FNC_PERS_REG); } -bfa_boolean_t +static bfa_boolean_t bfa_ioc_ct2_lpu_read_stat(struct bfa_ioc_s *ioc) { u32 r32; @@ -744,7 +744,7 @@ bfa_ioc_ct2_mem_init(void __iomem *rb) writel(0, (rb + CT2_MBIST_CTL_REG)); } -void +static void bfa_ioc_ct2_mac_reset(void __iomem *rb) { /* put port0, port1 MAC & AHB in reset */ From 938b18462ecd8b47e2852cad676516031b315edb Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 11:22:01 +0800 Subject: [PATCH 032/562] scsi: bfa: bfad_attr.c: make two funcitons static Fix the following sparse warning: drivers/scsi/bfa/bfad_attr.c:441:1: warning: symbol 'bfad_im_issue_fc_host_lip' was not declared. Should it be static? drivers/scsi/bfa/bfad_attr.c:566:1: warning: symbol 'bfad_im_vport_set_symbolic_name' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407032202.36789-7-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad_attr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index fbfce02e5b93..5ae1e3f78910 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -437,7 +437,7 @@ bfad_im_vport_create(struct fc_vport *fc_vport, bool disable) return status; } -int +static int bfad_im_issue_fc_host_lip(struct Scsi_Host *shost) { struct bfad_im_port_s *im_port = @@ -562,7 +562,7 @@ bfad_im_vport_disable(struct fc_vport *fc_vport, bool disable) return 0; } -void +static void bfad_im_vport_set_symbolic_name(struct fc_vport *fc_vport) { struct bfad_vport_s *vport = (struct bfad_vport_s *)fc_vport->dd_data; From 4d5956fb5137423dc916994b632aff942454f23b Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 11:22:02 +0800 Subject: [PATCH 033/562] scsi: bfa: bfad.c: make max_rport_logins static Fix the following sparse warning: drivers/scsi/bfa/bfad.c:53:17: warning: symbol 'max_rport_logins' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407032202.36789-8-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index eb0c76338295..bc5d84f87d8f 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -50,7 +50,7 @@ int pcie_max_read_reqsz; int bfa_debugfs_enable = 1; int msix_disable_cb = 0, msix_disable_ct = 0; int max_xfer_size = BFAD_MAX_SECTORS >> 1; -int max_rport_logins = BFA_FCS_MAX_RPORT_LOGINS; +static int max_rport_logins = BFA_FCS_MAX_RPORT_LOGINS; /* Firmware releated */ u32 bfi_image_cb_size, bfi_image_ct_size, bfi_image_ct2_size; From 02ff107046e0fe469a62cabea4a583d88dd34839 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 17:28:24 +0800 Subject: [PATCH 034/562] scsi: megaraid: make two symbols static in megaraid_mbox.c Fix the following sparse warning: drivers/scsi/megaraid/megaraid_mbox.c:305:5: warning: symbol 'dev_attr_megaraid_mbox_app_hndl' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_mbox.c:315:5: warning: symbol 'dev_attr_megaraid_mbox_ld' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407092827.18074-2-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_mbox.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 8443f2f35be2..8f918df631bf 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -302,8 +302,8 @@ static struct pci_driver megaraid_pci_driver = { // definitions for the device attributes for exporting logical drive number // for a scsi address (Host, Channel, Id, Lun) -DEVICE_ATTR(megaraid_mbox_app_hndl, S_IRUSR, megaraid_sysfs_show_app_hndl, - NULL); +static DEVICE_ATTR(megaraid_mbox_app_hndl, S_IRUSR, megaraid_sysfs_show_app_hndl, + NULL); // Host template initializer for megaraid mbox sysfs device attributes static struct device_attribute *megaraid_shost_attrs[] = { @@ -312,7 +312,7 @@ static struct device_attribute *megaraid_shost_attrs[] = { }; -DEVICE_ATTR(megaraid_mbox_ld, S_IRUSR, megaraid_sysfs_show_ldnum, NULL); +static DEVICE_ATTR(megaraid_mbox_ld, S_IRUSR, megaraid_sysfs_show_ldnum, NULL); // Host template initializer for megaraid mbox sysfs device attributes static struct device_attribute *megaraid_sdev_attrs[] = { From 3c3c6f663baa5d2089771697829de19a43f93f0b Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 17:28:25 +0800 Subject: [PATCH 035/562] scsi: megaraid: make some symbols static in megaraid_sas_fp.c Fix the following sparse warning: drivers/scsi/megaraid/megaraid_sas_fp.c:88:5: warning: symbol 'mega_div64_32' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_fp.c:370:5: warning: symbol 'MR_GetSpanBlock' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_fp.c:420:5: warning: symbol 'mr_spanset_get_span_block' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_fp.c:645:4: warning: symbol 'get_arm' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_fp.c:788:4: warning: symbol 'MR_GetPhyParams' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_fp.c:1345:4: warning: symbol 'megasas_get_best_arm_pd' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407092827.18074-3-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 89c3685f5163..3b3d04d7671f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -85,7 +85,7 @@ u32 mega_mod64(u64 dividend, u32 divisor) * * @return quotient **/ -u64 mega_div64_32(uint64_t dividend, uint32_t divisor) +static u64 mega_div64_32(uint64_t dividend, uint32_t divisor) { u32 remainder; u64 d; @@ -367,7 +367,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id) return 1; } -u32 MR_GetSpanBlock(u32 ld, u64 row, u64 *span_blk, +static u32 MR_GetSpanBlock(u32 ld, u64 row, u64 *span_blk, struct MR_DRV_RAID_MAP_ALL *map) { struct MR_SPAN_BLOCK_INFO *pSpanBlock = MR_LdSpanInfoGet(ld, map); @@ -417,7 +417,7 @@ u32 MR_GetSpanBlock(u32 ld, u64 row, u64 *span_blk, * div_error - Devide error code. */ -u32 mr_spanset_get_span_block(struct megasas_instance *instance, +static u32 mr_spanset_get_span_block(struct megasas_instance *instance, u32 ld, u64 row, u64 *span_blk, struct MR_DRV_RAID_MAP_ALL *map) { struct fusion_context *fusion = instance->ctrl_context; @@ -642,7 +642,7 @@ static u32 get_arm_from_strip(struct megasas_instance *instance, } /* This Function will return Phys arm */ -u8 get_arm(struct megasas_instance *instance, u32 ld, u8 span, u64 stripe, +static u8 get_arm(struct megasas_instance *instance, u32 ld, u8 span, u64 stripe, struct MR_DRV_RAID_MAP_ALL *map) { struct MR_LD_RAID *raid = MR_LdRaidGet(ld, map); @@ -785,7 +785,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld, * span - Span number * block - Absolute Block number in the physical disk */ -u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, +static u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, u16 stripRef, struct IO_REQUEST_INFO *io_info, struct RAID_CONTEXT *pRAID_Context, struct MR_DRV_RAID_MAP_ALL *map) @@ -1342,7 +1342,7 @@ void mr_update_load_balance_params(struct MR_DRV_RAID_MAP_ALL *drv_map, } } -u8 megasas_get_best_arm_pd(struct megasas_instance *instance, +static u8 megasas_get_best_arm_pd(struct megasas_instance *instance, struct LD_LOAD_BALANCE_INFO *lbInfo, struct IO_REQUEST_INFO *io_info, struct MR_DRV_RAID_MAP_ALL *drv_map) From 057d1c0d1b03707bffb8c54c2ca104b049d1ce5d Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 17:28:26 +0800 Subject: [PATCH 036/562] scsi: megaraid: make some symbols static in megaraid_sas_fusion.c Fix the following sparse warning: drivers/scsi/megaraid/megaraid_sas_fusion.c:180:1: warning: symbol 'megasas_enable_intr_fusion' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_fusion.c:202:1: warning: symbol 'megasas_disable_intr_fusion' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_fusion.c:4233:6: warning: symbol 'megasas_refire_mgmt_cmd' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407092827.18074-4-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index b2ad96564484..bec3d4cca74f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -176,7 +176,7 @@ static inline bool megasas_check_same_4gb_region * megasas_enable_intr_fusion - Enables interrupts * @regs: MFI register set */ -void +static void megasas_enable_intr_fusion(struct megasas_instance *instance) { struct megasas_register_set __iomem *regs; @@ -198,7 +198,7 @@ megasas_enable_intr_fusion(struct megasas_instance *instance) * megasas_disable_intr_fusion - Disables interrupt * @regs: MFI register set */ -void +static void megasas_disable_intr_fusion(struct megasas_instance *instance) { u32 mask = 0xFFFFFFFF; @@ -4230,7 +4230,7 @@ void megasas_reset_reply_desc(struct megasas_instance *instance) * megasas_refire_mgmt_cmd : Re-fire management commands * @instance: Controller's soft instance */ -void megasas_refire_mgmt_cmd(struct megasas_instance *instance, +static void megasas_refire_mgmt_cmd(struct megasas_instance *instance, bool return_ioctl) { int j; From 1909a4386c7e66d307e9c4383efb2d598a475b0e Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 7 Apr 2020 17:28:27 +0800 Subject: [PATCH 037/562] scsi: megaraid: make two symbols static in megaraid_sas_base.c Fix the following sparse warning: drivers/scsi/megaraid/megaraid_sas_base.c:84:5: warning: symbol 'rdpq_enable' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_base.c:92:14: warning: symbol 'scmd_timeout' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200407092827.18074-5-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index babe85d7b537..fb9c3ceed508 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -81,7 +81,7 @@ int smp_affinity_enable = 1; module_param(smp_affinity_enable, int, 0444); MODULE_PARM_DESC(smp_affinity_enable, "SMP affinity feature enable/disable Default: enable(1)"); -int rdpq_enable = 1; +static int rdpq_enable = 1; module_param(rdpq_enable, int, 0444); MODULE_PARM_DESC(rdpq_enable, "Allocate reply queue in chunks for large queue depth enable/disable Default: enable(1)"); @@ -89,7 +89,7 @@ unsigned int dual_qdepth_disable; module_param(dual_qdepth_disable, int, 0444); MODULE_PARM_DESC(dual_qdepth_disable, "Disable dual queue depth feature. Default: 0"); -unsigned int scmd_timeout = MEGASAS_DEFAULT_CMD_TIMEOUT; +static unsigned int scmd_timeout = MEGASAS_DEFAULT_CMD_TIMEOUT; module_param(scmd_timeout, int, 0444); MODULE_PARM_DESC(scmd_timeout, "scsi command timeout (10-90s), default 90s. See megasas_reset_timer."); From 5d296cc9ab6e5cafaef2f81e1376320a6b6de50b Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 9 Apr 2020 16:49:10 +0800 Subject: [PATCH 038/562] scsi: qla1280: make qla1280_firmware_mutex and qla1280_fw_tbl static Fix the following sparse warning: drivers/scsi/qla1280.c:529:1: warning: symbol 'qla1280_firmware_mutex' was not declared. Should it be static? drivers/scsi/qla1280.c:538:15: warning: symbol 'qla1280_fw_tbl' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200409084910.44336-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qla1280.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 3337cd341d21..441a45349349 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -526,7 +526,7 @@ static struct pci_device_id qla1280_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, qla1280_pci_tbl); -DEFINE_MUTEX(qla1280_firmware_mutex); +static DEFINE_MUTEX(qla1280_firmware_mutex); struct qla_fw { char *fwname; @@ -535,7 +535,7 @@ struct qla_fw { #define QL_NUM_FW_IMAGES 3 -struct qla_fw qla1280_fw_tbl[QL_NUM_FW_IMAGES] = { +static struct qla_fw qla1280_fw_tbl[QL_NUM_FW_IMAGES] = { {"qlogic/1040.bin", NULL}, /* image 0 */ {"qlogic/1280.bin", NULL}, /* image 1 */ {"qlogic/12160.bin", NULL}, /* image 2 */ From 4a0b746fa9a7e7936fa4511116aab5a98d1af49a Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 13 Apr 2020 16:28:22 +0800 Subject: [PATCH 039/562] scsi: ipr: remove unneeded semicolon Fix the following coccicheck warning: drivers/scsi/ipr.c:1167:2-3: Unneeded semicolon Link: https://lore.kernel.org/r/20200413082822.24356-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index d48a8fa997b9..0db37b4f7265 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -1164,7 +1164,7 @@ static void ipr_update_ata_class(struct ipr_resource_entry *res, unsigned int pr default: res->ata_class = ATA_DEV_UNKNOWN; break; - }; + } } /** From 1e4ffb8344c53b3d8297c9591c59debe4406e6cb Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sat, 11 Apr 2020 01:19:30 +0100 Subject: [PATCH 040/562] scsi: libsas: Add missing annotation for sas_ata_qc_issue() Sparse reports a warning at sas_ata_qc_issue() warning: context imbalance in sas_ata_qc_issue() - unexpected unlock The root cause is the missing annotation at sas_ata_qc_issue() Add the missing __must_hold(ap->lock) annotation Link: https://lore.kernel.org/r/20200411001933.10072-7-jbi.octave@gmail.com Reviewed-by: John Garry Signed-off-by: Jules Irenge Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index c5a828a041e0..5d716d388707 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -160,6 +160,7 @@ static void sas_ata_task_done(struct sas_task *task) } static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) + __must_hold(ap->lock) { struct sas_task *task; struct scatterlist *sg; From ea06a482a47c41f5d04565dffbc21156bcfdd3e8 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 11 Apr 2020 20:33:51 -0500 Subject: [PATCH 041/562] gpiolib: of: improve gpiolib-of support of pull up/down on expanders When using GPIO expanders attached to I2C ports, their set_config function needs to be passed a config setting which contains options to enable pull up or pull down bias feature. In order to set this config properly, the gpio parser needs to handle GPIO_PULL_UP and GPIO_PULL_DOWN. This patch enables the flags corresponding to GPIO_PULL_UP and GPIO_PULL_DOWN. Signed-off-by: Adam Ford Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index ccc449df3792..2c5dd1349f16 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -344,6 +344,12 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node, if (transitory) lflags |= GPIO_TRANSITORY; + if (flags & OF_GPIO_PULL_UP) + lflags |= GPIO_PULL_UP; + + if (flags & OF_GPIO_PULL_DOWN) + lflags |= GPIO_PULL_DOWN; + ret = gpiod_configure_flags(desc, propname, lflags, dflags); if (ret < 0) { gpiod_put(desc); @@ -585,6 +591,10 @@ static struct gpio_desc *of_parse_own_gpio(struct device_node *np, *lflags |= GPIO_ACTIVE_LOW; if (xlate_flags & OF_GPIO_TRANSITORY) *lflags |= GPIO_TRANSITORY; + if (xlate_flags & OF_GPIO_PULL_UP) + *lflags |= GPIO_PULL_UP; + if (xlate_flags & OF_GPIO_PULL_DOWN) + *lflags |= GPIO_PULL_DOWN; if (of_property_read_bool(np, "input")) *dflags |= GPIOD_IN; From 95a776e8a6282ac19051655c55124e711318ae6e Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 13 Apr 2020 16:39:05 +0300 Subject: [PATCH 042/562] RDMA/rw: use DIV_ROUND_UP to calculate nr_ops Don't open-code DIV_ROUND_UP() kernel macro. Link: https://lore.kernel.org/r/20200413133905.933343-1-leon@kernel.org Signed-off-by: Max Gurtovoy Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 557efbf29197..614cff89fc71 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -129,7 +129,7 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, qp->integrity_en); int i, j, ret = 0, count = 0; - ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr; + ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr); ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL); if (!ctx->reg) { ret = -ENOMEM; From 04fd1ca77918a2395d0dc73c9a68436fdfaa26de Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 25 Mar 2020 11:39:56 +0100 Subject: [PATCH 043/562] gpiolib: acpi: Add missing __init(const) markers to initcall-s The gpiolib ACPI code uses 2 initcall-s and the called function (and used DMI table) is missing __init(const) markers. This commit fixes this freeing up some extra memory once the kernel has completed booting. Acked-by: Mika Westerberg Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200325103956.109284-2-hdegoede@redhat.com Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 0017367e94ee..9276051663da 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1353,7 +1353,7 @@ int acpi_gpio_count(struct device *dev, const char *con_id) } /* Run deferred acpi_gpiochip_request_irqs() */ -static int acpi_gpio_handle_deferred_request_irqs(void) +static int __init acpi_gpio_handle_deferred_request_irqs(void) { struct acpi_gpio_chip *acpi_gpio, *tmp; @@ -1371,7 +1371,7 @@ static int acpi_gpio_handle_deferred_request_irqs(void) /* We must use _sync so that this runs after the first deferred_probe run */ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); -static const struct dmi_system_id gpiolib_acpi_quirks[] = { +static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { { /* * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for @@ -1455,7 +1455,7 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { {} /* Terminating entry */ }; -static int acpi_gpio_setup_params(void) +static int __init acpi_gpio_setup_params(void) { const struct acpi_gpiolib_dmi_quirk *quirk = NULL; const struct dmi_system_id *id; From 96d7c7b3e6545612c1d37944621fdd611afd6adf Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Tue, 14 Apr 2020 11:28:42 -0400 Subject: [PATCH 044/562] gpio: gpio-pca953x, Add get_multiple function Implement a get_multiple function for gpio-pca953x. If a driver leaves get_multiple unimplemented then gpio_chip_get_multiple() in gpiolib.c takes care of it by calling chip->get() as needed. For i2c chips this is very inefficient. For example if you do an 8-bit read then instead of a single i2c transaction there are 8 transactions reading the same byte! Signed-off-by: Paul Thomas Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 5638b4e5355f..6317510b0dc3 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -115,6 +115,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids); #define MAX_BANK 5 #define BANK_SZ 8 +#define BANK_SFT 3 /* ilog2(BANK_SZ) */ #define MAX_LINE (MAX_BANK * BANK_SZ) #define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ) @@ -466,6 +467,41 @@ static int pca953x_gpio_get_direction(struct gpio_chip *gc, unsigned off) return GPIO_LINE_DIRECTION_OUT; } +static int pca953x_gpio_get_multiple(struct gpio_chip *gc, + unsigned long *mask, unsigned long *bits) +{ + struct pca953x_chip *chip = gpiochip_get_data(gc); + unsigned int reg_val; + int offset, value, i, ret = 0; + u8 inreg; + + /* Force offset outside the range of i so that + * at least the first relevant register is read + */ + offset = gc->ngpio; + for_each_set_bit(i, mask, gc->ngpio) { + /* whenever i goes into a new bank update inreg + * and read the register + */ + if ((offset >> BANK_SFT) != (i >> BANK_SFT)) { + offset = i; + inreg = pca953x_recalc_addr(chip, chip->regs->input, + offset, true, false); + mutex_lock(&chip->i2c_lock); + ret = regmap_read(chip->regmap, inreg, ®_val); + mutex_unlock(&chip->i2c_lock); + if (ret < 0) + return ret; + } + /* reg_val is relative to the last read byte, + * so only shift the relative bits + */ + value = (reg_val >> (i % 8)) & 0x01; + __assign_bit(i, bits, value); + } + return ret; +} + static void pca953x_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { @@ -551,6 +587,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->get = pca953x_gpio_get_value; gc->set = pca953x_gpio_set_value; gc->get_direction = pca953x_gpio_get_direction; + gc->get_multiple = pca953x_gpio_get_multiple; gc->set_multiple = pca953x_gpio_set_multiple; gc->set_config = pca953x_gpio_set_config; gc->can_sleep = true; From 5c85418ab35bc84d33947e45b2e0ffe55aa8a484 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Apr 2020 20:48:57 +0300 Subject: [PATCH 045/562] gpio: pch: Use BIT() and GENMASK() where it's appropriate Use BIT() and GENMASK() where it's appropriate. At the same time drop it where it's not appropriate. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-pch.c | 45 +++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index 3f3d9a94b709..03eeacdb04fb 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd. */ +#include #include #include #include @@ -11,11 +12,11 @@ #include #define PCH_EDGE_FALLING 0 -#define PCH_EDGE_RISING BIT(0) -#define PCH_LEVEL_L BIT(1) -#define PCH_LEVEL_H (BIT(0) | BIT(1)) -#define PCH_EDGE_BOTH BIT(2) -#define PCH_IM_MASK (BIT(0) | BIT(1) | BIT(2)) +#define PCH_EDGE_RISING 1 +#define PCH_LEVEL_L 2 +#define PCH_LEVEL_H 3 +#define PCH_EDGE_BOTH 4 +#define PCH_IM_MASK GENMASK(2, 0) #define PCH_IRQ_BASE 24 @@ -103,9 +104,9 @@ static void pch_gpio_set(struct gpio_chip *gpio, unsigned nr, int val) spin_lock_irqsave(&chip->spinlock, flags); reg_val = ioread32(&chip->reg->po); if (val) - reg_val |= (1 << nr); + reg_val |= BIT(nr); else - reg_val &= ~(1 << nr); + reg_val &= ~BIT(nr); iowrite32(reg_val, &chip->reg->po); spin_unlock_irqrestore(&chip->spinlock, flags); @@ -115,7 +116,7 @@ static int pch_gpio_get(struct gpio_chip *gpio, unsigned nr) { struct pch_gpio *chip = gpiochip_get_data(gpio); - return (ioread32(&chip->reg->pi) >> nr) & 1; + return !!(ioread32(&chip->reg->pi) & BIT(nr)); } static int pch_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, @@ -130,13 +131,14 @@ static int pch_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, reg_val = ioread32(&chip->reg->po); if (val) - reg_val |= (1 << nr); + reg_val |= BIT(nr); else - reg_val &= ~(1 << nr); + reg_val &= ~BIT(nr); iowrite32(reg_val, &chip->reg->po); - pm = ioread32(&chip->reg->pm) & ((1 << gpio_pins[chip->ioh]) - 1); - pm |= (1 << nr); + pm = ioread32(&chip->reg->pm); + pm &= BIT(gpio_pins[chip->ioh]) - 1; + pm |= BIT(nr); iowrite32(pm, &chip->reg->pm); spin_unlock_irqrestore(&chip->spinlock, flags); @@ -151,8 +153,9 @@ static int pch_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) unsigned long flags; spin_lock_irqsave(&chip->spinlock, flags); - pm = ioread32(&chip->reg->pm) & ((1 << gpio_pins[chip->ioh]) - 1); - pm &= ~(1 << nr); + pm = ioread32(&chip->reg->pm); + pm &= BIT(gpio_pins[chip->ioh]) - 1; + pm &= ~BIT(nr); iowrite32(pm, &chip->reg->pm); spin_unlock_irqrestore(&chip->spinlock, flags); @@ -277,7 +280,7 @@ static void pch_irq_unmask(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct pch_gpio *chip = gc->private; - iowrite32(1 << (d->irq - chip->irq_base), &chip->reg->imaskclr); + iowrite32(BIT(d->irq - chip->irq_base), &chip->reg->imaskclr); } static void pch_irq_mask(struct irq_data *d) @@ -285,7 +288,7 @@ static void pch_irq_mask(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct pch_gpio *chip = gc->private; - iowrite32(1 << (d->irq - chip->irq_base), &chip->reg->imask); + iowrite32(BIT(d->irq - chip->irq_base), &chip->reg->imask); } static void pch_irq_ack(struct irq_data *d) @@ -293,7 +296,7 @@ static void pch_irq_ack(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct pch_gpio *chip = gc->private; - iowrite32(1 << (d->irq - chip->irq_base), &chip->reg->iclr); + iowrite32(BIT(d->irq - chip->irq_base), &chip->reg->iclr); } static irqreturn_t pch_gpio_handler(int irq, void *dev_id) @@ -344,7 +347,6 @@ static int pch_gpio_probe(struct pci_dev *pdev, s32 ret; struct pch_gpio *chip; int irq_base; - u32 msk; chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); if (chip == NULL) @@ -357,7 +359,7 @@ static int pch_gpio_probe(struct pci_dev *pdev, return ret; } - ret = pcim_iomap_regions(pdev, 1 << 1, KBUILD_MODNAME); + ret = pcim_iomap_regions(pdev, BIT(1), KBUILD_MODNAME); if (ret) { dev_err(&pdev->dev, "pci_request_regions FAILED-%d", ret); return ret; @@ -393,9 +395,8 @@ static int pch_gpio_probe(struct pci_dev *pdev, chip->irq_base = irq_base; /* Mask all interrupts, but enable them */ - msk = (1 << gpio_pins[chip->ioh]) - 1; - iowrite32(msk, &chip->reg->imask); - iowrite32(msk, &chip->reg->ien); + iowrite32(BIT(gpio_pins[chip->ioh]) - 1, &chip->reg->imask); + iowrite32(BIT(gpio_pins[chip->ioh]) - 1, &chip->reg->ien); ret = devm_request_irq(&pdev->dev, pdev->irq, pch_gpio_handler, IRQF_SHARED, KBUILD_MODNAME, chip); From 5a4245de48d87f9300c3cac7c62e1af18916fb22 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Apr 2020 20:48:58 +0300 Subject: [PATCH 046/562] gpio: pch: Get rid of unneeded variable in IRQ handler There is no need to have an additional variable in IRQ handler. We may simple rely on the fact of having non-zero register value we read from the hardware. While here, drop repetitive messages in time critical function. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-pch.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index 03eeacdb04fb..708272db6baf 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -303,14 +303,15 @@ static irqreturn_t pch_gpio_handler(int irq, void *dev_id) { struct pch_gpio *chip = dev_id; unsigned long reg_val = ioread32(&chip->reg->istatus); - int i, ret = IRQ_NONE; + int i; - for_each_set_bit(i, ®_val, gpio_pins[chip->ioh]) { - dev_dbg(chip->dev, "[%d]:irq=%d status=0x%lx\n", i, irq, reg_val); + dev_dbg(chip->dev, "irq=%d status=0x%lx\n", irq, reg_val); + + reg_val &= BIT(gpio_pins[chip->ioh]) - 1; + for_each_set_bit(i, ®_val, gpio_pins[chip->ioh]) generic_handle_irq(chip->irq_base + i); - ret = IRQ_HANDLED; - } - return ret; + + return IRQ_RETVAL(reg_val); } static int pch_gpio_alloc_generic_chip(struct pch_gpio *chip, From 368b8436011ac5138230b98bb34923b7f77ae533 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Apr 2020 20:48:59 +0300 Subject: [PATCH 047/562] gpio: pch: Refactor pch_irq_type() to avoid unnecessary locking When type is not supported there is no need to lock and check. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-pch.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index 708272db6baf..9c34230f2e84 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -229,17 +229,15 @@ static int pch_irq_type(struct irq_data *d, unsigned int type) int ch, irq = d->irq; ch = irq - chip->irq_base; - if (irq <= chip->irq_base + 7) { + if (irq < chip->irq_base + 8) { im_reg = &chip->reg->im0; - im_pos = ch; + im_pos = ch - 0; } else { im_reg = &chip->reg->im1; im_pos = ch - 8; } dev_dbg(chip->dev, "irq=%d type=%d ch=%d pos=%d\n", irq, type, ch, im_pos); - spin_lock_irqsave(&chip->spinlock, flags); - switch (type) { case IRQ_TYPE_EDGE_RISING: val = PCH_EDGE_RISING; @@ -257,9 +255,11 @@ static int pch_irq_type(struct irq_data *d, unsigned int type) val = PCH_LEVEL_L; break; default: - goto unlock; + return 0; } + spin_lock_irqsave(&chip->spinlock, flags); + /* Set interrupt mode */ im = ioread32(im_reg) & ~(PCH_IM_MASK << (im_pos * 4)); iowrite32(im | (val << (im_pos * 4)), im_reg); @@ -270,7 +270,6 @@ static int pch_irq_type(struct irq_data *d, unsigned int type) else if (type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) irq_set_handler_locked(d, handle_edge_irq); -unlock: spin_unlock_irqrestore(&chip->spinlock, flags); return 0; } From 5376b0b31295364f3a166ee82f1a7d893bcbf8e9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Apr 2020 20:49:00 +0300 Subject: [PATCH 048/562] gpio: pch: Use in pch_irq_type() macros provided by IRQ core Use in pch_irq_type() the macros provided by IRQ core for IRQ type. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-pch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index 9c34230f2e84..e96d28bf43b4 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -265,9 +265,9 @@ static int pch_irq_type(struct irq_data *d, unsigned int type) iowrite32(im | (val << (im_pos * 4)), im_reg); /* And the handler */ - if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) + if (type & IRQ_TYPE_LEVEL_MASK) irq_set_handler_locked(d, handle_level_irq); - else if (type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) + else if (type & IRQ_TYPE_EDGE_BOTH) irq_set_handler_locked(d, handle_edge_irq); spin_unlock_irqrestore(&chip->spinlock, flags); From 6b1c7837af0e29ad630f0ae18634f6c58a3381ee Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Apr 2020 18:41:54 +0300 Subject: [PATCH 049/562] gpio: merrifield: Switch over to MSI interrupts Some devices may support MSI interrupts. Let's at least try to use them in platforms that provide MSI capability. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-merrifield.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 48918a016cd8..11e6ea70568a 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -473,6 +473,10 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id raw_spin_lock_init(&priv->lock); + retval = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (retval < 0) + return retval; + girq = &priv->chip.irq; girq->chip = &mrfld_irqchip; girq->init_hw = mrfld_irq_init_hw; @@ -482,7 +486,7 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id sizeof(*girq->parents), GFP_KERNEL); if (!girq->parents) return -ENOMEM; - girq->parents[0] = pdev->irq; + girq->parents[0] = pci_irq_vector(pdev, 0); girq->first = irq_base; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; From 7e73aa90a38c8815acea7af71e285658bf5ab879 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Apr 2020 18:41:55 +0300 Subject: [PATCH 050/562] gpio: merrifield: Better show how GPIO and IRQ bases are derived from hardware It's a bit hard to realize what the BAR1 is for and what is the layout of the data in it. Be slightly more verbose to better show how GPIO and IRQ bases are derived from the hardware. Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-merrifield.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 11e6ea70568a..706687fab634 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -443,8 +443,8 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id base = pcim_iomap_table(pdev)[1]; - irq_base = readl(base); - gpio_base = readl(sizeof(u32) + base); + irq_base = readl(base + 0 * sizeof(u32)); + gpio_base = readl(base + 1 * sizeof(u32)); /* Release the IO mapping, since we already get the info from BAR1 */ pcim_iounmap_regions(pdev, BIT(1)); From aa58a21ae37894d456a2f91a37e9fd71ad4aa27e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 7 Apr 2020 17:42:45 +0200 Subject: [PATCH 051/562] gpio: pca953x: disable regmap locking This driver uses its own locking but regmap silently uses a mutex for all operations too. Add the option to disable locking to the regmap config struct. Signed-off-by: Bartosz Golaszewski Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 6317510b0dc3..60ae18e4b5f5 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -307,6 +307,7 @@ static const struct regmap_config pca953x_i2c_regmap = { .writeable_reg = pca953x_writeable_register, .volatile_reg = pca953x_volatile_register, + .disable_locking = true, .cache_type = REGCACHE_RBTREE, /* REVISIT: should be 0x7f but some 24 bit chips use REG_ADDR_AI */ .max_register = 0xff, From 657a06df993c92ca404da4c251f7f354f1069eaa Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 23 Mar 2020 22:53:56 +0300 Subject: [PATCH 052/562] dt-bindings: gpio: Convert snps,dw-apb-gpio to DT schema Modern device tree bindings are supposed to be created as YAML-files in accordance with DT schema. This commit replaces Synopsys DW GPIO legacy bare text binding with YAML file. As before the binding file states that the corresponding dts node is supposed to be compatible with generic DW I2C controller indicated by the "snps,dw-apb-gpio" compatible string and to provide a mandatory registers memory range. It may also have an optional clock and reset phandle references. There must be specified at least one subnode with "snps,dw-apb-gpio-port" compatible string indicating the GPIO port, which would actually export the GPIO controller functionality. Such nodes should have traditional GPIO controller properties together with optional interrupt-controller attributes if the corresponding controller was synthesized to detect and report the input values change to the parental IRQ controller. Signed-off-by: Serge Semin Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Paul Burton Cc: Ralf Baechle Reviewed-by: Andy Shevchenko Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200323195401.30338-2-Sergey.Semin@baikalelectronics.ru Signed-off-by: Linus Walleij --- .../bindings/gpio/snps,dw-apb-gpio.yaml | 129 ++++++++++++++++++ .../bindings/gpio/snps-dwapb-gpio.txt | 65 --------- 2 files changed, 129 insertions(+), 65 deletions(-) create mode 100644 Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml delete mode 100644 Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt diff --git a/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml new file mode 100644 index 000000000000..f84f92c4dd53 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/snps,dw-apb-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Synopsys DesignWare APB GPIO controller + +description: | + Synopsys DesignWare GPIO controllers have a configurable number of ports, + each of which are intended to be represented as child nodes with the generic + GPIO-controller properties as desribed in this bindings file. + +maintainers: + - Hoan Tran + +properties: + $nodename: + pattern: "^gpio@[0-9a-f]+$" + + compatible: + const: snps,dw-apb-gpio + + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + reg: + maxItems: 1 + + clocks: + items: + - description: APB interface clock source + + clock-names: + items: + - const: bus + + resets: + maxItems: 1 + +patternProperties: + "^gpio-(port|controller)@[0-9a-f]+$": + type: object + properties: + compatible: + const: snps,dw-apb-gpio-port + + reg: + maxItems: 1 + + gpio-controller: true + + '#gpio-cells': + const: 2 + + snps,nr-gpios: + description: The number of GPIO pins exported by the port. + default: 32 + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - minimum: 1 + maximum: 32 + + interrupts: + description: | + The interrupts to the parent controller raised when GPIOs generate + the interrupts. If the controller provides one combined interrupt + for all GPIOs, specify a single interrupt. If the controller provides + one interrupt for each GPIO, provide a list of interrupts that + correspond to each of the GPIO pins. + minItems: 1 + maxItems: 32 + + interrupt-controller: true + + '#interrupt-cells': + const: 2 + + required: + - compatible + - reg + - gpio-controller + - '#gpio-cells' + + dependencies: + interrupt-controller: [ interrupts ] + + additionalProperties: false + +additionalProperties: false + +required: + - compatible + - reg + - "#address-cells" + - "#size-cells" + +examples: + - | + gpio: gpio@20000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x20000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + + porta: gpio-port@0 { + compatible = "snps,dw-apb-gpio-port"; + reg = <0>; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&vic1>; + interrupts = <0>; + }; + + portb: gpio-port@1 { + compatible = "snps,dw-apb-gpio-port"; + reg = <1>; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + }; + }; +... diff --git a/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt b/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt deleted file mode 100644 index 839dd32ffe11..000000000000 --- a/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt +++ /dev/null @@ -1,65 +0,0 @@ -* Synopsys DesignWare APB GPIO controller - -Required properties: -- compatible : Should contain "snps,dw-apb-gpio" -- reg : Address and length of the register set for the device. -- #address-cells : should be 1 (for addressing port subnodes). -- #size-cells : should be 0 (port subnodes). - -The GPIO controller has a configurable number of ports, each of which are -represented as child nodes with the following properties: - -Required properties: -- compatible : "snps,dw-apb-gpio-port" -- gpio-controller : Marks the device node as a gpio controller. -- #gpio-cells : Should be two. The first cell is the pin number and - the second cell is used to specify the gpio polarity: - 0 = active high - 1 = active low -- reg : The integer port index of the port, a single cell. - -Optional properties: -- interrupt-controller : The first port may be configured to be an interrupt -controller. -- #interrupt-cells : Specifies the number of cells needed to encode an - interrupt. Shall be set to 2. The first cell defines the interrupt number, - the second encodes the triger flags encoded as described in - Documentation/devicetree/bindings/interrupt-controller/interrupts.txt -- interrupts : The interrupts to the parent controller raised when GPIOs - generate the interrupts. If the controller provides one combined interrupt - for all GPIOs, specify a single interrupt. If the controller provides one - interrupt for each GPIO, provide a list of interrupts that correspond to each - of the GPIO pins. When specifying multiple interrupts, if any are unconnected, - use the interrupts-extended property to specify the interrupts and set the - interrupt controller handle for unused interrupts to 0. -- snps,nr-gpios : The number of pins in the port, a single cell. -- resets : Reset line for the controller. - -Example: - -gpio: gpio@20000 { - compatible = "snps,dw-apb-gpio"; - reg = <0x20000 0x1000>; - #address-cells = <1>; - #size-cells = <0>; - - porta: gpio@0 { - compatible = "snps,dw-apb-gpio-port"; - gpio-controller; - #gpio-cells = <2>; - snps,nr-gpios = <8>; - reg = <0>; - interrupt-controller; - #interrupt-cells = <2>; - interrupt-parent = <&vic1>; - interrupts = <0>; - }; - - portb: gpio@1 { - compatible = "snps,dw-apb-gpio-port"; - gpio-controller; - #gpio-cells = <2>; - snps,nr-gpios = <8>; - reg = <1>; - }; -}; From 4f18093743673d4e21a5619dc323adb1dc2021e9 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 23 Mar 2020 22:53:57 +0300 Subject: [PATCH 053/562] dt-bindings: gpio: Add DW GPIO debounce clock property Port A of the DW GPIO controller may optionally have a debounce logic enabled if it was synthesized with that functionality enabled. In this case a dedicated reference clock should be declared in the node with corresponding "db" name presented in the clock-names property. Signed-off-by: Serge Semin Reviewed-by: Andy Shevchenko Acked-by: Rob Herring Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Paul Burton Cc: Ralf Baechle Link: https://lore.kernel.org/r/20200323195401.30338-3-Sergey.Semin@baikalelectronics.ru Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml index f84f92c4dd53..58a8df367623 100644 --- a/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml @@ -31,12 +31,16 @@ properties: maxItems: 1 clocks: + minItems: 1 items: - description: APB interface clock source + - description: DW GPIO debounce reference clock source clock-names: + minItems: 1 items: - const: bus + - const: db resets: maxItems: 1 From c55812afd3954c20c0d06b00ee9271ca6a3d8eab Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 23 Mar 2020 22:53:58 +0300 Subject: [PATCH 054/562] dt-bindings: gpio: Add Sergey Semin to DW APB GPIO driver maintainers Seeing Hoan has been silent for a long time Linus suggested to me to be also maintaining the driver. This patch adds myself to the list of maintainers in the DT schema of the driver. Suggested-by: Linus Walleij Signed-off-by: Serge Semin Reviewed-by: Andy Shevchenko Acked-by: Rob Herring Cc: Hoan Tran Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Paul Burton Cc: Ralf Baechle Link: https://lore.kernel.org/r/20200323195401.30338-4-Sergey.Semin@baikalelectronics.ru Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml index 58a8df367623..04a3c51e1dc1 100644 --- a/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml @@ -13,6 +13,7 @@ description: | maintainers: - Hoan Tran + - Serge Semin properties: $nodename: From 3ea8094c3b45495a18b75afd27cfda9597bf0e7d Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 23 Mar 2020 22:53:59 +0300 Subject: [PATCH 055/562] gpio: dwapb: Use optional-clocks interface for APB ref-clock The common clocks kernel framework provides a generic way to use an optional reference clock sources. If it's utilized there is no need in checking whether the clock descriptor pointer is actually a negative error at the moment of the prepare/unprepare clocks method calling. So if the corresponding clock source is provided, then getting an error shall actually terminate the device probe procedure. If it isn't specified then the driver shall proceed with further initializations. We'll use the optional clocks getting method to handle the APB reference clock, which can be provided for instance in the device of-node with "bus" clock-name. Signed-off-by: Serge Semin Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Paul Burton Cc: Ralf Baechle Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200323195401.30338-5-Sergey.Semin@baikalelectronics.ru Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 92e127e74813..0c5abfa361e6 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -690,13 +690,16 @@ static int dwapb_gpio_probe(struct platform_device *pdev) return PTR_ERR(gpio->regs); /* Optional bus clock */ - gpio->clk = devm_clk_get(&pdev->dev, "bus"); - if (!IS_ERR(gpio->clk)) { - err = clk_prepare_enable(gpio->clk); - if (err) { - dev_info(&pdev->dev, "Cannot enable clock\n"); - return err; - } + gpio->clk = devm_clk_get_optional(&pdev->dev, "bus"); + if (IS_ERR(gpio->clk)) { + dev_err(&pdev->dev, "Cannot get APB clock\n"); + return PTR_ERR(gpio->clk); + } + + err = clk_prepare_enable(gpio->clk); + if (err) { + dev_err(&pdev->dev, "Cannot enable APB clock\n"); + return err; } gpio->flags = 0; @@ -793,8 +796,7 @@ static int dwapb_gpio_resume(struct device *dev) unsigned long flags; int i; - if (!IS_ERR(gpio->clk)) - clk_prepare_enable(gpio->clk); + clk_prepare_enable(gpio->clk); spin_lock_irqsave(&gc->bgpio_lock, flags); for (i = 0; i < gpio->nr_ports; i++) { From 5c544c92d6cd058d2a95a49fe75265043264b942 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 23 Mar 2020 22:54:00 +0300 Subject: [PATCH 056/562] gpio: dwapb: Add debounce reference clock support Aside from the APB reference clock DW GPIO controller can have a dedicated clock connected to setup a debounce time interval for GPIO-based IRQs. Since this functionality is optional the corresponding clock source is also optional. Due to this lets handle the debounce clock in the same way as it has been developed for the APB reference clock, but using the bulk request/enable-disable methods. Signed-off-by: Serge Semin Reviewed-by: Linus Walleij Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Paul Burton Cc: Ralf Baechle Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200323195401.30338-6-Sergey.Semin@baikalelectronics.ru Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 0c5abfa361e6..d2ed11510f3c 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -62,6 +62,8 @@ #define GPIO_INTSTATUS_V2 0x3c #define GPIO_PORTA_EOI_V2 0x40 +#define DWAPB_NR_CLOCKS 2 + struct dwapb_gpio; #ifdef CONFIG_PM_SLEEP @@ -97,7 +99,7 @@ struct dwapb_gpio { struct irq_domain *domain; unsigned int flags; struct reset_control *rst; - struct clk *clk; + struct clk_bulk_data clks[DWAPB_NR_CLOCKS]; }; static inline u32 gpio_reg_v2_convert(unsigned int offset) @@ -689,16 +691,19 @@ static int dwapb_gpio_probe(struct platform_device *pdev) if (IS_ERR(gpio->regs)) return PTR_ERR(gpio->regs); - /* Optional bus clock */ - gpio->clk = devm_clk_get_optional(&pdev->dev, "bus"); - if (IS_ERR(gpio->clk)) { - dev_err(&pdev->dev, "Cannot get APB clock\n"); - return PTR_ERR(gpio->clk); + /* Optional bus and debounce clocks */ + gpio->clks[0].id = "bus"; + gpio->clks[1].id = "db"; + err = devm_clk_bulk_get_optional(&pdev->dev, DWAPB_NR_CLOCKS, + gpio->clks); + if (err) { + dev_err(&pdev->dev, "Cannot get APB/Debounce clocks\n"); + return err; } - err = clk_prepare_enable(gpio->clk); + err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks); if (err) { - dev_err(&pdev->dev, "Cannot enable APB clock\n"); + dev_err(&pdev->dev, "Cannot enable APB/Debounce clocks\n"); return err; } @@ -727,7 +732,7 @@ static int dwapb_gpio_probe(struct platform_device *pdev) out_unregister: dwapb_gpio_unregister(gpio); dwapb_irq_teardown(gpio); - clk_disable_unprepare(gpio->clk); + clk_bulk_disable_unprepare(DWAPB_NR_CLOCKS, gpio->clks); return err; } @@ -739,7 +744,7 @@ static int dwapb_gpio_remove(struct platform_device *pdev) dwapb_gpio_unregister(gpio); dwapb_irq_teardown(gpio); reset_control_assert(gpio->rst); - clk_disable_unprepare(gpio->clk); + clk_bulk_disable_unprepare(DWAPB_NR_CLOCKS, gpio->clks); return 0; } @@ -784,7 +789,7 @@ static int dwapb_gpio_suspend(struct device *dev) } spin_unlock_irqrestore(&gc->bgpio_lock, flags); - clk_disable_unprepare(gpio->clk); + clk_bulk_disable_unprepare(DWAPB_NR_CLOCKS, gpio->clks); return 0; } @@ -794,9 +799,13 @@ static int dwapb_gpio_resume(struct device *dev) struct dwapb_gpio *gpio = dev_get_drvdata(dev); struct gpio_chip *gc = &gpio->ports[0].gc; unsigned long flags; - int i; + int i, err; - clk_prepare_enable(gpio->clk); + err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks); + if (err) { + dev_err(gpio->dev, "Cannot reenable APB/Debounce clocks\n"); + return err; + } spin_lock_irqsave(&gc->bgpio_lock, flags); for (i = 0; i < gpio->nr_ports; i++) { From a26fa4b5d368289803259dbb7df8778c7800a0a6 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 23 Mar 2020 22:54:01 +0300 Subject: [PATCH 057/562] MAINTAINERS: Add Segey Semin to maintainers of DW APB GPIO driver Add myself as a co-maintainer of the Synopsis DesignWare APB GPIO driver. Suggested-by: Linus Walleij Signed-off-by: Serge Semin Cc: Hoan Tran Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Paul Burton Cc: Ralf Baechle Cc: linux-gpio@vger.kernel.org Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200323195401.30338-7-Sergey.Semin@baikalelectronics.ru Signed-off-by: Linus Walleij --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..4306b329987d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16227,6 +16227,7 @@ F: drivers/tty/serial/8250/8250_lpss.c SYNOPSYS DESIGNWARE APB GPIO DRIVER M: Hoan Tran +M: Serge Semin L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt From c58220cba2e03618659fa7d5dfae31f5ad4ae9d0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:21 +0300 Subject: [PATCH 058/562] gpio: dwapb: Append MODULE_ALIAS for platform driver The commit 3d2613c4289f ("GPIO: gpio-dwapb: Enable platform driver binding to MFD driver") introduced a use of the platform driver but missed to add the following line to it: MODULE_ALIAS("platform:gpio-dwapb"); Add this to get driver loaded automatically if platform device is registered. Fixes: 3d2613c4289f ("GPIO: gpio-dwapb: Enable platform driver binding to MFD driver") Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index d2ed11510f3c..c1b6d4f7307e 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -49,7 +49,9 @@ #define GPIO_EXT_PORTC 0x58 #define GPIO_EXT_PORTD 0x5c +#define DWAPB_DRIVER_NAME "gpio-dwapb" #define DWAPB_MAX_PORTS 4 + #define GPIO_EXT_PORT_STRIDE 0x04 /* register stride 32 bits */ #define GPIO_SWPORT_DR_STRIDE 0x0c /* register stride 3*32 bits */ #define GPIO_SWPORT_DDR_STRIDE 0x0c /* register stride 3*32 bits */ @@ -400,7 +402,7 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, return; err = irq_alloc_domain_generic_chips(gpio->domain, ngpio, 2, - "gpio-dwapb", handle_level_irq, + DWAPB_DRIVER_NAME, handle_level_irq, IRQ_NOREQUEST, 0, IRQ_GC_INIT_NESTED_LOCK); if (err) { @@ -457,7 +459,7 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, */ err = devm_request_irq(gpio->dev, pp->irq[0], dwapb_irq_handler_mfd, - IRQF_SHARED, "gpio-dwapb-mfd", gpio); + IRQF_SHARED, DWAPB_DRIVER_NAME, gpio); if (err) { dev_err(gpio->dev, "error requesting IRQ\n"); irq_domain_remove(gpio->domain); @@ -847,7 +849,7 @@ static SIMPLE_DEV_PM_OPS(dwapb_gpio_pm_ops, dwapb_gpio_suspend, static struct platform_driver dwapb_gpio_driver = { .driver = { - .name = "gpio-dwapb", + .name = DWAPB_DRIVER_NAME, .pm = &dwapb_gpio_pm_ops, .of_match_table = of_match_ptr(dwapb_of_match), .acpi_match_table = ACPI_PTR(dwapb_acpi_match), @@ -861,3 +863,4 @@ module_platform_driver(dwapb_gpio_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jamie Iles"); MODULE_DESCRIPTION("Synopsys DesignWare APB GPIO driver"); +MODULE_ALIAS("platform:" DWAPB_DRIVER_NAME); From 038aa1f0aba9596639c8ec2057169fa8e77d60d1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:22 +0300 Subject: [PATCH 059/562] gpio: dwapb: Refactor IRQ handler to use bit operations Refactor IRQ handler in order to use for_each_set_bit() helper. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-3-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index c1b6d4f7307e..e89a3c6877aa 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -193,22 +193,21 @@ static void dwapb_toggle_trigger(struct dwapb_gpio *gpio, unsigned int offs) static u32 dwapb_do_irq(struct dwapb_gpio *gpio) { - u32 irq_status = dwapb_read(gpio, GPIO_INTSTATUS); - u32 ret = irq_status; + unsigned long irq_status; + int hwirq; - while (irq_status) { - int hwirq = fls(irq_status) - 1; + irq_status = dwapb_read(gpio, GPIO_INTSTATUS); + for_each_set_bit(hwirq, &irq_status, 32) { int gpio_irq = irq_find_mapping(gpio->domain, hwirq); + u32 irq_type = irq_get_trigger_type(gpio_irq); generic_handle_irq(gpio_irq); - irq_status &= ~BIT(hwirq); - if ((irq_get_trigger_type(gpio_irq) & IRQ_TYPE_SENSE_MASK) - == IRQ_TYPE_EDGE_BOTH) + if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) dwapb_toggle_trigger(gpio, hwirq); } - return ret; + return irq_status; } static void dwapb_irq_handler(struct irq_desc *desc) From 9b0aef32c725d7ff92f4628c119867bc1a04d321 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:23 +0300 Subject: [PATCH 060/562] gpio: dwapb: Use chained IRQ prologue and epilogue Refactor IRQ handler in order to enter and exit chained IRQ by using respective prologue and epilogue calls. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-4-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index e89a3c6877aa..f61139f787d9 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -215,10 +215,9 @@ static void dwapb_irq_handler(struct irq_desc *desc) struct dwapb_gpio *gpio = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); + chained_irq_enter(chip, desc); dwapb_do_irq(gpio); - - if (chip->irq_eoi) - chip->irq_eoi(irq_desc_get_irq_data(desc)); + chained_irq_exit(chip, desc); } static void dwapb_irq_enable(struct irq_data *d) From f9754c79707fb71f51208ae8a0d00a30250b6d08 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:24 +0300 Subject: [PATCH 061/562] gpio: dwapb: set default handler to be handle_bad_irq() We switch the default handler to be handle_bad_irq() instead of handle_level_irq(), though for now apply it later in the code, to make the difference between IRQ chips more visible. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-5-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index f61139f787d9..588d5c61ae42 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -400,7 +400,7 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, return; err = irq_alloc_domain_generic_chips(gpio->domain, ngpio, 2, - DWAPB_DRIVER_NAME, handle_level_irq, + DWAPB_DRIVER_NAME, handle_bad_irq, IRQ_NOREQUEST, 0, IRQ_GC_INIT_NESTED_LOCK); if (err) { @@ -439,6 +439,7 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, } irq_gc->chip_types[0].type = IRQ_TYPE_LEVEL_MASK; + irq_gc->chip_types[0].handler = handle_level_irq; irq_gc->chip_types[1].type = IRQ_TYPE_EDGE_BOTH; irq_gc->chip_types[1].handler = handle_edge_irq; From 43296bf22e16a044500870e6063fc33b17b4253c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:25 +0300 Subject: [PATCH 062/562] gpio: dwapb: Deduplicate IRQ resource management GPIO library provides default IRQ resource management hooks, there is no need to repeat this in the individual driver. Remove them for good. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-6-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 588d5c61ae42..c0c267cddd80 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -250,31 +250,6 @@ static void dwapb_irq_disable(struct irq_data *d) spin_unlock_irqrestore(&gc->bgpio_lock, flags); } -static int dwapb_irq_reqres(struct irq_data *d) -{ - struct irq_chip_generic *igc = irq_data_get_irq_chip_data(d); - struct dwapb_gpio *gpio = igc->private; - struct gpio_chip *gc = &gpio->ports[0].gc; - int ret; - - ret = gpiochip_lock_as_irq(gc, irqd_to_hwirq(d)); - if (ret) { - dev_err(gpio->dev, "unable to lock HW IRQ %lu for IRQ\n", - irqd_to_hwirq(d)); - return ret; - } - return 0; -} - -static void dwapb_irq_relres(struct irq_data *d) -{ - struct irq_chip_generic *igc = irq_data_get_irq_chip_data(d); - struct dwapb_gpio *gpio = igc->private; - struct gpio_chip *gc = &gpio->ports[0].gc; - - gpiochip_unlock_as_irq(gc, irqd_to_hwirq(d)); -} - static int dwapb_irq_set_type(struct irq_data *d, u32 type) { struct irq_chip_generic *igc = irq_data_get_irq_chip_data(d); @@ -428,8 +403,6 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, ct->chip.irq_set_type = dwapb_irq_set_type; ct->chip.irq_enable = dwapb_irq_enable; ct->chip.irq_disable = dwapb_irq_disable; - ct->chip.irq_request_resources = dwapb_irq_reqres; - ct->chip.irq_release_resources = dwapb_irq_relres; #ifdef CONFIG_PM_SLEEP ct->chip.irq_set_wake = dwapb_irq_set_wake; #endif From e092bc5044ca8bb7acfa2930a96a5f1dba4148aa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:26 +0300 Subject: [PATCH 063/562] gpio: dwapb: Convert to use irqd_to_hwirq() Convert to use irqd_to_hwirq() instead of direct access to the hwirq member. Also amend the type of the hwirq holding variable to be irq_hw_number_t. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Cc: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-7-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index c0c267cddd80..2edccc5bbc25 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -194,7 +194,7 @@ static void dwapb_toggle_trigger(struct dwapb_gpio *gpio, unsigned int offs) static u32 dwapb_do_irq(struct dwapb_gpio *gpio) { unsigned long irq_status; - int hwirq; + irq_hw_number_t hwirq; irq_status = dwapb_read(gpio, GPIO_INTSTATUS); for_each_set_bit(hwirq, &irq_status, 32) { @@ -230,7 +230,7 @@ static void dwapb_irq_enable(struct irq_data *d) spin_lock_irqsave(&gc->bgpio_lock, flags); val = dwapb_read(gpio, GPIO_INTEN); - val |= BIT(d->hwirq); + val |= BIT(irqd_to_hwirq(d)); dwapb_write(gpio, GPIO_INTEN, val); spin_unlock_irqrestore(&gc->bgpio_lock, flags); } @@ -245,7 +245,7 @@ static void dwapb_irq_disable(struct irq_data *d) spin_lock_irqsave(&gc->bgpio_lock, flags); val = dwapb_read(gpio, GPIO_INTEN); - val &= ~BIT(d->hwirq); + val &= ~BIT(irqd_to_hwirq(d)); dwapb_write(gpio, GPIO_INTEN, val); spin_unlock_irqrestore(&gc->bgpio_lock, flags); } @@ -255,7 +255,7 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type) struct irq_chip_generic *igc = irq_data_get_irq_chip_data(d); struct dwapb_gpio *gpio = igc->private; struct gpio_chip *gc = &gpio->ports[0].gc; - int bit = d->hwirq; + irq_hw_number_t bit = irqd_to_hwirq(d); unsigned long level, polarity, flags; if (type & ~(IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING | @@ -305,11 +305,12 @@ static int dwapb_irq_set_wake(struct irq_data *d, unsigned int enable) struct irq_chip_generic *igc = irq_data_get_irq_chip_data(d); struct dwapb_gpio *gpio = igc->private; struct dwapb_context *ctx = gpio->ports[0].ctx; + irq_hw_number_t bit = irqd_to_hwirq(d); if (enable) - ctx->wake_en |= BIT(d->hwirq); + ctx->wake_en |= BIT(bit); else - ctx->wake_en &= ~BIT(d->hwirq); + ctx->wake_en &= ~BIT(bit); return 0; } @@ -365,8 +366,9 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, struct gpio_chip *gc = &port->gc; struct fwnode_handle *fwnode = pp->fwnode; struct irq_chip_generic *irq_gc = NULL; - unsigned int hwirq, ngpio = gc->ngpio; + unsigned int ngpio = gc->ngpio; struct irq_chip_type *ct; + irq_hw_number_t hwirq; int err, i; gpio->domain = irq_domain_create_linear(fwnode, ngpio, From 9826bbe1fef0b5a2432acce0292562891b1feb57 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:27 +0300 Subject: [PATCH 064/562] gpio: dwapb: Use device_get_match_data() to simplify code Use device_get_match_data() here to simplify the code a bit. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-8-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 2edccc5bbc25..8b30ded9322a 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -683,18 +683,7 @@ static int dwapb_gpio_probe(struct platform_device *pdev) return err; } - gpio->flags = 0; - if (dev->of_node) { - gpio->flags = (uintptr_t)of_device_get_match_data(dev); - } else if (has_acpi_companion(dev)) { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(dwapb_acpi_match, dev); - if (acpi_id) { - if (acpi_id->driver_data) - gpio->flags = acpi_id->driver_data; - } - } + gpio->flags = (uintptr_t)device_get_match_data(dev); for (i = 0; i < gpio->nr_ports; i++) { err = dwapb_gpio_add_port(gpio, &pdata->properties[i], i); From d31275a9dc0baa6b084917f8d42f1e197239fe6c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:28 +0300 Subject: [PATCH 065/562] gpio: dwapb: Convert to use IRQ core provided macros IRQ core provides macros such as IRQ_RETVAL(). Convert code to use them. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-9-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 8b30ded9322a..4edac592c253 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -258,8 +258,7 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type) irq_hw_number_t bit = irqd_to_hwirq(d); unsigned long level, polarity, flags; - if (type & ~(IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING | - IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW)) + if (type & ~IRQ_TYPE_SENSE_MASK) return -EINVAL; spin_lock_irqsave(&gc->bgpio_lock, flags); @@ -351,12 +350,7 @@ static int dwapb_gpio_set_config(struct gpio_chip *gc, unsigned offset, static irqreturn_t dwapb_irq_handler_mfd(int irq, void *dev_id) { - u32 worked; - struct dwapb_gpio *gpio = dev_id; - - worked = dwapb_do_irq(gpio); - - return worked ? IRQ_HANDLED : IRQ_NONE; + return IRQ_RETVAL(dwapb_do_irq(dev_id)); } static void dwapb_configure_irqs(struct dwapb_gpio *gpio, From 48ce805683461f0205e5b1baa2385a0cb425388e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:29 +0300 Subject: [PATCH 066/562] gpio: dwapb: Switch to more usual pattern of RMW in dwapb_gpio_set_debounce() More usual pattern is to prepare value and then write it in a single place. Switch code in dwapb_gpio_set_debounce() to it. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-10-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 4edac592c253..0b073cbc003b 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -327,9 +327,10 @@ static int dwapb_gpio_set_debounce(struct gpio_chip *gc, val_deb = dwapb_read(gpio, GPIO_PORTA_DEBOUNCE); if (debounce) - dwapb_write(gpio, GPIO_PORTA_DEBOUNCE, val_deb | mask); + val_deb |= mask; else - dwapb_write(gpio, GPIO_PORTA_DEBOUNCE, val_deb & ~mask); + val_deb &= ~mask; + dwapb_write(gpio, GPIO_PORTA_DEBOUNCE, val_deb); spin_unlock_irqrestore(&gc->bgpio_lock, flags); From 4f344e86c7396015e83f31c08aef2eb73838f338 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:30 +0300 Subject: [PATCH 067/562] gpio: dwapb: Drop bogus BUG_ON()s There is no case when no context is provided in the ->suspend() and ->resume() hooks. Moreover, BUG_ON() is harmful to user and makes kernel inoperable after the crash. Drop the BUG_ON()s for good. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-11-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 0b073cbc003b..ae4c4db8b156 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -723,8 +723,6 @@ static int dwapb_gpio_suspend(struct device *dev) unsigned int idx = gpio->ports[i].idx; struct dwapb_context *ctx = gpio->ports[i].ctx; - BUG_ON(!ctx); - offset = GPIO_SWPORTA_DDR + idx * GPIO_SWPORT_DDR_STRIDE; ctx->dir = dwapb_read(gpio, offset); @@ -773,8 +771,6 @@ static int dwapb_gpio_resume(struct device *dev) unsigned int idx = gpio->ports[i].idx; struct dwapb_context *ctx = gpio->ports[i].ctx; - BUG_ON(!ctx); - offset = GPIO_SWPORTA_DR + idx * GPIO_SWPORT_DR_STRIDE; dwapb_write(gpio, offset, ctx->data); From c59042ed896592eba6a5f8baa9c12aec7e36db20 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:31 +0300 Subject: [PATCH 068/562] gpio: dwapb: Drop of_match_ptr() & ACPI_PTR() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we always have a table of IDs compiled in, there is no use for of_match_ptr() nor ACPI_PTR() call. Besides that it brings a warning (depending on configuration): .../gpio-dwapb.c:638:34: warning: ‘dwapb_of_match’ defined but not used [-Wunused-const-variable=] 638 | static const struct of_device_id dwapb_of_match[] = { | ^~~~~~~~~~~~~~ Get rid of them for good. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-12-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index ae4c4db8b156..98e1ffcd432b 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -805,8 +805,8 @@ static struct platform_driver dwapb_gpio_driver = { .driver = { .name = DWAPB_DRIVER_NAME, .pm = &dwapb_gpio_pm_ops, - .of_match_table = of_match_ptr(dwapb_of_match), - .acpi_match_table = ACPI_PTR(dwapb_acpi_match), + .of_match_table = dwapb_of_match, + .acpi_match_table = dwapb_acpi_match, }, .probe = dwapb_gpio_probe, .remove = dwapb_gpio_remove, From 4c2b54f73aba9c8428c1c8025f91d69ca77e1e6e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 17:15:32 +0300 Subject: [PATCH 069/562] gpio: dwapb: Split out dwapb_get_irq() helper Split out dwapb_get_irq() helper for better readability and maintenance. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20200415141534.31240-13-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 56 ++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 98e1ffcd432b..31d29ec6ab5c 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -528,14 +528,38 @@ static void dwapb_gpio_unregister(struct dwapb_gpio *gpio) gpiochip_remove(&gpio->ports[m].gc); } -static struct dwapb_platform_data * -dwapb_gpio_get_pdata(struct device *dev) +static void dwapb_get_irq(struct device *dev, struct fwnode_handle *fwnode, + struct dwapb_port_property *pp) +{ + struct device_node *np = NULL; + int j; + + if (fwnode_property_read_bool(fwnode, "interrupt-controller")) + np = to_of_node(fwnode); + + for (j = 0; j < pp->ngpio; j++) { + pp->irq[j] = -ENXIO; + + if (np) + pp->irq[j] = of_irq_get(np, j); + else if (has_acpi_companion(dev)) + pp->irq[j] = platform_get_irq(to_platform_device(dev), j); + + if (pp->irq[j] >= 0) + pp->has_irq = true; + } + + if (!pp->has_irq) + dev_warn(dev, "no irq for port%d\n", pp->idx); +} + +static struct dwapb_platform_data *dwapb_gpio_get_pdata(struct device *dev) { struct fwnode_handle *fwnode; struct dwapb_platform_data *pdata; struct dwapb_port_property *pp; int nports; - int i, j; + int i; nports = device_get_child_node_count(dev); if (nports == 0) @@ -553,8 +577,6 @@ dwapb_gpio_get_pdata(struct device *dev) i = 0; device_for_each_child_node(dev, fwnode) { - struct device_node *np = NULL; - pp = &pdata->properties[i++]; pp->fwnode = fwnode; @@ -581,28 +603,8 @@ dwapb_gpio_get_pdata(struct device *dev) * Only port A can provide interrupts in all configurations of * the IP. */ - if (pp->idx != 0) - continue; - - if (dev->of_node && fwnode_property_read_bool(fwnode, - "interrupt-controller")) { - np = to_of_node(fwnode); - } - - for (j = 0; j < pp->ngpio; j++) { - pp->irq[j] = -ENXIO; - - if (np) - pp->irq[j] = of_irq_get(np, j); - else if (has_acpi_companion(dev)) - pp->irq[j] = platform_get_irq(to_platform_device(dev), j); - - if (pp->irq[j] >= 0) - pp->has_irq = true; - } - - if (!pp->has_irq) - dev_warn(dev, "no irq for port%d\n", pp->idx); + if (pp->idx == 0) + dwapb_get_irq(dev, fwnode, pp); } return pdata; From 9f676e5313c1fb7070997bd652a4ed68c6fb9a8c Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 8 Apr 2020 07:08:32 +0000 Subject: [PATCH 070/562] gpio: mlxbf2: remove unused including Remove including that don't need it. Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20200408070832.137037-1-yuehaibing@huawei.com Acked-by: Asmaa Mnebhi Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mlxbf2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index 7b7085050219..240b488609ac 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -14,7 +14,6 @@ #include #include #include -#include /* * There are 3 YU GPIO blocks: From 363f4d937501ba464d8a8248983dcfffabe69ea7 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 17:38:07 +0800 Subject: [PATCH 071/562] scsi: fnic: make some symbols static Fix the following sparse warning: drivers/scsi/fnic/vnic_dev.c:257:5: warning: symbol 'vnic_dev_cmd1' was not declared. Should it be static? drivers/scsi/fnic/vnic_dev.c:319:5: warning: symbol 'vnic_dev_cmd2' was not declared. Should it be static? drivers/scsi/fnic/vnic_dev.c:414:5: warning: symbol 'vnic_dev_init_devcmd1' was not declared. Should it be static? drivers/scsi/fnic/vnic_dev.c:425:5: warning: symbol 'vnic_dev_init_devcmd2' was not declared. Should it be static? drivers/scsi/fnic/vnic_dev.c:495:6: warning: symbol 'vnic_dev_deinit_devcmd2' was not declared. Should it be static? drivers/scsi/fnic/vnic_dev.c:506:5: warning: symbol 'vnic_dev_cmd_no_proxy' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200415093809.9365-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/vnic_dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c index 1b88a3b53eee..a2beee6e09f0 100644 --- a/drivers/scsi/fnic/vnic_dev.c +++ b/drivers/scsi/fnic/vnic_dev.c @@ -254,7 +254,7 @@ void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring) } } -int vnic_dev_cmd1(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait) +static int vnic_dev_cmd1(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait) { struct vnic_devcmd __iomem *devcmd = vdev->devcmd; int delay; @@ -316,7 +316,7 @@ int vnic_dev_cmd1(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait) return -ETIMEDOUT; } -int vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, +static int vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait) { struct devcmd2_controller *dc2c = vdev->devcmd2; @@ -411,7 +411,7 @@ int vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, } -int vnic_dev_init_devcmd1(struct vnic_dev *vdev) +static int vnic_dev_init_devcmd1(struct vnic_dev *vdev) { vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0); if (!vdev->devcmd) @@ -422,7 +422,7 @@ int vnic_dev_init_devcmd1(struct vnic_dev *vdev) } -int vnic_dev_init_devcmd2(struct vnic_dev *vdev) +static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) { int err; unsigned int fetch_index; @@ -492,7 +492,7 @@ int vnic_dev_init_devcmd2(struct vnic_dev *vdev) } -void vnic_dev_deinit_devcmd2(struct vnic_dev *vdev) +static void vnic_dev_deinit_devcmd2(struct vnic_dev *vdev) { vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring); vnic_wq_disable(&vdev->devcmd2->wq); @@ -503,7 +503,7 @@ void vnic_dev_deinit_devcmd2(struct vnic_dev *vdev) } -int vnic_dev_cmd_no_proxy(struct vnic_dev *vdev, +static int vnic_dev_cmd_no_proxy(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1, int wait) { int err; From 1d8baf9ed09d169ccaa8c703bd1c36a0ac69543f Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 17:38:08 +0800 Subject: [PATCH 072/562] scsi: fnic: make fnic_list and fnic_list_lock static Fix the following sparse warning: drivers/scsi/fnic/fnic_main.c:52:1: warning: symbol 'fnic_list' was not declared. Should it be static? drivers/scsi/fnic/fnic_main.c:53:1: warning: symbol 'fnic_list_lock' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200415093809.9365-2-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 18584ab27c32..7910b573bacb 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -49,8 +49,8 @@ static struct kmem_cache *fnic_sgl_cache[FNIC_SGL_NUM_CACHES]; static struct kmem_cache *fnic_io_req_cache; -LIST_HEAD(fnic_list); -DEFINE_SPINLOCK(fnic_list_lock); +static LIST_HEAD(fnic_list); +static DEFINE_SPINLOCK(fnic_list_lock); /* Supported devices by fnic module */ static struct pci_device_id fnic_id_table[] = { From 9ae583804a331594fc91c87719d404e0dc73034a Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 17:38:09 +0800 Subject: [PATCH 073/562] scsi: fnic: make vnic_wq_get_ctrl and vnic_wq_alloc_ring static Fix the following sparse warning: drivers/scsi/fnic/vnic_wq.c:28:5: warning: symbol 'vnic_wq_get_ctrl' was not declared. Should it be static? drivers/scsi/fnic/vnic_wq.c:40:5: warning: symbol 'vnic_wq_alloc_ring' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200415093809.9365-3-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/vnic_wq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fnic/vnic_wq.c b/drivers/scsi/fnic/vnic_wq.c index 015af2cdabaf..442972c04e65 100644 --- a/drivers/scsi/fnic/vnic_wq.c +++ b/drivers/scsi/fnic/vnic_wq.c @@ -25,7 +25,7 @@ #include "vnic_wq.h" -int vnic_wq_get_ctrl(struct vnic_dev *vdev, struct vnic_wq *wq, +static int vnic_wq_get_ctrl(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, enum vnic_res_type res_type) { wq->ctrl = vnic_dev_get_res(vdev, res_type, index); @@ -37,7 +37,7 @@ int vnic_wq_get_ctrl(struct vnic_dev *vdev, struct vnic_wq *wq, } -int vnic_wq_alloc_ring(struct vnic_dev *vdev, struct vnic_wq *wq, +static int vnic_wq_alloc_ring(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int desc_count, unsigned int desc_size) { return vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size); From 089f5b64b8af10aaef176f91144275d25e71e2ff Mon Sep 17 00:00:00 2001 From: Asutosh Das Date: Mon, 13 Apr 2020 23:14:48 -0700 Subject: [PATCH 074/562] scsi: ufs: full reinit upon resume if link was off During suspend, if the link is put to off, it would require a full initialization during resume. This patch resets and restores both the host and the card during initialization, otherwise host-only reset and restore would fail occasionally. Link: https://lore.kernel.org/r/1586844892-22720-1-git-send-email-cang@codeaurora.org Reviewed-by: Bean Huo Reviewed-by: Alim Akhtar Acked-by: Stanley Chu Acked-by: Avri Altman Signed-off-by: Asutosh Das Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 698e8d20b4ba..7d1fa1349d40 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8055,9 +8055,13 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) else goto vendor_suspend; } else if (ufshcd_is_link_off(hba)) { - ret = ufshcd_host_reset_and_restore(hba); /* - * ufshcd_host_reset_and_restore() should have already + * A full initialization of the host and the device is + * required since the link was put to off during suspend. + */ + ret = ufshcd_reset_and_restore(hba); + /* + * ufshcd_reset_and_restore() should have already * set the link state as active */ if (ret || !ufshcd_is_link_active(hba)) From 44578ecef7ace298ea3d121777955bc2f6e90c30 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 16:50:29 +0800 Subject: [PATCH 075/562] scsi: qedi: make qedi_ll2_buf_size static Fix the following sparse warning: drivers/scsi/qedi/qedi_main.c:44:6: warning: symbol 'qedi_ll2_buf_size' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200415085029.7170-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index b9a5c842a76e..4dd965860c98 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -45,7 +45,7 @@ module_param(qedi_io_tracing, uint, 0644); MODULE_PARM_DESC(qedi_io_tracing, " Enable logging of SCSI requests/completions into trace buffer. (default off)."); -uint qedi_ll2_buf_size = 0x400; +static uint qedi_ll2_buf_size = 0x400; module_param(qedi_ll2_buf_size, uint, 0644); MODULE_PARM_DESC(qedi_ll2_buf_size, "parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400."); From a677ab35a9670f3dd1f16398d1d94bb22efed38f Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 16:50:44 +0800 Subject: [PATCH 076/562] scsi: mvsas: make mvst_host_attrs static Fix the following sparse warning: drivers/scsi/mvsas/mv_init.c:28:25: warning: symbol 'mvst_host_attrs' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200415085044.7460-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 7af9173c4925..f82728b2c32f 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -25,7 +25,7 @@ static const struct mvs_chip_info mvs_chips[] = { [chip_1320] = { 2, 4, 0x800, 17, 64, 8, 9, &mvs_94xx_dispatch, }, }; -struct device_attribute *mvst_host_attrs[]; +static struct device_attribute *mvst_host_attrs[]; #define SOC_SAS_NUM 2 @@ -785,7 +785,7 @@ static void __exit mvs_exit(void) sas_release_transport(mvs_stt); } -struct device_attribute *mvst_host_attrs[] = { +static struct device_attribute *mvst_host_attrs[] = { &dev_attr_driver_version, &dev_attr_interrupt_coalescing, NULL, From d21481994039d36aec21b56c4710aae684e107f1 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 16:50:53 +0800 Subject: [PATCH 077/562] scsi: mvsas: remove unused symbol 'mvs_th' This symbol has no users so remove it. Link: https://lore.kernel.org/r/20200415085053.7633-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index f82728b2c32f..5973eed94938 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -759,8 +759,6 @@ static DEVICE_ATTR(interrupt_coalescing, mvs_show_interrupt_coalescing, mvs_store_interrupt_coalescing); -/* task handler */ -struct task_struct *mvs_th; static int __init mvs_init(void) { int rc; From ab0a82991fcaae0b0d895c0aea36e91730c2d00a Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 16 Apr 2020 01:43:06 -0700 Subject: [PATCH 078/562] scsi: qedf: Keep track of num of pending flogi If a port is brought down for an extended period of time, the fipvlan counter gets exhausted and the driver will fall back to default VLAN 1002 and call fcoe_ctlr_link_up to log in. However, the switch will discard the FLOGI attempt because the VLAN is now different. Keep track of the number of FLOGI attempts and if a threshold of QEDF_FLOGI_RETRY_CNT is exceeded, perform a context soft reset. Link: https://lore.kernel.org/r/20200416084314.18851-2-skashyap@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 2 ++ drivers/scsi/qedf/qedf_main.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index f3f399fe10c8..042ebf6aea35 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -388,6 +388,7 @@ struct qedf_ctx { mempool_t *io_mempool; struct workqueue_struct *dpc_wq; struct delayed_work grcdump_work; + struct delayed_work stag_work; u32 slow_sge_ios; u32 fast_sge_ios; @@ -403,6 +404,7 @@ struct qedf_ctx { u32 flogi_cnt; u32 flogi_failed; + u32 flogi_pending; /* Used for fc statistics */ struct mutex stats_mutex; diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 5b19f5175c5c..89d5c12bf49c 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -282,6 +282,7 @@ static void qedf_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, else if (fc_frame_payload_op(fp) == ELS_LS_ACC) { /* Set the source MAC we will use for FCoE traffic */ qedf_set_data_src_addr(qedf, fp); + qedf->flogi_pending = 0; } /* Complete flogi_compl so we can proceed to sending ADISCs */ @@ -307,6 +308,11 @@ static struct fc_seq *qedf_elsct_send(struct fc_lport *lport, u32 did, */ if (resp == fc_lport_flogi_resp) { qedf->flogi_cnt++; + if (qedf->flogi_pending >= QEDF_FLOGI_RETRY_CNT) { + schedule_delayed_work(&qedf->stag_work, 2); + return NULL; + } + qedf->flogi_pending++; return fc_elsct_send(lport, did, fp, op, qedf_flogi_resp, arg, timeout); } @@ -850,6 +856,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) qedf = lport_priv(lport); + qedf->flogi_pending = 0; /* For host reset, essentially do a soft link up/down */ atomic_set(&qedf->link_state, QEDF_LINK_DOWN); QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC, @@ -3197,6 +3204,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) init_completion(&qedf->fipvlan_compl); mutex_init(&qedf->stats_mutex); mutex_init(&qedf->flush_mutex); + qedf->flogi_pending = 0; QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, "QLogic FastLinQ FCoE Module qedf %s, " @@ -3227,6 +3235,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) INIT_DELAYED_WORK(&qedf->link_update, qedf_handle_link_update); INIT_DELAYED_WORK(&qedf->link_recovery, qedf_link_recovery); INIT_DELAYED_WORK(&qedf->grcdump_work, qedf_wq_grcdump); + INIT_DELAYED_WORK(&qedf->stag_work, qedf_stag_change_work); qedf->fipvlan_retries = qedf_fipvlan_retries; /* Set a default prio in case DCBX doesn't converge */ if (qedf_default_prio > -1) { @@ -3762,6 +3771,20 @@ void qedf_get_protocol_tlv_data(void *dev, void *data) fcoe->scsi_tsk_full = qedf->task_set_fulls; } +/* Deferred work function to perform soft context reset on STAG change */ +void qedf_stag_change_work(struct work_struct *work) +{ + struct qedf_ctx *qedf = + container_of(work, struct qedf_ctx, stag_work.work); + + if (!qedf) { + QEDF_ERR(&qedf->dbg_ctx, "qedf is NULL"); + return; + } + QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n"); + qedf_ctx_soft_reset(qedf->lport); +} + static void qedf_shutdown(struct pci_dev *pdev) { __qedf_remove(pdev, QEDF_MODE_NORMAL); From b0c4187e2616e9ecb1359f6bce2e93dd9593120b Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Thu, 16 Apr 2020 01:43:07 -0700 Subject: [PATCH 079/562] scsi: qedf: Increase the upper limit of retry delay Max time to hold the IO in case of SAM_STAT_TASK_SET_FULL or SAM_STAT_BUSY. Link: https://lore.kernel.org/r/20200416084314.18851-3-skashyap@marvell.com Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index 042ebf6aea35..aaa2ac95ea23 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -470,7 +470,7 @@ extern uint qedf_dump_frames; extern uint qedf_io_tracing; extern uint qedf_stop_io_on_error; extern uint qedf_link_down_tmo; -#define QEDF_RETRY_DELAY_MAX 20 /* 2 seconds */ +#define QEDF_RETRY_DELAY_MAX 600 /* 60 seconds */ extern bool qedf_retry_delay; extern uint qedf_debug; From 334b4f988f73dd70b80132933b0866bad0e0e221 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Thu, 16 Apr 2020 01:43:08 -0700 Subject: [PATCH 080/562] scsi: qedf: Acquire rport_lock for resetting the delay_timestamp Retry delay timestamp is updated in queuecommand as well as in qedf_scsi_completion routine. Protect it using lock. Link: https://lore.kernel.org/r/20200416084314.18851-4-skashyap@marvell.com Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index e749a2dcaad7..64c176980752 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1021,14 +1021,18 @@ qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd) atomic_inc(&fcport->ios_to_queue); if (fcport->retry_delay_timestamp) { + /* Take fcport->rport_lock for resetting the delay_timestamp */ + spin_lock_irqsave(&fcport->rport_lock, flags); if (time_after(jiffies, fcport->retry_delay_timestamp)) { fcport->retry_delay_timestamp = 0; } else { + spin_unlock_irqrestore(&fcport->rport_lock, flags); /* If retry_delay timer is active, flow off the ML */ rc = SCSI_MLQUEUE_TARGET_BUSY; atomic_dec(&fcport->ios_to_queue); goto exit_qcmd; } + spin_unlock_irqrestore(&fcport->rport_lock, flags); } io_req = qedf_alloc_cmd(fcport, QEDF_SCSI_CMD); From fedc173e46d31b2b3a2744947ac5ea4cb351f6b1 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Thu, 16 Apr 2020 01:43:09 -0700 Subject: [PATCH 081/562] scsi: qedf: Honor status qualifier in FCP_RSP per spec Handle scope and qualifier on SAM_STAT_TASK_SET_FULL or SAM_STAT_BUSY [mkp: added braces to fix sparse complaint] Link: https://lore.kernel.org/r/20200416084314.18851-5-skashyap@marvell.com Signed-off-by: Javed Hasan Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 44 ++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 64c176980752..0f6a15c1a04b 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1138,6 +1138,8 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, int refcount; u16 scope, qualifier = 0; u8 fw_residual_flag = 0; + unsigned long flags = 0; + u16 chk_scope = 0; if (!io_req) return; @@ -1271,16 +1273,8 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, /* Lower 14 bits */ qualifier = fcp_rsp->retry_delay_timer & 0x3FFF; - if (qedf_retry_delay && - scope > 0 && qualifier > 0 && - qualifier <= 0x3FEF) { - /* Check we don't go over the max */ - if (qualifier > QEDF_RETRY_DELAY_MAX) - qualifier = - QEDF_RETRY_DELAY_MAX; - fcport->retry_delay_timestamp = - jiffies + (qualifier * HZ / 10); - } + if (qedf_retry_delay) + chk_scope = 1; /* Record stats */ if (io_req->cdb_status == SAM_STAT_TASK_SET_FULL) @@ -1291,6 +1285,36 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, } if (io_req->fcp_resid) scsi_set_resid(sc_cmd, io_req->fcp_resid); + + if (chk_scope == 1) { + if ((scope == 1 || scope == 2) && + (qualifier > 0 && qualifier <= 0x3FEF)) { + /* Check we don't go over the max */ + if (qualifier > QEDF_RETRY_DELAY_MAX) { + qualifier = QEDF_RETRY_DELAY_MAX; + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "qualifier = %d\n", + (fcp_rsp->retry_delay_timer & + 0x3FFF)); + } + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Scope = %d and qualifier = %d", + scope, qualifier); + /* Take fcport->rport_lock to + * update the retry_delay_timestamp + */ + spin_lock_irqsave(&fcport->rport_lock, flags); + fcport->retry_delay_timestamp = + jiffies + (qualifier * HZ / 10); + spin_unlock_irqrestore(&fcport->rport_lock, + flags); + + } else { + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "combination of scope = %d and qualifier = %d is not handled in qedf.\n", + scope, qualifier); + } + } break; default: QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "fcp_status=%d.\n", From 699fed4a2d8e32d77d64928e570d7ffd93a62fdf Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 16 Apr 2020 01:43:10 -0700 Subject: [PATCH 082/562] scsi: qed: Send BW update notifications to the protocol drivers Management firmware (MFW) sends a notification whenever there is a change in the bandwidth values. Add driver support for sending this notification to the upper layer drivers (e.g., qedf). Link: https://lore.kernel.org/r/20200416084314.18851-6-skashyap@marvell.com Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_main.c | 9 +++++++++ include/linux/qed/qed_if.h | 1 + 3 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index fa41bf08a589..d00663967afe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -1016,6 +1016,7 @@ int qed_device_num_ports(struct qed_dev *cdev); int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info); void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); +void qed_bw_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 96356e897c80..645d1dbf04ce 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1940,6 +1940,15 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt) op->link_update(cookie, &if_link); } +void qed_bw_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt) +{ + void *cookie = hwfn->cdev->ops_cookie; + struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common; + + if (IS_LEAD_HWFN(hwfn) && cookie && op && op->bw_update) + op->bw_update(cookie); +} + static int qed_drain(struct qed_dev *cdev) { struct qed_hwfn *hwfn; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8f29e0d8a7b3..c4956374a444 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -817,6 +817,7 @@ struct qed_common_cb_ops { void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data); void (*get_protocol_tlv_data)(void *dev, void *data); + void (*bw_update)(void *dev); }; struct qed_selftest_ops { From 6e7c8eea929e547f5f8f453a3d72b36b8e40bf4d Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 16 Apr 2020 01:43:11 -0700 Subject: [PATCH 083/562] scsi: qedf: Implement callback for bw_update Add support for the common qed bw_update callback to qedf. This function is called whenever there is a reported change in the bandwidth and updates corresponding values in sysfs. Link: https://lore.kernel.org/r/20200416084314.18851-7-skashyap@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 89d5c12bf49c..50557c4dbf21 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -509,6 +509,32 @@ static void qedf_update_link_speed(struct qedf_ctx *qedf, fc_host_supported_speeds(lport->host) = lport->link_supported_speeds; } +static void qedf_bw_update(void *dev) +{ + struct qedf_ctx *qedf = (struct qedf_ctx *)dev; + struct qed_link_output link; + + /* Get the latest status of the link */ + qed_ops->common->get_link(qedf->cdev, &link); + + if (test_bit(QEDF_UNLOADING, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, + "Ignore link update, driver getting unload.\n"); + return; + } + + if (link.link_up) { + if (atomic_read(&qedf->link_state) == QEDF_LINK_UP) + qedf_update_link_speed(qedf, &link); + else + QEDF_ERR(&qedf->dbg_ctx, + "Ignore bw update, link is down.\n"); + + } else { + QEDF_ERR(&qedf->dbg_ctx, "link_up is not set.\n"); + } +} + static void qedf_link_update(void *dev, struct qed_link_output *link) { struct qedf_ctx *qedf = (struct qedf_ctx *)dev; @@ -635,6 +661,7 @@ static u32 qedf_get_login_failures(void *cookie) static struct qed_fcoe_cb_ops qedf_cb_ops = { { .link_update = qedf_link_update, + .bw_update = qedf_bw_update, .dcbx_aen = qedf_dcbx_handler, .get_generic_tlv_data = qedf_get_generic_tlv_data, .get_protocol_tlv_data = qedf_get_protocol_tlv_data, From f6b172f21999cf5a18a49d6c028ba46b1c9ce02f Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Thu, 16 Apr 2020 01:43:12 -0700 Subject: [PATCH 084/562] scsi: qedf: Add schedule recovery handler Implement recovery handler to be used by QED to signal the need for recovery to come out of an error condition like ramrod struck and firmware context reset. Link: https://lore.kernel.org/r/20200416084314.18851-8-skashyap@marvell.com Signed-off-by: Chad Dupuis Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 1 + drivers/scsi/qedf/qedf_main.c | 43 +++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index aaa2ac95ea23..f8a98e5e3cb5 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -387,6 +387,7 @@ struct qedf_ctx { #define QEDF_IO_WORK_MIN 64 mempool_t *io_mempool; struct workqueue_struct *dpc_wq; + struct delayed_work recovery_work; struct delayed_work grcdump_work; struct delayed_work stag_work; diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 50557c4dbf21..3823cdfec46a 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -28,6 +28,8 @@ const struct qed_fcoe_ops *qed_ops; static int qedf_probe(struct pci_dev *pdev, const struct pci_device_id *id); static void qedf_remove(struct pci_dev *pdev); static void qedf_shutdown(struct pci_dev *pdev); +static void qedf_schedule_recovery_handler(void *dev); +static void qedf_recovery_handler(struct work_struct *work); /* * Driver module parameters. @@ -662,6 +664,7 @@ static struct qed_fcoe_cb_ops qedf_cb_ops = { { .link_update = qedf_link_update, .bw_update = qedf_bw_update, + .schedule_recovery_handler = qedf_schedule_recovery_handler, .dcbx_aen = qedf_dcbx_handler, .get_generic_tlv_data = qedf_get_generic_tlv_data, .get_protocol_tlv_data = qedf_get_protocol_tlv_data, @@ -3502,6 +3505,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) qedf->lport->host->host_no); qedf->dpc_wq = create_workqueue(host_buf); } + INIT_DELAYED_WORK(&qedf->recovery_work, qedf_recovery_handler); /* * GRC dump and sysfs parameters are not reaped during the recovery @@ -3817,6 +3821,45 @@ static void qedf_shutdown(struct pci_dev *pdev) __qedf_remove(pdev, QEDF_MODE_NORMAL); } +/* + * Recovery handler code + */ +static void qedf_schedule_recovery_handler(void *dev) +{ + struct qedf_ctx *qedf = dev; + + QEDF_ERR(&qedf->dbg_ctx, "Recovery handler scheduled.\n"); + schedule_delayed_work(&qedf->recovery_work, 0); +} + +static void qedf_recovery_handler(struct work_struct *work) +{ + struct qedf_ctx *qedf = + container_of(work, struct qedf_ctx, recovery_work.work); + + if (test_and_set_bit(QEDF_IN_RECOVERY, &qedf->flags)) + return; + + /* + * Call common_ops->recovery_prolog to allow the MFW to quiesce + * any PCI transactions. + */ + qed_ops->common->recovery_prolog(qedf->cdev); + + QEDF_ERR(&qedf->dbg_ctx, "Recovery work start.\n"); + __qedf_remove(qedf->pdev, QEDF_MODE_RECOVERY); + /* + * Reset link and dcbx to down state since we will not get a link down + * event from the MFW but calling __qedf_remove will essentially be a + * link down event. + */ + atomic_set(&qedf->link_state, QEDF_LINK_DOWN); + atomic_set(&qedf->dcbx, QEDF_DCBX_PENDING); + __qedf_probe(qedf->pdev, QEDF_MODE_RECOVERY); + clear_bit(QEDF_IN_RECOVERY, &qedf->flags); + QEDF_ERR(&qedf->dbg_ctx, "Recovery work complete.\n"); +} + /* Generic TLV data callback */ void qedf_get_generic_tlv_data(void *dev, struct qed_generic_tlvs *data) { From ad40f5256095c68dc17c991eb976261d5ea2daaa Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Thu, 16 Apr 2020 01:43:13 -0700 Subject: [PATCH 085/562] scsi: qedf: Fix crash when MFW calls for protocol stats while function is still probing The MFW may make a call to qed and then to qedf for protocol statistics while the function is still probing. If this happens it's possible that some members of the struct qedf_ctx may not be fully initialized which can result in a NULL pointer dereference or general protection fault. To prevent this, add a new flag call QEDF_PROBING and set it when the __qedf_probe() function is active. Then in the qedf_get_protocol_tlv_data() function we can check if the function is still probing and return immediantely before any uninitialized structures can be touched. Link: https://lore.kernel.org/r/20200416084314.18851-9-skashyap@marvell.com Signed-off-by: Chad Dupuis Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 1 + drivers/scsi/qedf/qedf_main.c | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index f8a98e5e3cb5..e163be8af965 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -355,6 +355,7 @@ struct qedf_ctx { #define QEDF_GRCDUMP_CAPTURE 4 #define QEDF_IN_RECOVERY 5 #define QEDF_DBG_STOP_IO 6 +#define QEDF_PROBING 8 unsigned long flags; /* Miscellaneous state flags */ int fipvlan_retries; u8 num_queues; diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 3823cdfec46a..e7ddb5651261 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -3190,7 +3190,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) { int rc = -EINVAL; struct fc_lport *lport; - struct qedf_ctx *qedf; + struct qedf_ctx *qedf = NULL; struct Scsi_Host *host; bool is_vf = false; struct qed_ll2_params params; @@ -3220,6 +3220,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) /* Initialize qedf_ctx */ qedf = lport_priv(lport); + set_bit(QEDF_PROBING, &qedf->flags); qedf->lport = lport; qedf->ctlr.lp = lport; qedf->pdev = pdev; @@ -3244,9 +3245,12 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) } else { /* Init pointers during recovery */ qedf = pci_get_drvdata(pdev); + set_bit(QEDF_PROBING, &qedf->flags); lport = qedf->lport; } + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC, "Probe started.\n"); + host = lport->host; /* Allocate mempool for qedf_io_work structs */ @@ -3553,6 +3557,10 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) else fc_fabric_login(lport); + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC, "Probe done.\n"); + + clear_bit(QEDF_PROBING, &qedf->flags); + /* All good */ return 0; @@ -3578,6 +3586,11 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) err1: scsi_host_put(lport->host); err0: + if (qedf) { + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC, "Probe done.\n"); + + clear_bit(QEDF_PROBING, &qedf->flags); + } return rc; } @@ -3727,11 +3740,25 @@ void qedf_get_protocol_tlv_data(void *dev, void *data) { struct qedf_ctx *qedf = dev; struct qed_mfw_tlv_fcoe *fcoe = data; - struct fc_lport *lport = qedf->lport; - struct Scsi_Host *host = lport->host; - struct fc_host_attrs *fc_host = shost_to_fc_host(host); + struct fc_lport *lport; + struct Scsi_Host *host; + struct fc_host_attrs *fc_host; struct fc_host_statistics *hst; + if (!qedf) { + QEDF_ERR(NULL, "qedf is null.\n"); + return; + } + + if (test_bit(QEDF_PROBING, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, "Function is still probing.\n"); + return; + } + + lport = qedf->lport; + host = lport->host; + fc_host = shost_to_fc_host(host); + /* Force a refresh of the fc_host stats including offload stats */ hst = qedf_fc_get_host_stats(host); From 7109cb51517267ef0631da293d80a26f4e18d3ba Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 16 Apr 2020 01:43:14 -0700 Subject: [PATCH 086/562] scsi: qedf: Get dev info after updating the params An update to pf params can change the devinfo. Get updated device information. [mkp: updated error message spotted by Sergei Shtylyov] Link: https://lore.kernel.org/r/20200416084314.18851-10-skashyap@marvell.com Signed-off-by: Saurav Kashyap Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index e7ddb5651261..36b1ca2dadbb 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -3324,6 +3324,13 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) } qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params); + /* Learn information crucial for qedf to progress */ + rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info); + if (rc) { + QEDF_ERR(&qedf->dbg_ctx, "Failed to fill dev info.\n"); + goto err2; + } + /* Record BDQ producer doorbell addresses */ qedf->bdq_primary_prod = qedf->dev_info.primary_dbq_rq_addr; qedf->bdq_secondary_prod = qedf->dev_info.secondary_bdq_rq_addr; From 9d9e435f3f2492bfd196acacb61cc9a9212d8170 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Mar 2020 23:48:15 -0700 Subject: [PATCH 087/562] x86/elf: Add table to document READ_IMPLIES_EXEC Add a table to document the current behavior of READ_IMPLIES_EXEC in preparation for changing the behavior. Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Reviewed-by: Jason Gunthorpe Link: https://lkml.kernel.org/r/20200327064820.12602-2-keescook@chromium.org --- arch/x86/include/asm/elf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 69c0f892e310..ee459d4c3b45 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -281,6 +281,25 @@ extern u32 elf_hwcap2; /* * An executable for which elf_read_implies_exec() returns TRUE will * have the READ_IMPLIES_EXEC personality flag set automatically. + * + * The decision process for determining the results are: + * + *              CPU: | lacks NX*  | has NX, ia32     | has NX, x86_64 | + * ELF:              |            |                  |                | + * ---------------------|------------|------------------|----------------| + * missing PT_GNU_STACK | exec-all   | exec-all         | exec-all       | + * PT_GNU_STACK == RWX  | exec-all   | exec-all         | exec-all       | + * PT_GNU_STACK == RW   | exec-none  | exec-none        | exec-none      | + * + * exec-all : all PROT_READ user mappings are executable, except when + * backed by files on a noexec-filesystem. + * exec-none : only PROT_EXEC user mappings are executable. + * + * *this column has no architectural effect: NX markings are ignored by + * hardware, but may have behavioral effects when "wants X" collides with + * "cannot be X" constraints in memory permission flags, as in + * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com + * */ #define elf_read_implies_exec(ex, executable_stack) \ (executable_stack != EXSTACK_DISABLE_X) From 122306117afe4ba202b5e57c61dfbeffc5c41387 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Mar 2020 23:48:16 -0700 Subject: [PATCH 088/562] x86/elf: Split READ_IMPLIES_EXEC from executable PT_GNU_STACK The READ_IMPLIES_EXEC workaround was designed for old toolchains that lacked the ELF PT_GNU_STACK marking under the assumption that toolchains that couldn't specify executable permission flags for the stack may not know how to do it correctly for any memory region. This logic is sensible for having ancient binaries coexist in a system with possibly NX memory, but was implemented in a way that equated having a PT_GNU_STACK marked executable as being as "broken" as lacking the PT_GNU_STACK marking entirely. Things like unmarked assembly and stack trampolines may cause PT_GNU_STACK to need an executable bit, but they do not imply all mappings must be executable. This confusion has led to situations where modern programs with explicitly marked executable stacks are forced into the READ_IMPLIES_EXEC state when no such thing is needed. (And leads to unexpected failures when mmap()ing regions of device driver memory that wish to disallow VM_EXEC[1].) In looking for other reasons for the READ_IMPLIES_EXEC behavior, Jann Horn noted that glibc thread stacks have always been marked RWX (until 2003 when they started tracking the PT_GNU_STACK flag instead[2]). And musl doesn't support executable stacks at all[3]. As such, no breakage for multithreaded applications is expected from this change. [1] https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com [2] https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=54ee14b3882 [3] https://lkml.kernel.org/r/20190423192534.GN23599@brightrain.aerifal.cx Suggested-by: Hector Marco-Gisbert Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Reviewed-by: Jason Gunthorpe Link: https://lkml.kernel.org/r/20200327064820.12602-3-keescook@chromium.org --- arch/x86/include/asm/elf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index ee459d4c3b45..397a1c74433e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -288,12 +288,13 @@ extern u32 elf_hwcap2; * ELF:              |            |                  |                | * ---------------------|------------|------------------|----------------| * missing PT_GNU_STACK | exec-all   | exec-all         | exec-all       | - * PT_GNU_STACK == RWX  | exec-all   | exec-all         | exec-all       | + * PT_GNU_STACK == RWX  | exec-stack | exec-stack       | exec-stack     | * PT_GNU_STACK == RW   | exec-none  | exec-none        | exec-none      | * * exec-all : all PROT_READ user mappings are executable, except when * backed by files on a noexec-filesystem. * exec-none : only PROT_EXEC user mappings are executable. + * exec-stack: only the stack and PROT_EXEC user mappings are executable. * * *this column has no architectural effect: NX markings are ignored by * hardware, but may have behavioral effects when "wants X" collides with @@ -302,7 +303,7 @@ extern u32 elf_hwcap2; * */ #define elf_read_implies_exec(ex, executable_stack) \ - (executable_stack != EXSTACK_DISABLE_X) + (executable_stack == EXSTACK_DEFAULT) struct task_struct; From 9fccc5c0c99f238aa1b0460fccbdb30a887e7036 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Mar 2020 23:48:17 -0700 Subject: [PATCH 089/562] x86/elf: Disable automatic READ_IMPLIES_EXEC on 64-bit With modern x86 64-bit environments, there should never be a need for automatic READ_IMPLIES_EXEC, as the architecture is intended to always be execute-bit aware (as in, the default memory protection should be NX unless a region explicitly requests to be executable). There were very old x86_64 systems that lacked the NX bit, but for those, the NX bit is, obviously, unenforceable, so these changes should have no impact on them. Suggested-by: Hector Marco-Gisbert Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Reviewed-by: Jason Gunthorpe Link: https://lkml.kernel.org/r/20200327064820.12602-4-keescook@chromium.org --- arch/x86/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 397a1c74433e..452beed7892b 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -287,7 +287,7 @@ extern u32 elf_hwcap2; *              CPU: | lacks NX*  | has NX, ia32     | has NX, x86_64 | * ELF:              |            |                  |                | * ---------------------|------------|------------------|----------------| - * missing PT_GNU_STACK | exec-all   | exec-all         | exec-all       | + * missing PT_GNU_STACK | exec-all   | exec-all         | exec-none      | * PT_GNU_STACK == RWX  | exec-stack | exec-stack       | exec-stack     | * PT_GNU_STACK == RW   | exec-none  | exec-none        | exec-none      | * @@ -303,7 +303,7 @@ extern u32 elf_hwcap2; * */ #define elf_read_implies_exec(ex, executable_stack) \ - (executable_stack == EXSTACK_DEFAULT) + (mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT) struct task_struct; From 78066055b08096ed3282c027de9d9e137f9a1580 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Mar 2020 23:48:18 -0700 Subject: [PATCH 090/562] arm32/64/elf: Add tables to document READ_IMPLIES_EXEC Add tables to document the current behavior of READ_IMPLIES_EXEC in preparation for changing the behavior for both arm64 and arm. Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Reviewed-by: Jason Gunthorpe Reviewed-by: Catalin Marinas Link: https://lkml.kernel.org/r/20200327064820.12602-5-keescook@chromium.org --- arch/arm/kernel/elf.c | 24 +++++++++++++++++++++--- arch/arm64/include/asm/elf.h | 20 ++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index 182422981386..5ccd4aced6cc 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -78,9 +78,27 @@ void elf_set_personality(const struct elf32_hdr *x) EXPORT_SYMBOL(elf_set_personality); /* - * Set READ_IMPLIES_EXEC if: - * - the binary requires an executable stack - * - we're running on a CPU which doesn't support NX. + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. + * + * The decision process for determining the results are: + * + *              CPU: | lacks NX*  | has NX | + * ELF:              |            |           | + * ---------------------|------------|------------| + * missing PT_GNU_STACK | exec-all   | exec-all  | + * PT_GNU_STACK == RWX  | exec-all   | exec-all  | + * PT_GNU_STACK == RW   | exec-all  | exec-none | + * + * exec-all : all PROT_READ user mappings are executable, except when + * backed by files on a noexec-filesystem. + * exec-none : only PROT_EXEC user mappings are executable. + * + * *this column has no architectural effect: NX markings are ignored by + * hardware, but may have behavioral effects when "wants X" collides with + * "cannot be X" constraints in memory permission flags, as in + * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com + * */ int arm_elf_read_implies_exec(int executable_stack) { diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index b618017205a3..986ecf41fc0f 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -96,6 +96,26 @@ */ #define elf_check_arch(x) ((x)->e_machine == EM_AARCH64) +/* + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. + * + * The decision process for determining the results are: + * + *              CPU*: | arm32    | arm64 | + * ELF:              |            |            | + * ---------------------|------------|------------| + * missing PT_GNU_STACK | exec-all   | exec-all   | + * PT_GNU_STACK == RWX  | exec-all   | exec-all   | + * PT_GNU_STACK == RW   | exec-none | exec-none | + * + * exec-all : all PROT_READ user mappings are executable, except when + * backed by files on a noexec-filesystem. + * exec-none : only PROT_EXEC user mappings are executable. + * + * *all arm64 CPUs support NX, so there is no "lacks NX" column. + * + */ #define elf_read_implies_exec(ex,stk) (stk != EXSTACK_DISABLE_X) #define CORE_DUMP_USE_REGSET From eaf3f9e61887332d5097dbf0b327b8377546adc5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Mar 2020 23:48:19 -0700 Subject: [PATCH 091/562] arm32/64/elf: Split READ_IMPLIES_EXEC from executable PT_GNU_STACK The READ_IMPLIES_EXEC work-around was designed for old toolchains that lacked the ELF PT_GNU_STACK marking under the assumption that toolchains that couldn't specify executable permission flags for the stack may not know how to do it correctly for any memory region. This logic is sensible for having ancient binaries coexist in a system with possibly NX memory, but was implemented in a way that equated having a PT_GNU_STACK marked executable as being as "broken" as lacking the PT_GNU_STACK marking entirely. Things like unmarked assembly and stack trampolines may cause PT_GNU_STACK to need an executable bit, but they do not imply all mappings must be executable. This confusion has led to situations where modern programs with explicitly marked executable stack are forced into the READ_IMPLIES_EXEC state when no such thing is needed. (And leads to unexpected failures when mmap()ing regions of device driver memory that wish to disallow VM_EXEC[1].) In looking for other reasons for the READ_IMPLIES_EXEC behavior, Jann Horn noted that glibc thread stacks have always been marked RWX (until 2003 when they started tracking the PT_GNU_STACK flag instead[2]). And musl doesn't support executable stacks at all[3]. As such, no breakage for multithreaded applications is expected from this change. This changes arm32 and arm64 compat together, to keep behavior the same. [1] https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com [2] https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=54ee14b3882 [3] https://lkml.kernel.org/r/20190423192534.GN23599@brightrain.aerifal.cx Suggested-by: Hector Marco-Gisbert Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Reviewed-by: Jason Gunthorpe Reviewed-by: Catalin Marinas Link: https://lkml.kernel.org/r/20200327064820.12602-6-keescook@chromium.org --- arch/arm/kernel/elf.c | 5 +++-- arch/arm64/include/asm/elf.h | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index 5ccd4aced6cc..254ab7138c85 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -87,12 +87,13 @@ EXPORT_SYMBOL(elf_set_personality); * ELF:              |            |           | * ---------------------|------------|------------| * missing PT_GNU_STACK | exec-all   | exec-all  | - * PT_GNU_STACK == RWX  | exec-all   | exec-all  | + * PT_GNU_STACK == RWX  | exec-all   | exec-stack | * PT_GNU_STACK == RW   | exec-all  | exec-none | * * exec-all : all PROT_READ user mappings are executable, except when * backed by files on a noexec-filesystem. * exec-none : only PROT_EXEC user mappings are executable. + * exec-stack: only the stack and PROT_EXEC user mappings are executable. * * *this column has no architectural effect: NX markings are ignored by * hardware, but may have behavioral effects when "wants X" collides with @@ -102,7 +103,7 @@ EXPORT_SYMBOL(elf_set_personality); */ int arm_elf_read_implies_exec(int executable_stack) { - if (executable_stack != EXSTACK_DISABLE_X) + if (executable_stack == EXSTACK_DEFAULT) return 1; if (cpu_architecture() < CPU_ARCH_ARMv6) return 1; diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 986ecf41fc0f..0074e9fd6431 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -106,17 +106,18 @@ * ELF:              |            |            | * ---------------------|------------|------------| * missing PT_GNU_STACK | exec-all   | exec-all   | - * PT_GNU_STACK == RWX  | exec-all   | exec-all   | + * PT_GNU_STACK == RWX  | exec-stack | exec-stack | * PT_GNU_STACK == RW   | exec-none | exec-none | * * exec-all : all PROT_READ user mappings are executable, except when * backed by files on a noexec-filesystem. * exec-none : only PROT_EXEC user mappings are executable. + * exec-stack: only the stack and PROT_EXEC user mappings are executable. * * *all arm64 CPUs support NX, so there is no "lacks NX" column. * */ -#define elf_read_implies_exec(ex,stk) (stk != EXSTACK_DISABLE_X) +#define elf_read_implies_exec(ex, stk) (stk == EXSTACK_DEFAULT) #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE From 6e0d6ac5f3d9d90271899f6d340872360fe1caee Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 26 Mar 2020 23:48:20 -0700 Subject: [PATCH 092/562] arm64/elf: Disable automatic READ_IMPLIES_EXEC for 64-bit address spaces With arm64 64-bit environments, there should never be a need for automatic READ_IMPLIES_EXEC, as the architecture has always been execute-bit aware (as in, the default memory protection should be NX unless a region explicitly requests to be executable). Suggested-by: Hector Marco-Gisbert Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Reviewed-by: Jason Gunthorpe Reviewed-by: Catalin Marinas Link: https://lkml.kernel.org/r/20200327064820.12602-7-keescook@chromium.org --- arch/arm64/include/asm/elf.h | 4 ++-- fs/compat_binfmt_elf.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 0074e9fd6431..0e7df6f1eb7a 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -105,7 +105,7 @@ *              CPU*: | arm32    | arm64 | * ELF:              |            |            | * ---------------------|------------|------------| - * missing PT_GNU_STACK | exec-all   | exec-all   | + * missing PT_GNU_STACK | exec-all   | exec-none  | * PT_GNU_STACK == RWX  | exec-stack | exec-stack | * PT_GNU_STACK == RW   | exec-none | exec-none | * @@ -117,7 +117,7 @@ * *all arm64 CPUs support NX, so there is no "lacks NX" column. * */ -#define elf_read_implies_exec(ex, stk) (stk == EXSTACK_DEFAULT) +#define compat_elf_read_implies_exec(ex, stk) (stk == EXSTACK_DEFAULT) #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index aaad4ca1217e..3068d57436b3 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -113,6 +113,11 @@ #define arch_setup_additional_pages compat_arch_setup_additional_pages #endif +#ifdef compat_elf_read_implies_exec +#undef elf_read_implies_exec +#define elf_read_implies_exec compat_elf_read_implies_exec +#endif + /* * Rename a few of the symbols that binfmt_elf.c will define. * These are all local so the names don't really matter, but it From ec34143912667c0f34cd6e0667344dde5e725f13 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 15:06:05 +0800 Subject: [PATCH 093/562] scsi: st: remove unneeded variable 'result' in st_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also remove a strange '^L' after this function. Fix the following coccicheck warning: drivers/scsi/st.c:1460:5-11: Unneeded variable: "result". Return "0" on line 1473 Link: https://lore.kernel.org/r/20200418070605.11450-1-yanaijie@huawei.com Reported-by: Hulk Robot Acked-by: Kai Mäkisara Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/st.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index c5f9b348b438..4bf4ab3b70f4 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -1457,7 +1457,6 @@ static int st_flush(struct file *filp, fl_owner_t id) accessing this tape. */ static int st_release(struct inode *inode, struct file *filp) { - int result = 0; struct scsi_tape *STp = filp->private_data; if (STp->door_locked == ST_LOCKED_AUTO) @@ -1470,9 +1469,9 @@ static int st_release(struct inode *inode, struct file *filp) scsi_autopm_put_device(STp->device); scsi_tape_put(STp); - return result; + return 0; } - + /* The checks common to both reading and writing */ static ssize_t rw_checks(struct scsi_tape *STp, struct file *filp, size_t count) { From f336c7003c84c6ac6db3ccc70b8cf6a588015871 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 15:06:25 +0800 Subject: [PATCH 094/562] scsi: ufs-qcom: remove unneeded variable 'ret' Fix the following coccicheck warning: drivers/scsi/ufs/ufs-qcom.c:575:5-8: Unneeded variable: "ret". Return "0" on line 590 Link: https://lore.kernel.org/r/20200418070625.11756-1-yanaijie@huawei.com Reported-by: Hulk Robot Reviewed-by: Bjorn Andersson Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-qcom.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 19aa5c44e0da..701e9184adff 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -572,7 +572,6 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); struct phy *phy = host->generic_phy; - int ret = 0; if (ufs_qcom_is_link_off(hba)) { /* @@ -587,7 +586,7 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufs_qcom_disable_lane_clks(host); } - return ret; + return 0; } static int ufs_qcom_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) From b7a9d0c6603d233280fbfef9d58154664e4aa478 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:40:08 +0800 Subject: [PATCH 095/562] scsi: fcoe: remove unneeded semicolon in fcoe.c Fix the following coccicheck warning: drivers/scsi/fcoe/fcoe.c:1918:3-4: Unneeded semicolon drivers/scsi/fcoe/fcoe.c:1930:3-4: Unneeded semicolon Link: https://lore.kernel.org/r/20200421034008.27865-1-yanaijie@huawei.com Reviewed-by: Hannes Reinecke Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 25dae9f0b205..cb41d166e0c0 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1915,7 +1915,7 @@ static int fcoe_device_notification(struct notifier_block *notifier, case FCOE_CTLR_ENABLED: case FCOE_CTLR_UNUSED: fcoe_ctlr_link_up(ctlr); - }; + } } else if (fcoe_ctlr_link_down(ctlr)) { switch (cdev->enabled) { case FCOE_CTLR_DISABLED: @@ -1927,7 +1927,7 @@ static int fcoe_device_notification(struct notifier_block *notifier, stats->LinkFailureCount++; put_cpu(); fcoe_clean_pending_queue(lport); - }; + } } out: return rc; From 355f46b97dacce6e5da436822c449db000744eac Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:41:01 +0800 Subject: [PATCH 096/562] scsi: mpt3sas: use true,false for bool variables Fix the following coccicheck warning: drivers/scsi/mpt3sas/mpt3sas_base.c:416:6-14: WARNING: Assignment of 0/1 to bool variable drivers/scsi/mpt3sas/mpt3sas_base.c:485:2-10: WARNING: Assignment of 0/1 to bool variable Link: https://lore.kernel.org/r/20200421034101.28273-1-yanaijie@huawei.com Acked-by: Suganath Prabu Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 663782bb790d..920a3a25a7a2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -413,7 +413,7 @@ static void _clone_sg_entries(struct MPT3SAS_ADAPTER *ioc, { Mpi2SGESimple32_t *sgel, *sgel_next; u32 sgl_flags, sge_chain_count = 0; - bool is_write = 0; + bool is_write = false; u16 i = 0; void __iomem *buffer_iomem; phys_addr_t buffer_iomem_phys; @@ -482,7 +482,7 @@ static void _clone_sg_entries(struct MPT3SAS_ADAPTER *ioc, if (le32_to_cpu(sgel->FlagsLength) & (MPI2_SGE_FLAGS_HOST_TO_IOC << MPI2_SGE_FLAGS_SHIFT)) - is_write = 1; + is_write = true; for (i = 0; i < MPT_MIN_PHYS_SEGMENTS + ioc->facts.MaxChainDepth; i++) { From e304142c308979fff404a38118878679ca58a2ba Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 21 Apr 2020 13:33:54 -0700 Subject: [PATCH 097/562] scsi: lpfc: remove duplicate unloading checks During code reviews several instances of duplicate module unloading checks were found. Remove the duplicate checks. Link: https://lore.kernel.org/r/20200421203354.49420-1-jsmart2021@gmail.com Reviewed-by: Himanshu Madhani Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 10 ++-------- drivers/scsi/lpfc/lpfc_nvme.c | 5 ----- drivers/scsi/lpfc/lpfc_nvmet.c | 11 ----------- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 80d1e661b0d4..565a21401660 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7936,19 +7936,13 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport) if (unlikely(!pring)) return; - if ((phba->pport->load_flag & FC_UNLOADING)) + if (phba->pport->load_flag & FC_UNLOADING) return; + spin_lock_irq(&phba->hbalock); if (phba->sli_rev == LPFC_SLI_REV4) spin_lock(&pring->ring_lock); - if ((phba->pport->load_flag & FC_UNLOADING)) { - if (phba->sli_rev == LPFC_SLI_REV4) - spin_unlock(&pring->ring_lock); - spin_unlock_irq(&phba->hbalock); - return; - } - list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { cmd = &piocb->iocb; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index a45936e08031..12d2b2775773 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1491,11 +1491,6 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, phba = vport->phba; - if (vport->load_flag & FC_UNLOADING) { - ret = -ENODEV; - goto out_fail; - } - if (unlikely(vport->load_flag & FC_UNLOADING)) { lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR, "6124 Fail IO, Driver unload\n"); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 565419bf8d74..5f5aecea5b55 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -841,9 +841,6 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, struct ulp_bde64 bpl; int rc; - if (phba->pport->load_flag & FC_UNLOADING) - return -ENODEV; - if (phba->pport->load_flag & FC_UNLOADING) return -ENODEV; @@ -938,11 +935,6 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, goto aerr; } - if (phba->pport->load_flag & FC_UNLOADING) { - rc = -ENODEV; - goto aerr; - } - #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (ctxp->ts_cmd_nvme) { if (rsp->op == NVMET_FCOP_RSP) @@ -1062,9 +1054,6 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, struct lpfc_queue *wq; unsigned long flags; - if (phba->pport->load_flag & FC_UNLOADING) - return; - if (phba->pport->load_flag & FC_UNLOADING) return; From 3c873161a0d7d1e11f1ce9cc59f89a009fb65711 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 13 Apr 2020 19:58:06 +0800 Subject: [PATCH 098/562] RDMA/hns: Add support for addressing when hopnum is 0 Currently, WQE and EQE table have already used the mtr interface to config and access memory by multi-hop addressing when hopnum is from 1 to 3. But if hopnum is 0, each table need write its own but repetitive logic, and many duplicate code exists in the mtr interfaces invoke process. So wraps the public logic as 3 functions: hns_roce_mtr_create(), hns_roce_mtr_destroy() and hns_roce_mtr_map() to support hopnum ranges from 0 to 3. In addition, makes the mtr interfaces easier to use. Link: https://lore.kernel.org/r/1586779091-51410-2-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 46 ++- drivers/infiniband/hw/hns/hns_roce_hem.c | 9 +- drivers/infiniband/hw/hns/hns_roce_hem.h | 5 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 423 +++++++++++++++++++- 4 files changed, 450 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f6b3cf6b95d6..4a7afec4899c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -271,6 +271,9 @@ enum { #define PAGE_ADDR_SHIFT 12 +/* The minimum page count for hardware access page directly. */ +#define HNS_HW_DIRECT_PAGE_COUNT 2 + struct hns_roce_uar { u64 pfn; unsigned long index; @@ -357,13 +360,32 @@ struct hns_roce_hem_list { struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL]; struct list_head btm_bt; /* link all bottom bt in @mid_bt */ dma_addr_t root_ba; /* pointer to the root ba table */ - int bt_pg_shift; +}; + +struct hns_roce_buf_attr { + struct { + size_t size; /* region size */ + int hopnum; /* multi-hop addressing hop num */ + } region[HNS_ROCE_MAX_BT_REGION]; + int region_count; /* valid region count */ + int page_shift; /* buffer page shift */ + bool fixed_page; /* decide page shift is fixed-size or maximum size */ + int user_access; /* umem access flag */ + bool mtt_only; /* only alloc buffer-required MTT memory */ }; /* memory translate region */ struct hns_roce_mtr { - struct hns_roce_hem_list hem_list; - int buf_pg_shift; + struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ + struct ib_umem *umem; /* user space buffer */ + struct hns_roce_buf *kmem; /* kernel space buffer */ + struct { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + int ba_pg_shift; /* BA table page shift */ + int buf_pg_shift; /* buffer page shift */ + int buf_pg_count; /* buffer page count */ + } hem_cfg; /* config for hardware addressing */ }; struct hns_roce_mw { @@ -1113,6 +1135,16 @@ static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) (offset & (page_size - 1)); } +static inline u64 to_hr_hw_page_addr(u64 addr) +{ + return addr >> PAGE_ADDR_SHIFT; +} + +static inline u32 to_hr_hw_page_shift(u32 page_shift) +{ + return page_shift - PAGE_ADDR_SHIFT; +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); @@ -1144,6 +1176,14 @@ void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, #define MTT_MIN_COUNT 2 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int page_shift, + struct ib_udata *udata, unsigned long user_addr); +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr); +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *regions, int region_cnt, + dma_addr_t *pages, int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 263338b90d7a..a245e753afe9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1505,7 +1505,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt) + int region_cnt, int bt_pg_shift) { const struct hns_roce_buf_region *r; int ofs, end; @@ -1519,7 +1519,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, return -EINVAL; } - unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN; + unit = (1 << bt_pg_shift) / BA_BYTE_LEN; for (i = 0; i < region_cnt; i++) { r = ®ions[i]; if (!r->count) @@ -1566,8 +1566,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, hem_list->root_ba = 0; } -void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order) +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list) { int i, j; @@ -1576,8 +1575,6 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) INIT_LIST_HEAD(&hem_list->mid_bt[i][j]); - - hem_list->bt_pg_shift = bt_page_order; } void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 3bb8f78fb7b0..a00b6c27735a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -133,14 +133,13 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop); bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type); -void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order); +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list); int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, int region_cnt, int unit); int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt); + int region_cnt, int bt_pg_shift); void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 176f34692f88..b3af36938f63 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1567,8 +1567,9 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, int buf_pg_shift) { - hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift); - mtr->buf_pg_shift = buf_pg_shift; + hns_roce_hem_list_init(&mtr->hem_list); + mtr->hem_cfg.buf_pg_shift = buf_pg_shift; + mtr->hem_cfg.ba_pg_shift = bt_pg_shift; } void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, @@ -1577,19 +1578,23 @@ void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, hns_roce_hem_list_release(hr_dev, &mtr->hem_list); } -static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr, dma_addr_t *bufs, - struct hns_roce_buf_region *r) +static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, struct hns_roce_buf_region *region) { + __le64 *mtts; int offset; int count; int npage; - u64 *mtts; + u64 addr; int end; int i; - offset = r->offset; - end = offset + r->count; + /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ + if (!region->hopnum) + return 0; + + offset = region->offset; + end = offset + region->count; npage = 0; while (offset < end) { mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, @@ -1597,13 +1602,13 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, if (!mtts) return -ENOBUFS; - /* Save page addr, low 12 bits : 0 */ for (i = 0; i < count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT; + addr = to_hr_hw_page_addr(pages[npage]); else - mtts[i] = bufs[npage]; + addr = pages[npage]; + mtts[i] = cpu_to_le64(addr); npage++; } offset += count; @@ -1621,13 +1626,14 @@ int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int i; ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, - region_cnt); + region_cnt, mtr->hem_cfg.ba_pg_shift); if (ret) return ret; + mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; for (i = 0; i < region_cnt; i++) { r = ®ions[i]; - ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r); + ret = mtr_map_region(hr_dev, mtr, bufs[i], r); if (ret) { dev_err(hr_dev->dev, "write mtr[%d/%d] err %d,offset=%d.\n", @@ -1644,37 +1650,412 @@ int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return ret; } +static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) +{ + int i; + + for (i = 0; i < attr->region_count; i++) + if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 && + attr->region[i].hopnum > 0) + return true; + + /* because the mtr only one root base address, when hopnum is 0 means + * root base address equals the first buffer address, thus all alloced + * memory must in a continuous space accessed by direct mode. + */ + return false; +} + +static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) +{ + size_t size = 0; + int i; + + for (i = 0; i < attr->region_count; i++) + size += attr->region[i].size; + + return size; +} + +static inline int mtr_umem_page_count(struct ib_umem *umem, int page_shift) +{ + int count = ib_umem_page_count(umem); + + if (page_shift >= PAGE_SHIFT) + count >>= page_shift - PAGE_SHIFT; + else + count <<= PAGE_SHIFT - page_shift; + + return count; +} + +static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, + int page_shift) +{ + if (is_direct) + return ALIGN(alloc_size, 1 << page_shift); + else + return HNS_HW_DIRECT_PAGE_COUNT << page_shift; +} + +/* + * check the given pages in continuous address space + * Returns 0 on success, or the error page num. + */ +static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, + int page_shift) +{ + size_t page_size = 1 << page_shift; + int i; + + for (i = 1; i < page_count; i++) + if (pages[i] - pages[i - 1] != page_size) + return i; + + return 0; +} + +static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release user buffers */ + if (mtr->umem) { + ib_umem_release(mtr->umem); + mtr->umem = NULL; + } + + /* release kernel buffers */ + if (mtr->kmem) { + hns_roce_buf_free(hr_dev, mtr->kmem); + kfree(mtr->kmem); + mtr->kmem = NULL; + } +} + +static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct ib_udata *udata, unsigned long user_addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int max_pg_shift = buf_attr->page_shift; + int best_pg_shift = 0; + int all_pg_count = 0; + size_t direct_size; + size_t total_size; + unsigned long tmp; + int ret = 0; + + total_size = mtr_bufs_size(buf_attr); + if (total_size < 1) { + ibdev_err(ibdev, "Failed to check mtr size\n"); + return -EINVAL; + } + + if (udata) { + mtr->kmem = NULL; + mtr->umem = ib_umem_get(ibdev, user_addr, total_size, + buf_attr->user_access); + if (IS_ERR_OR_NULL(mtr->umem)) { + ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + PTR_ERR(mtr->umem)); + return -ENOMEM; + } + if (buf_attr->fixed_page) { + best_pg_shift = max_pg_shift; + } else { + tmp = GENMASK(max_pg_shift, 0); + ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); + best_pg_shift = (ret <= PAGE_SIZE) ? + PAGE_SHIFT : ilog2(ret); + } + all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift); + ret = 0; + } else { + mtr->umem = NULL; + mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); + if (!mtr->kmem) { + ibdev_err(ibdev, "Failed to alloc kmem\n"); + return -ENOMEM; + } + direct_size = mtr_kmem_direct_size(is_direct, total_size, + max_pg_shift); + ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, + mtr->kmem, max_pg_shift); + if (ret) { + ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); + goto err_alloc_mem; + } else { + best_pg_shift = max_pg_shift; + all_pg_count = mtr->kmem->npages; + } + } + + /* must bigger than minimum hardware page shift */ + if (best_pg_shift < PAGE_ADDR_SHIFT || all_pg_count < 1) { + ret = -EINVAL; + ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + best_pg_shift, all_pg_count); + goto err_alloc_mem; + } + + mtr->hem_cfg.buf_pg_shift = best_pg_shift; + mtr->hem_cfg.buf_pg_count = all_pg_count; + + return 0; +err_alloc_mem: + mtr_free_bufs(hr_dev, mtr); + return ret; +} + +static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, int count, int page_shift) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int npage; + int err; + + if (mtr->umem) + npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + mtr->umem, page_shift); + else + npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + mtr->kmem); + + if (mtr->hem_cfg.is_direct && npage > 1) { + err = mtr_check_direct_pages(pages, npage, page_shift); + if (err) { + ibdev_err(ibdev, "Failed to check %s direct page-%d\n", + mtr->umem ? "user" : "kernel", err); + npage = err; + } + } + + return npage; +} + +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *regions, int region_cnt, + dma_addr_t *pages, int page_cnt) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_region *r; + int err; + int i; + + for (i = 0; i < region_cnt; i++) { + r = ®ions[i]; + if (r->offset + r->count > page_cnt) { + err = -EINVAL; + ibdev_err(ibdev, + "Failed to check mtr%d end %d + %d, max %d\n", + i, r->offset, r->count, page_cnt); + return err; + } + + err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); + if (err) { + ibdev_err(ibdev, + "Failed to map mtr%d offset %d, err %d\n", + i, r->offset, err); + return err; + } + } + + return 0; +} + int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { - u64 *mtts = mtt_buf; int mtt_count; int total = 0; - u64 *addr; + __le64 *mtts; int npage; + u64 addr; int left; - if (mtts == NULL || mtt_max < 1) + if (!mtt_buf || mtt_max < 1) goto done; + /* no mtt memory in direct mode, so just return the buffer address */ + if (mtr->hem_cfg.is_direct) { + npage = offset; + for (total = 0; total < mtt_max; total++, npage++) { + addr = mtr->hem_cfg.root_ba + + (npage << mtr->hem_cfg.buf_pg_shift); + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + mtt_buf[total] = to_hr_hw_page_addr(addr); + else + mtt_buf[total] = addr; + } + + goto done; + } + left = mtt_max; while (left > 0) { mtt_count = 0; - addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset + total, &mtt_count, NULL); - if (!addr || !mtt_count) + if (!mtts || !mtt_count) goto done; npage = min(mtt_count, left); - memcpy(&mtts[total], addr, BA_BYTE_LEN * npage); left -= npage; - total += npage; + for (mtt_count = 0; mtt_count < npage; mtt_count++) + mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]); } done: if (base_addr) - *base_addr = mtr->hem_list.root_ba; + *base_addr = mtr->hem_cfg.root_ba; return total; } + +/* convert buffer size to page index and page count */ +static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt, + struct hns_roce_buf_region *regions, int region_cnt, + int page_shift) +{ + unsigned int page_size = 1 << page_shift; + int max_region = attr->region_count; + struct hns_roce_buf_region *r; + int page_idx = 0; + int i = 0; + + for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { + r = ®ions[i]; + r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? + 0 : attr->region[i].hopnum; + r->offset = page_idx; + r->count = DIV_ROUND_UP(attr->region[i].size, page_size); + page_idx += r->count; + } + + return i; +} + +/** + * hns_roce_mtr_create - Create hns memory translate region. + * + * @mtr: memory translate region + * @init_attr: init attribute for creating mtr + * @page_shift: page shift for multi-hop base address table + * @udata: user space context, if it's NULL, means kernel space + * @user_addr: userspace virtual address to start at + * @buf_alloced: mtr has private buffer, true means need to alloc + */ +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int page_shift, + struct ib_udata *udata, unsigned long user_addr) +{ + struct hns_roce_buf_region regions[HNS_ROCE_MAX_BT_REGION] = {}; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages = NULL; + int region_cnt = 0; + int all_pg_cnt; + int get_pg_cnt; + bool has_mtt; + int err = 0; + + has_mtt = mtr_has_mtt(buf_attr); + /* if buffer only need mtt, just init the hem cfg */ + if (buf_attr->mtt_only) { + mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; + mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> + buf_attr->page_shift; + mtr->umem = NULL; + mtr->kmem = NULL; + } else { + err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, + user_addr); + if (err) { + ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", + err); + return err; + } + } + + /* alloc mtt memory */ + all_pg_cnt = mtr->hem_cfg.buf_pg_count; + hns_roce_hem_list_init(&mtr->hem_list); + mtr->hem_cfg.is_direct = !has_mtt; + mtr->hem_cfg.ba_pg_shift = page_shift; + if (has_mtt) { + region_cnt = mtr_init_region(buf_attr, all_pg_cnt, + regions, ARRAY_SIZE(regions), + mtr->hem_cfg.buf_pg_shift); + if (region_cnt < 1) { + err = -ENOBUFS; + ibdev_err(ibdev, "Failed to init mtr region %d\n", + region_cnt); + goto err_alloc_bufs; + } + err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + regions, region_cnt, + page_shift); + if (err) { + ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", + err); + goto err_alloc_bufs; + } + mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; + } + + /* no buffer to map */ + if (buf_attr->mtt_only) + return 0; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", + all_pg_cnt); + goto err_alloc_hem_list; + } + + get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, + mtr->hem_cfg.buf_pg_shift); + if (get_pg_cnt != all_pg_cnt) { + ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", + get_pg_cnt, all_pg_cnt); + err = -ENOBUFS; + goto err_alloc_page_list; + } + + if (!has_mtt) { + mtr->hem_cfg.root_ba = pages[0]; + } else { + /* write buffer's dma address to BA table */ + err = hns_roce_mtr_map(hr_dev, mtr, regions, region_cnt, pages, + all_pg_cnt); + if (err) { + ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", + err); + goto err_alloc_page_list; + } + } + + /* drop tmp array */ + kvfree(pages); + return 0; +err_alloc_page_list: + kvfree(pages); +err_alloc_hem_list: + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +err_alloc_bufs: + mtr_free_bufs(hr_dev, mtr); + return err; +} + +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release multi-hop addressing resource */ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + + /* free buffers */ + mtr_free_bufs(hr_dev, mtr); +} From cc23267aedebd847f86953c67606a3f280fde201 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 13 Apr 2020 19:58:07 +0800 Subject: [PATCH 099/562] RDMA/hns: Optimize hns buffer allocation flow When the value of nbufs is 1, the buffer is in direct mode, which may cause confusion. So optimizes current codes to make it easier to maintain and understand. Link: https://lore.kernel.org/r/1586779091-51410-3-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 105 +++++++++----------- drivers/infiniband/hw/hns/hns_roce_cq.c | 18 ++-- drivers/infiniband/hw/hns/hns_roce_device.h | 32 ++++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 43 ++++---- drivers/infiniband/hw/hns/hns_roce_mr.c | 8 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 6 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 37 ++++--- 8 files changed, 122 insertions(+), 129 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index da574c26e063..e04e7596d979 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -157,84 +157,78 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) kfree(bitmap->table); } -void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf) +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - int i; struct device *dev = hr_dev->dev; + u32 size = buf->size; + int i; - if (buf->nbufs == 1) { + if (size == 0) + return; + + buf->size = 0; + + if (hns_roce_buf_is_direct(buf)) { dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); } else { - for (i = 0; i < buf->nbufs; ++i) + for (i = 0; i < buf->npages; ++i) if (buf->page_list[i].buf) dma_free_coherent(dev, 1 << buf->page_shift, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); + buf->page_list = NULL; } } int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift) { - int i = 0; - dma_addr_t t; + struct hns_roce_buf_list *buf_list; struct device *dev = hr_dev->dev; - u32 page_size = 1 << page_shift; - u32 order; + u32 page_size; + int i; - /* SQ/RQ buf lease than one page, SQ + RQ = 8K */ + /* The minimum shift of the page accessed by hw is PAGE_ADDR_SHIFT */ + buf->page_shift = max_t(int, PAGE_ADDR_SHIFT, page_shift); + + page_size = 1 << buf->page_shift; + buf->npages = DIV_ROUND_UP(size, page_size); + + /* required size is not bigger than one trunk size */ if (size <= max_direct) { - buf->nbufs = 1; - /* Npages calculated by page_size */ - order = get_order(size); - if (order <= page_shift - PAGE_SHIFT) - order = 0; - else - order -= page_shift - PAGE_SHIFT; - buf->npages = 1 << order; - buf->page_shift = page_shift; - /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ - buf->direct.buf = dma_alloc_coherent(dev, size, &t, + buf->page_list = NULL; + buf->direct.buf = dma_alloc_coherent(dev, size, + &buf->direct.map, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; - - buf->direct.map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } } else { - buf->nbufs = (size + page_size - 1) / page_size; - buf->npages = buf->nbufs; - buf->page_shift = page_shift; - buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); - - if (!buf->page_list) + buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); + if (!buf_list) return -ENOMEM; - for (i = 0; i < buf->nbufs; ++i) { - buf->page_list[i].buf = dma_alloc_coherent(dev, - page_size, - &t, - GFP_KERNEL); - - if (!buf->page_list[i].buf) - goto err_free; - - buf->page_list[i].map = t; + for (i = 0; i < buf->npages; i++) { + buf_list[i].buf = dma_alloc_coherent(dev, page_size, + &buf_list[i].map, + GFP_KERNEL); + if (!buf_list[i].buf) + break; } + + if (i != buf->npages && i > 0) { + while (i-- > 0) + dma_free_coherent(dev, page_size, + buf_list[i].buf, + buf_list[i].map); + kfree(buf_list); + return -ENOMEM; + } + buf->page_list = buf_list; } + buf->size = size; return 0; - -err_free: - hns_roce_buf_free(hr_dev, size, buf); - return -ENOMEM; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, @@ -246,18 +240,14 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "invalid kmem region,offset %d,buf_cnt %d,total %d!\n", + "Failed to check kmem bufs, end %d + %d total %d!\n", start, buf_cnt, buf->npages); return -EINVAL; } total = 0; for (i = start; i < end; i++) - if (buf->nbufs == 1) - bufs[total++] = buf->direct.map + - ((dma_addr_t)i << buf->page_shift); - else - bufs[total++] = buf->page_list[i].map; + bufs[total++] = hns_roce_buf_page(buf, i); return total; } @@ -271,8 +261,9 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int idx = 0; u64 addr; - if (page_shift < PAGE_SHIFT) { - dev_err(hr_dev->dev, "invalid page shift %d!\n", page_shift); + if (page_shift < PAGE_ADDR_SHIFT) { + dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + page_shift); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 5bfb52ffd590..92798ff6360d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -157,13 +157,12 @@ static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct hns_roce_ib_create_cq ucmd, struct ib_udata *udata) { - struct hns_roce_buf *buf = &hr_cq->buf; struct hns_roce_mtt *mtt = &hr_cq->mtt; struct ib_umem **umem = &hr_cq->umem; u32 npages; int ret; - *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, buf->size, + *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, hr_cq->buf_size, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -175,7 +174,7 @@ static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, npages = DIV_ROUND_UP(ib_umem_page_count(*umem), 1 << hr_dev->caps.cqe_buf_pg_sz); - ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt); + ret = hns_roce_mtt_init(hr_dev, npages, hr_cq->page_shift, mtt); if (ret) goto err_buf; @@ -199,8 +198,9 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) struct hns_roce_mtt *mtt = &hr_cq->mtt; int ret; - ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2, - buf, buf->page_shift); + ret = hns_roce_buf_alloc(hr_dev, hr_cq->buf_size, + (1 << hr_cq->page_shift) * 2, + buf, hr_cq->page_shift); if (ret) goto out; @@ -223,7 +223,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: - hns_roce_buf_free(hr_dev, buf->size, buf); + hns_roce_buf_free(hr_dev, buf); out: return ret; @@ -231,7 +231,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); + hns_roce_buf_free(hr_dev, &hr_cq->buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, @@ -367,8 +367,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ hr_cq->cq_depth = cq_entries; hr_cq->vector = vector; - hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; - hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; + hr_cq->buf_size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + hr_cq->page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); INIT_LIST_HEAD(&hr_cq->sq_list); INIT_LIST_HEAD(&hr_cq->rq_list); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4a7afec4899c..c37617d5200f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -468,7 +468,6 @@ struct hns_roce_buf_list { struct hns_roce_buf { struct hns_roce_buf_list direct; struct hns_roce_buf_list *page_list; - int nbufs; u32 npages; u32 size; int page_shift; @@ -510,6 +509,8 @@ struct hns_roce_cq { u8 db_en; spinlock_t lock; struct ib_umem *umem; + u32 buf_size; + int page_shift; u32 cq_depth; u32 cons_index; u32 *set_ci_db; @@ -549,6 +550,8 @@ struct hns_roce_srq { struct hns_roce_buf buf; u64 *wrid; struct ib_umem *umem; + u32 buf_size; + int page_shift; struct hns_roce_mtt mtt; struct hns_roce_idx_que idx_que; spinlock_t lock; @@ -1124,15 +1127,29 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } +static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) +{ + if (buf->page_list) + return false; + + return true; +} + static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) { - u32 page_size = 1 << buf->page_shift; + if (hns_roce_buf_is_direct(buf)) + return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - if (buf->nbufs == 1) - return (char *)(buf->direct.buf) + offset; + return (char *)(buf->page_list[offset >> buf->page_shift].buf) + + (offset & ((1 << buf->page_shift) - 1)); +} + +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) +{ + if (hns_roce_buf_is_direct(buf)) + return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); else - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & (page_size - 1)); + return buf->page_list[idx].map; } static inline u64 to_hr_hw_page_addr(u64 addr) @@ -1240,8 +1257,7 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); -void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf); +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5ff028d77be3..4b5490692fbb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3666,7 +3666,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ib_umem_release(hr_cq->umem); if (!udata) { /* Free the buff of stored cq */ - hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); + hns_roce_buf_free(hr_dev, &hr_cq->buf); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c3316672b70e..998015da48ba 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4989,24 +4989,14 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) hns_roce_write64(hr_dev, doorbell, eq->doorbell); } -static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset) -{ - u32 buf_chk_sz; - - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - if (eq->buf.nbufs == 1) - return eq->buf.direct.buf + offset % buf_chk_sz; - else - return eq->buf.page_list[offset / buf_chk_sz].buf + - offset % buf_chk_sz; -} - static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_aeqe *aeqe; - aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE); + aeqe = hns_roce_buf_offset(&eq->buf, + (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_AEQ_ENTRY_SIZE); + return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ !!(eq->cons_index & eq->entries)) ? aeqe : NULL; } @@ -5103,8 +5093,9 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe; - ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE); + ceqe = hns_roce_buf_offset(&eq->buf, + (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_CEQ_ENTRY_SIZE); return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; } @@ -5265,7 +5256,7 @@ static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) hns_roce_mtr_cleanup(hr_dev, &eq->mtr); - hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf); + hns_roce_buf_free(hr_dev, &eq->buf); } static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, @@ -5290,7 +5281,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz; eq->shift = ilog2((unsigned int)eq->entries); - /* if not muti-hop, eqe buffer only use one trunk */ + /* if not multi-hop, eqe buffer only use one trunk */ + eq->eqe_buf_pg_sz = eq->buf.page_shift - PAGE_ADDR_SHIFT; if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) { eq->eqe_ba = eq->buf.direct.map; eq->cur_eqe_ba = eq->eqe_ba; @@ -5432,7 +5424,7 @@ static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, goto done; } - hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz, + hns_roce_mtr_init(&eq->mtr, PAGE_ADDR_SHIFT + hr_dev->caps.eqe_ba_pg_sz, page_shift); ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, ®ion, 1); if (ret) @@ -5454,23 +5446,24 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) u32 page_shift; u32 mhop_num; u32 max_size; + u32 buf_size; int ret; - page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz; + page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.eqe_buf_pg_sz; mhop_num = hr_dev->caps.eqe_hop_num; if (!mhop_num) { max_size = 1 << page_shift; - buf->size = max_size; + buf_size = max_size; } else if (mhop_num == HNS_ROCE_HOP_NUM_0) { max_size = eq->entries * eq->eqe_size; - buf->size = max_size; + buf_size = max_size; } else { max_size = 1 << page_shift; - buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size); + buf_size = round_up(eq->entries * eq->eqe_size, max_size); is_mhop = true; } - ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift); + ret = hns_roce_buf_alloc(hr_dev, buf_size, max_size, buf, page_shift); if (ret) { dev_err(hr_dev->dev, "alloc eq buf error\n"); return ret; @@ -5486,7 +5479,7 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) return 0; err_alloc: - hns_roce_buf_free(hr_dev, buf->size, buf); + hns_roce_buf_free(hr_dev, buf); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b3af36938f63..99e3876e712c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -900,13 +900,9 @@ int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, if (!page_list) return -ENOMEM; - for (i = 0; i < buf->npages; ++i) { - if (buf->nbufs == 1) - page_list[i] = buf->direct.map + (i << buf->page_shift); - else - page_list[i] = buf->page_list[i].map; + for (i = 0; i < buf->npages; ++i) + page_list[i] = hns_roce_buf_page(buf, i); - } ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); kfree(page_list); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6317901c4b4f..1667f3753f34 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -839,7 +839,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ib_umem_release(hr_qp->umem); hr_qp->umem = NULL; } else { - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); } ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret); @@ -855,8 +855,8 @@ static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->umem = NULL; } - if (hr_qp->hr_buf.nbufs > 0) - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + if (hr_qp->hr_buf.npages > 0) + hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hr_qp->rq.wqe_cnt) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 5b3dd1a337d4..9851d76d2c14 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -180,7 +180,8 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); struct hns_roce_ib_create_srq ucmd; - struct hns_roce_buf *buf; + int page_shift; + int page_count; int ret; if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) @@ -191,12 +192,11 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - buf = &srq->buf; - buf->npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + page_count = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / (1 << hr_dev->caps.srqwqe_buf_pg_sz); - buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, &srq->mtt); if (ret) goto err_user_buf; @@ -214,11 +214,10 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, goto err_user_srq_mtt; } - buf = &srq->idx_que.idx_buf; - buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), - 1 << hr_dev->caps.idx_buf_pg_sz); - buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + page_count = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), + 1 << hr_dev->caps.idx_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); @@ -325,15 +324,14 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); err_kernel_create_idx: - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, - &srq->idx_que.idx_buf); + hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); kfree(srq->idx_que.bitmap); err_kernel_srq_mtt: hns_roce_mtt_cleanup(hr_dev, &srq->mtt); err_kernel_buf: - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + hns_roce_buf_free(hr_dev, &srq->buf); return ret; } @@ -348,14 +346,14 @@ static void destroy_user_srq(struct hns_roce_dev *hr_dev, } static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, int srq_buf_size) + struct hns_roce_srq *srq) { kvfree(srq->wrid); hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf); + hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); kfree(srq->idx_que.bitmap); hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + hns_roce_buf_free(hr_dev, &srq->buf); } int hns_roce_create_srq(struct ib_srq *ib_srq, @@ -437,7 +435,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, if (udata) destroy_user_srq(hr_dev, srq); else - destroy_kernel_srq(hr_dev, srq, srq_buf_size); + destroy_kernel_srq(hr_dev, srq); err_srq: return ret; @@ -455,8 +453,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); } else { kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift, - &srq->buf); + hns_roce_buf_free(hr_dev, &srq->buf); } ib_umem_release(srq->idx_que.umem); ib_umem_release(srq->umem); From 477a0a38707249227d8929648baf5abbdd58c40f Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 13 Apr 2020 19:58:08 +0800 Subject: [PATCH 100/562] RDMA/hns: Optimize 0 hop addressing for EQE buffer Use the new mtr interface to simple the hop 0 addressing and multihop addressing process. Link: https://lore.kernel.org/r/1586779091-51410-4-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 6 - drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 171 ++++++-------------- 2 files changed, 48 insertions(+), 129 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c37617d5200f..39577c2e345e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -794,17 +794,11 @@ struct hns_roce_eq { int over_ignore; int coalesce; int arm_st; - u64 eqe_ba; - int eqe_ba_pg_sz; - int eqe_buf_pg_sz; int hop_num; struct hns_roce_mtr mtr; - struct hns_roce_buf buf; int eq_max_cnt; int eq_period; int shift; - dma_addr_t cur_eqe_ba; - dma_addr_t nxt_eqe_ba; int event_type; int sub_type; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 998015da48ba..335a637ab239 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4993,7 +4993,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_aeqe *aeqe; - aeqe = hns_roce_buf_offset(&eq->buf, + aeqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE); @@ -5093,7 +5093,7 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe; - ceqe = hns_roce_buf_offset(&eq->buf, + ceqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE); return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ @@ -5254,17 +5254,15 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn) static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) - hns_roce_mtr_cleanup(hr_dev, &eq->mtr); - hns_roce_buf_free(hr_dev, &eq->buf); + hns_roce_mtr_destroy(hr_dev, &eq->mtr); } -static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - void *mb_buf) +static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, + void *mb_buf) { + u64 eqe_ba[MTT_MIN_COUNT] = { 0 }; struct hns_roce_eq_context *eqc; - u64 ba[MTT_MIN_COUNT] = { 0 }; + u64 bt_ba = 0; int count; eqc = mb_buf; @@ -5272,32 +5270,18 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* init eqc */ eq->doorbell = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG; - eq->hop_num = hr_dev->caps.eqe_hop_num; eq->cons_index = 0; eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0; eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0; eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED; - eq->eqe_ba_pg_sz = hr_dev->caps.eqe_ba_pg_sz; - eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz; eq->shift = ilog2((unsigned int)eq->entries); /* if not multi-hop, eqe buffer only use one trunk */ - eq->eqe_buf_pg_sz = eq->buf.page_shift - PAGE_ADDR_SHIFT; - if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) { - eq->eqe_ba = eq->buf.direct.map; - eq->cur_eqe_ba = eq->eqe_ba; - if (eq->buf.npages > 1) - eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz); - else - eq->nxt_eqe_ba = eq->eqe_ba; - } else { - count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba, - MTT_MIN_COUNT, &eq->eqe_ba); - eq->cur_eqe_ba = ba[0]; - if (count > 1) - eq->nxt_eqe_ba = ba[1]; - else - eq->nxt_eqe_ba = ba[0]; + count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT, + &bt_ba); + if (count < 1) { + dev_err(hr_dev->dev, "failed to find EQE mtr\n"); + return -ENOBUFS; } /* set eqc state */ @@ -5331,12 +5315,12 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set eqe_ba_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M, HNS_ROCE_EQC_BA_PG_SZ_S, - eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift)); /* set eqe_buf_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M, HNS_ROCE_EQC_BUF_PG_SZ_S, - eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift)); /* set eq_producer_idx */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_PROD_INDX_M, @@ -5355,13 +5339,13 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, HNS_ROCE_EQC_REPORT_TIMER_S, HNS_ROCE_EQ_INIT_REPORT_TIMER); - /* set eqe_ba [34:3] */ + /* set bt_ba [34:3] */ roce_set_field(eqc->eqe_ba0, HNS_ROCE_EQC_EQE_BA_L_M, - HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3); + HNS_ROCE_EQC_EQE_BA_L_S, bt_ba >> 3); - /* set eqe_ba [64:35] */ + /* set bt_ba [64:35] */ roce_set_field(eqc->eqe_ba1, HNS_ROCE_EQC_EQE_BA_H_M, - HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35); + HNS_ROCE_EQC_EQE_BA_H_S, bt_ba >> 35); /* set eq shift */ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_SHIFT_M, HNS_ROCE_EQC_SHIFT_S, @@ -5373,15 +5357,15 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set cur_eqe_ba [27:12] */ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_CUR_EQE_BA_L_M, - HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12); + HNS_ROCE_EQC_CUR_EQE_BA_L_S, eqe_ba[0] >> 12); /* set cur_eqe_ba [59:28] */ roce_set_field(eqc->byte_32, HNS_ROCE_EQC_CUR_EQE_BA_M_M, - HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28); + HNS_ROCE_EQC_CUR_EQE_BA_M_S, eqe_ba[0] >> 28); /* set cur_eqe_ba [63:60] */ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CUR_EQE_BA_H_M, - HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60); + HNS_ROCE_EQC_CUR_EQE_BA_H_S, eqe_ba[0] >> 60); /* set eq consumer idx */ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M, @@ -5389,98 +5373,38 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set nex_eqe_ba[43:12] */ roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M, - HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12); + HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12); /* set nex_eqe_ba[63:44] */ roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M, - HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44); -} + HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44); -static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, - u32 page_shift) -{ - struct hns_roce_buf_region region = {}; - dma_addr_t *buf_list = NULL; - int ba_num; - int ret; - - ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size), - 1 << page_shift); - hns_roce_init_buf_region(®ion, hr_dev->caps.eqe_hop_num, 0, ba_num); - - /* alloc a tmp list for storing eq buf address */ - ret = hns_roce_alloc_buf_list(®ion, &buf_list, 1); - if (ret) { - dev_err(hr_dev->dev, "alloc eq buf_list error\n"); - return ret; - } - - ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count, - region.offset, &eq->buf); - if (ba_num != region.count) { - dev_err(hr_dev->dev, "get eqe buf err,expect %d,ret %d.\n", - region.count, ba_num); - ret = -ENOBUFS; - goto done; - } - - hns_roce_mtr_init(&eq->mtr, PAGE_ADDR_SHIFT + hr_dev->caps.eqe_ba_pg_sz, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, ®ion, 1); - if (ret) - dev_err(hr_dev->dev, "mtr attach error for eqe\n"); - - goto done; - - hns_roce_mtr_cleanup(hr_dev, &eq->mtr); -done: - hns_roce_free_buf_list(&buf_list, 1); - - return ret; + return 0; } static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - struct hns_roce_buf *buf = &eq->buf; - bool is_mhop = false; - u32 page_shift; - u32 mhop_num; - u32 max_size; - u32 buf_size; - int ret; + struct hns_roce_buf_attr buf_attr = {}; + int err; - page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.eqe_buf_pg_sz; - mhop_num = hr_dev->caps.eqe_hop_num; - if (!mhop_num) { - max_size = 1 << page_shift; - buf_size = max_size; - } else if (mhop_num == HNS_ROCE_HOP_NUM_0) { - max_size = eq->entries * eq->eqe_size; - buf_size = max_size; - } else { - max_size = 1 << page_shift; - buf_size = round_up(eq->entries * eq->eqe_size, max_size); - is_mhop = true; - } + if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0) + eq->hop_num = 0; + else + eq->hop_num = hr_dev->caps.eqe_hop_num; - ret = hns_roce_buf_alloc(hr_dev, buf_size, max_size, buf, page_shift); - if (ret) { - dev_err(hr_dev->dev, "alloc eq buf error\n"); - return ret; - } + buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = eq->entries * eq->eqe_size; + buf_attr.region[0].hopnum = eq->hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; - if (is_mhop) { - ret = map_eq_buf(hr_dev, eq, page_shift); - if (ret) { - dev_err(hr_dev->dev, "map roce buf error\n"); - goto err_alloc; - } - } + err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_ADDR_SHIFT, NULL, 0); + if (err) + dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); - return 0; -err_alloc: - hns_roce_buf_free(hr_dev, buf); - return ret; + return err; } static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, @@ -5492,15 +5416,16 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + if (IS_ERR_OR_NULL(mailbox)) + return -ENOMEM; ret = alloc_eq_buf(hr_dev, eq); - if (ret) { - ret = -ENOMEM; + if (ret) goto free_cmd_mbox; - } - hns_roce_config_eqc(hr_dev, eq, mailbox->buf); + + ret = config_eqc(hr_dev, eq, mailbox->buf); + if (ret) + goto err_cmd_mbox; ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); From d563099e3e89c48caf9cc183ab3d39dd326c8987 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 13 Apr 2020 19:58:09 +0800 Subject: [PATCH 101/562] RDMA/hns: Support 0 hop addressing for WQE buffer Add the zero hop addressing support by using new mtr interface for WQE buffer and simple mtr invoking process, so WQE buffer can support hopnum between 0 to 3. Link: https://lore.kernel.org/r/1586779091-51410-5-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 - drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 80 +++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 36 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 182 +++++--------------- 4 files changed, 106 insertions(+), 196 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 39577c2e345e..5df4ee3a1f82 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -681,7 +681,6 @@ struct hns_roce_work { struct hns_roce_qp { struct ib_qp ibqp; - struct hns_roce_buf hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; struct hns_roce_db sdb; @@ -691,10 +690,7 @@ struct hns_roce_qp { u32 sq_signal_bits; struct hns_roce_wq sq; - struct ib_umem *umem; - struct hns_roce_mtt mtt; struct hns_roce_mtr mtr; - int wqe_bt_pg_shift; u32 buff_size; struct mutex mutex; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 4b5490692fbb..ddf2a454b525 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2479,7 +2479,6 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, } static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, enum hns_roce_qp_state cur_state, enum hns_roce_qp_state new_state, struct hns_roce_qp_context *context, @@ -2560,6 +2559,29 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, return ret; } +static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int rq_pa_start; + int count; + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); + if (count < 1) { + ibdev_err(ibdev, "Failed to find SQ ba\n"); + return -ENOBUFS; + } + rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift; + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1, + NULL); + if (!count) { + ibdev_err(ibdev, "Failed to find RQ ba\n"); + return -ENOBUFS; + } + + return 0; +} + static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -2567,25 +2589,20 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_sqp_context *context; - struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; u32 __iomem *addr; - int rq_pa_start; + u64 sq_ba = 0; + u64 rq_ba = 0; __le32 tmp; u32 reg_val; - u64 *mtts; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; /* Search QP buf's MTTs */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { roce_set_field(context->qp1c_bytes_4, @@ -2599,11 +2616,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); @@ -2624,14 +2641,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); - rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, - (mtts[rq_pa_start]) >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_RQ_CUR_IDX_M, QP1C_BYTES_28_RQ_CUR_IDX_S, 0); @@ -2645,12 +2660,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn); - context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); + context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_SQ_CUR_IDX_M, QP1C_BYTES_40_SQ_CUR_IDX_S, 0); @@ -2716,10 +2731,10 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; __le32 doorbell[2] = {0}; - int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; - u64 *mtts = NULL; + u64 sq_ba = 0; + u64 rq_ba = 0; int port; u8 port_num; u8 *dmac; @@ -2730,12 +2745,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return -ENOMEM; /* Search qp buf's mtts */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (mtts == NULL) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - } /* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -2890,11 +2901,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dmac = (u8 *)attr->ah_attr.roce.dmac; - context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_bit(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, 1); @@ -2993,14 +3004,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); - rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, - mtts[rq_pa_start] >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); @@ -3075,12 +3084,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; } - context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qpc_bytes_124, QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, @@ -3223,12 +3232,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_bit(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); roce_set_field(context->qpc_bytes_188, @@ -3253,8 +3262,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state); /* SW pass context to HW */ - ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, - to_hns_roce_state(cur_state), + ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state), to_hns_roce_state(new_state), context, hr_qp); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 335a637ab239..42bca0ae64ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -164,7 +164,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; - shift = qp->hr_buf.page_shift; + shift = qp->mtr.hem_cfg.buf_pg_shift; /* * Check whether wr->num_sge sges are in the same page. If not, we @@ -3757,7 +3757,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int port; /* Search qp buf's mtts */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset / page_size, mtts, MTT_MIN_COUNT, &wqe_sge_ba); @@ -3831,7 +3831,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -3839,29 +3839,29 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, - hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); - context->rq_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); + context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); qpc_mask->rq_cur_blk_addr = 0; roce_set_field(context->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[0]))); roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0); - context->rq_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); + context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); qpc_mask->rq_nxt_blk_addr = 0; roce_set_field(context->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, - mtts[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); roce_set_field(qpc_mask->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); @@ -3995,18 +3995,18 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, /* Search qp buf's mtts */ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find buf pa of QP(0x%lx)\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf\n", hr_qp->qpn); return -EINVAL; } if (hr_qp->sge.offset) { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sge.offset / page_size, &sge_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find sge pa of QP(0x%lx)\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf\n", hr_qp->qpn); return -EINVAL; } @@ -4024,11 +4024,11 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. */ - context->sq_cur_blk_addr = cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); + context->sq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk)); roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, - sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(sq_cur_blk))); qpc_mask->sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, @@ -4036,26 +4036,24 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - cpu_to_le32(sge_cur_blk >> - PAGE_ADDR_SHIFT) : 0; + cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)) : 0; roce_set_field(context->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - (sge_cur_blk >> - (32 + PAGE_ADDR_SHIFT)) : 0); + upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)) : 0); qpc_mask->sq_cur_sge_blk_addr = 0; roce_set_field(qpc_mask->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0); context->rx_sq_cur_blk_addr = - cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); + cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk)); roce_set_field(context->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, - sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(sq_cur_blk))); qpc_mask->rx_sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 1667f3753f34..d05d3cb7de39 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -512,63 +512,57 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct hns_roce_buf_region *regions, - int region_max, int page_shift) + struct hns_roce_buf_attr *buf_attr) { - int page_size = 1 << page_shift; bool is_extend_sge; - int region_cnt = 0; int buf_size; - int buf_cnt; + int idx = 0; - if (hr_qp->buff_size < 1 || region_max < 1) - return region_cnt; + if (hr_qp->buff_size < 1) + return -EINVAL; + + buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + buf_attr->fixed_page = true; + buf_attr->region_count = 0; if (hr_qp->sge.sge_cnt > 0) is_extend_sge = true; else is_extend_sge = false; - /* sq region */ + /* SQ WQE */ if (is_extend_sge) buf_size = hr_qp->sge.offset - hr_qp->sq.offset; else buf_size = hr_qp->rq.offset - hr_qp->sq.offset; - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sq_hop_num, - hr_qp->sq.offset / page_size, - buf_cnt); - region_cnt++; + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; + idx++; } - /* sge region */ - if (is_extend_sge) { - buf_size = hr_qp->rq.offset - hr_qp->sge.offset; - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sge_hop_num, - hr_qp->sge.offset / page_size, - buf_cnt); - region_cnt++; - } + /* extend SGE in SQ WQE */ + buf_size = hr_qp->rq.offset - hr_qp->sge.offset; + if (buf_size > 0 && is_extend_sge && + idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = + hr_dev->caps.wqe_sge_hop_num; + idx++; } - /* rq region */ + /* RQ WQE */ buf_size = hr_qp->buff_size - hr_qp->rq.offset; - if (buf_size > 0) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_rq_hop_num, - hr_qp->rq.offset / page_size, - buf_cnt); - region_cnt++; + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; + idx++; } - return region_cnt; + buf_attr->region_count = idx; + + return 0; } static int set_extend_sge_param(struct hns_roce_dev *hr_dev, @@ -731,72 +725,12 @@ static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) kfree(hr_qp->rq_inl_buf.wqe_list); } -static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - u32 page_shift, bool is_user) -{ -/* WQE buffer include 3 parts: SQ, extend SGE and RQ. */ -#define HNS_ROCE_WQE_REGION_MAX 3 - struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX] = {}; - dma_addr_t *buf_list[HNS_ROCE_WQE_REGION_MAX] = {}; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_buf_region *r; - int region_count; - int buf_count; - int ret; - int i; - - region_count = split_wqe_buf_region(hr_dev, hr_qp, regions, - ARRAY_SIZE(regions), page_shift); - - /* alloc a tmp list to store WQE buffers address */ - ret = hns_roce_alloc_buf_list(regions, buf_list, region_count); - if (ret) { - ibdev_err(ibdev, "Failed to alloc WQE buffer list\n"); - return ret; - } - - for (i = 0; i < region_count; i++) { - r = ®ions[i]; - if (is_user) - buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i], - r->count, r->offset, hr_qp->umem, - page_shift); - else - buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i], - r->count, r->offset, &hr_qp->hr_buf); - - if (buf_count != r->count) { - ibdev_err(ibdev, "Failed to get %s WQE buf, expect %d = %d.\n", - is_user ? "user" : "kernel", - r->count, buf_count); - ret = -ENOBUFS; - goto done; - } - } - - hr_qp->wqe_bt_pg_shift = hr_dev->caps.mtt_ba_pg_sz; - hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, regions, - region_count); - if (ret) - ibdev_err(ibdev, "Failed to attach WQE's mtr\n"); - - goto done; - - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); -done: - hns_roce_free_buf_list(buf_list, region_count); - - return ret; -} - static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, unsigned long addr) { - u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; bool is_rq_buf_inline; int ret; @@ -810,54 +744,30 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } } - if (udata) { - hr_qp->umem = ib_umem_get(ibdev, addr, hr_qp->buff_size, 0); - if (IS_ERR(hr_qp->umem)) { - ret = PTR_ERR(hr_qp->umem); - goto err_inline; - } - } else { - ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, - (1 << page_shift) * 2, - &hr_qp->hr_buf, page_shift); - if (ret) - goto err_inline; + ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr); + if (ret) { + ibdev_err(ibdev, "Failed to split WQE buf, ret %d\n", ret); + goto err_inline; + } + ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, + PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + udata, addr); + if (ret) { + ibdev_err(ibdev, "Failed to create WQE mtr, ret %d\n", ret); + goto err_inline; } - ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata); - if (ret) - goto err_alloc; - return 0; - err_inline: if (is_rq_buf_inline) free_rq_inline_buf(hr_qp); -err_alloc: - if (udata) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } else { - hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); - } - - ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret); - return ret; } static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); - if (hr_qp->umem) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } - - if (hr_qp->hr_buf.npages > 0) - hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); - + hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hr_qp->rq.wqe_cnt) free_rq_inline_buf(hr_qp); @@ -1431,10 +1341,9 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, } } -static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) +static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) { - - return hns_roce_buf_offset(&hr_qp->hr_buf, offset); + return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) @@ -1449,8 +1358,7 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) { - return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset + - (n << hr_qp->sge.sge_shift)); + return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, From 6fd610c5733d0b2024393e82f145180324ef55a7 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 13 Apr 2020 19:58:10 +0800 Subject: [PATCH 102/562] RDMA/hns: Support 0 hop addressing for SRQ buffer Add the zero hop addressing support by using mtr interface for SRQ buffer, so the hns driver can support addressing hopnum between 0 to 3 for SRQ. Link: https://lore.kernel.org/r/1586779091-51410-6-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 12 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 34 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 367 ++++++++------------ 3 files changed, 162 insertions(+), 251 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5df4ee3a1f82..39af7366126a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -528,11 +528,8 @@ struct hns_roce_cq { }; struct hns_roce_idx_que { - struct hns_roce_buf idx_buf; + struct hns_roce_mtr mtr; int entry_sz; - u32 buf_size; - struct ib_umem *umem; - struct hns_roce_mtt mtt; unsigned long *bitmap; }; @@ -547,12 +544,9 @@ struct hns_roce_srq { atomic_t refcount; struct completion free; - struct hns_roce_buf buf; + struct hns_roce_mtr buf_mtr; + u64 *wrid; - struct ib_umem *umem; - u32 buf_size; - int page_shift; - struct hns_roce_mtt mtt; struct hns_roce_idx_que idx_que; spinlock_t lock; int head; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 42bca0ae64ff..1aed542fbdc5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2699,7 +2699,7 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) static void *get_srq_wqe(struct hns_roce_srq *srq, int n) { - return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); + return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) @@ -5699,43 +5699,45 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, dma_handle_idx >> 35); srq_context->idx_cur_blk_addr = - cpu_to_le32(mtts_idx[0] >> PAGE_ADDR_SHIFT); + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - mtts_idx[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.idx_hop_num); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); + roce_set_field( + srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); + roce_set_field( + srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); srq_context->idx_nxt_blk_addr = - cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); roce_set_field(srq_context->rsv_idxnxtblkaddr, SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - mtts_idx[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, cqn); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); roce_set_bit(srq_context->db_record_addr_record_en, SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); @@ -5847,7 +5849,7 @@ static void fill_idx_queue(struct hns_roce_idx_que *idx_que, { unsigned int *addr; - addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, + addr = (unsigned int *)hns_roce_buf_offset(idx_que->mtr.kmem, cur_idx * idx_que->entry_sz); *addr = wqe_idx; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9851d76d2c14..e413a9737ae6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -77,56 +77,56 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, - u16 xrcd, struct hns_roce_mtt *hr_mtt, - u64 db_rec_addr, struct hns_roce_srq *srq) +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - dma_addr_t dma_handle_wqe; - dma_addr_t dma_handle_idx; - u64 *mtts_wqe; - u64 *mtts_idx; + u64 mtts_wqe[MTT_MIN_COUNT] = { 0 }; + u64 mtts_idx[MTT_MIN_COUNT] = { 0 }; + dma_addr_t dma_handle_wqe = 0; + dma_addr_t dma_handle_idx = 0; int ret; /* Get the physical address of srq buf */ - mtts_wqe = hns_roce_table_find(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table, - srq->mtt.first_seg, - &dma_handle_wqe); - if (!mtts_wqe) { - dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n"); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + return -ENOBUFS; } /* Get physical address of idx que buf */ - mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table, - srq->idx_que.mtt.first_seg, - &dma_handle_idx); - if (!mtts_idx) { - dev_err(hr_dev->dev, - "Failed to find mtt for srq idx queue buf.\n"); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - dev_err(hr_dev->dev, - "Failed to alloc a bit from srq bitmap.\n"); + ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); goto err_out; + } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); goto err_put; + } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) { - ret = PTR_ERR(mailbox); + if (IS_ERR_OR_NULL(mailbox)) { + ret = -ENOMEM; + ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); goto err_xa; } @@ -136,8 +136,10 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); goto err_xa; + } atomic_set(&srq->refcount, 1); init_completion(&srq->free); @@ -154,8 +156,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, return ret; } -static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) +static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; @@ -175,185 +176,104 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } -static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, - int srq_buf_size) +static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) { - struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - struct hns_roce_ib_create_srq ucmd; - int page_shift; - int page_count; - int ret; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int sge_size; + int err; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) - return -EFAULT; + sge_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * srq->max_gs)); - srq->umem = - ib_umem_get(srq->ibsrq.device, ucmd.buf_addr, srq_buf_size, 0); - if (IS_ERR(srq->umem)) - return PTR_ERR(srq->umem); + srq->wqe_shift = ilog2(sge_size); - page_count = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, - &srq->mtt); - if (ret) - goto err_user_buf; + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = srq->wqe_cnt * sge_size; + buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); - if (ret) - goto err_user_srq_mtt; + err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_ADDR_SHIFT, udata, addr); + if (err) + ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); - /* config index queue BA */ - srq->idx_que.umem = ib_umem_get(srq->ibsrq.device, ucmd.que_addr, - srq->idx_que.buf_size, 0); - if (IS_ERR(srq->idx_que.umem)) { - dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); - ret = PTR_ERR(srq->idx_que.umem); - goto err_user_srq_mtt; + return err; +} + +static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); +} + +static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int err; + + srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; + + buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = srq->wqe_cnt * HNS_ROCE_IDX_QUE_ENTRY_SZ; + buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + + err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, + hr_dev->caps.idx_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, addr); + if (err) { + ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + return err; } - page_count = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), - 1 << hr_dev->caps.idx_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, - &srq->idx_que.mtt); - if (ret) { - dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); - goto err_user_idx_mtt; - } + if (!udata) { + idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); + if (!idx_que->bitmap) { + ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + err = -ENOMEM; + goto err_idx_mtr; + } - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, - srq->idx_que.umem); - if (ret) { - dev_err(hr_dev->dev, - "hns_roce_ib_umem_write_mtt error for idx que\n"); - goto err_user_idx_buf; } return 0; +err_idx_mtr: + hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); -err_user_idx_buf: - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - -err_user_idx_mtt: - ib_umem_release(srq->idx_que.umem); - -err_user_srq_mtt: - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - -err_user_buf: - ib_umem_release(srq->umem); - - return ret; + return err; } -static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, - u32 page_shift) +static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; - idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); - if (!idx_que->bitmap) - return -ENOMEM; - - idx_que->buf_size = srq->idx_que.buf_size; - - if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2, - &idx_que->idx_buf, page_shift)) { - bitmap_free(idx_que->bitmap); - return -ENOMEM; - } - - return 0; + bitmap_free(idx_que->bitmap); + idx_que->bitmap = NULL; + hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); } -static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) +static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - int ret; - - if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2, - &srq->buf, page_shift)) - return -ENOMEM; - srq->head = 0; srq->tail = srq->wqe_cnt - 1; - - ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, - &srq->mtt); - if (ret) - goto err_kernel_buf; - - ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); - if (ret) - goto err_kernel_srq_mtt; - - page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift); - if (ret) { - dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret); - goto err_kernel_srq_mtt; - } - - /* Init mtt table for idx_que */ - ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, - srq->idx_que.idx_buf.page_shift, - &srq->idx_que.mtt); - if (ret) - goto err_kernel_create_idx; - - /* Write buffer address into the mtt table */ - ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, - &srq->idx_que.idx_buf); - if (ret) - goto err_kernel_idx_buf; - srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); - if (!srq->wrid) { - ret = -ENOMEM; - goto err_kernel_idx_buf; - } + if (!srq->wrid) + return -ENOMEM; return 0; - -err_kernel_idx_buf: - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - -err_kernel_create_idx: - hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); - kfree(srq->idx_que.bitmap); - -err_kernel_srq_mtt: - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - -err_kernel_buf: - hns_roce_buf_free(hr_dev, &srq->buf); - - return ret; } -static void destroy_user_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) +static void free_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - ib_umem_release(srq->idx_que.umem); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - ib_umem_release(srq->umem); -} - -static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) -{ - kvfree(srq->wrid); - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); - kfree(srq->idx_que.bitmap); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - hns_roce_buf_free(hr_dev, &srq->buf); + kfree(srq->wrid); + srq->wrid = NULL; } int hns_roce_create_srq(struct ib_srq *ib_srq, @@ -363,8 +283,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq = to_hr_srq(ib_srq); - int srq_desc_size; - int srq_buf_size; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ib_create_srq ucmd = {}; int ret = 0; u32 cqn; @@ -379,41 +299,45 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); srq->max_gs = init_attr->attr.max_sge; - srq_desc_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - HNS_ROCE_SGE_SIZE * srq->max_gs)); - - srq->wqe_shift = ilog2(srq_desc_size); - - srq_buf_size = srq->wqe_cnt * srq_desc_size; - - srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; - srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz; - srq->mtt.mtt_type = MTT_TYPE_SRQWQE; - srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; - if (udata) { - ret = create_user_srq(srq, udata, srq_buf_size); + ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (ret) { - dev_err(hr_dev->dev, "Create user srq failed\n"); - goto err_srq; + ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ret); + return ret; } - } else { - ret = create_kernel_srq(srq, srq_buf_size); + } + + ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + return ret; + } + + ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + goto err_buf_alloc; + } + + if (!udata) { + ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - dev_err(hr_dev->dev, "Create kernel srq failed\n"); - goto err_srq; + ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ret); + goto err_idx_alloc; } } cqn = ib_srq_has_cq(init_attr->srq_type) ? to_hr_cq(init_attr->ext.cq)->cqn : 0; - srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; - ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, - &srq->mtt, 0, srq); - if (ret) - goto err_wrid; + ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + goto err_wrid_alloc; + } srq->event = hns_roce_ib_srq_event; resp.srqn = srq->srqn; @@ -429,15 +353,13 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, return 0; err_srqc_alloc: - hns_roce_srq_free(hr_dev, srq); - -err_wrid: - if (udata) - destroy_user_srq(hr_dev, srq); - else - destroy_kernel_srq(hr_dev, srq); - -err_srq: + free_srqc(hr_dev, srq); +err_wrid_alloc: + free_srq_wrid(hr_dev, srq); +err_idx_alloc: + free_srq_idx(hr_dev, srq); +err_buf_alloc: + free_srq_buf(hr_dev, srq); return ret; } @@ -446,17 +368,10 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - hns_roce_srq_free(hr_dev, srq); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - - if (udata) { - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - } else { - kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, &srq->buf); - } - ib_umem_release(srq->idx_que.umem); - ib_umem_release(srq->umem); + free_srqc(hr_dev, srq); + free_srq_idx(hr_dev, srq); + free_srq_wrid(hr_dev, srq); + free_srq_buf(hr_dev, srq); } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) From 744b7bdfa79edb30bb7d5f9ae43b65e0d147533a Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 13 Apr 2020 19:58:11 +0800 Subject: [PATCH 103/562] RDMA/hns: Support 0 hop addressing for CQE buffer Add the zero hop addressing support by using mtr interface for CQE buffer, so the hns driver can support addressing hopnum between 0 to 3 for CQE. Link: https://lore.kernel.org/r/1586779091-51410-7-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 345 ++++++-------------- drivers/infiniband/hw/hns/hns_roce_device.h | 8 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 13 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 15 +- 4 files changed, 119 insertions(+), 262 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 92798ff6360d..d2d7074bbe69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -39,51 +39,40 @@ #include #include "hns_roce_common.h" -static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_hem_table *mtt_table; struct hns_roce_cq_table *cq_table; - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; + u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; - u64 *mtts; int ret; - cq_table = &hr_dev->cq_table; - - /* Get the physical address of cq buf */ - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt_table = &hr_dev->mr_table.mtt_cqe_table; - else - mtt_table = &hr_dev->mr_table.mtt_table; - - mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg, - &dma_handle); - - if (!mtts) { - dev_err(dev, "Failed to find mtt for CQ buf.\n"); + ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), + &dma_handle); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find CQ mtr\n"); return -EINVAL; } + cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - dev_err(dev, "Num of CQ out of range.\n"); + ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - dev_err(dev, - "Get context mem failed(%d) when CQ(0x%lx) alloc.\n", - ret, hr_cq->cqn); + ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + hr_cq->cqn, ret); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "Failed to xa_store CQ.\n"); + ibdev_err(ibdev, "Failed to xa_store CQ\n"); goto err_put; } @@ -101,9 +90,9 @@ static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(dev, - "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n", - ret, hr_cq->cqn); + ibdev_err(ibdev, + "Failed to send create cmd for CQ(0x%lx), err %d\n", + hr_cq->cqn, ret); goto err_xa; } @@ -126,7 +115,7 @@ static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, return ret; } -void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; @@ -153,190 +142,86 @@ void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, - struct hns_roce_ib_create_cq ucmd, - struct ib_udata *udata) +static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata, unsigned long addr) { - struct hns_roce_mtt *mtt = &hr_cq->mtt; - struct ib_umem **umem = &hr_cq->umem; - u32 npages; - int ret; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int err; - *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, hr_cq->buf_size, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(*umem)) - return PTR_ERR(*umem); + buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt->mtt_type = MTT_TYPE_CQE; - else - mtt->mtt_type = MTT_TYPE_WQE; + err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + hr_dev->caps.cqe_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, addr); + if (err) + ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); - npages = DIV_ROUND_UP(ib_umem_page_count(*umem), - 1 << hr_dev->caps.cqe_buf_pg_sz); - ret = hns_roce_mtt_init(hr_dev, npages, hr_cq->page_shift, mtt); - if (ret) - goto err_buf; - - ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem); - if (ret) - goto err_mtt; - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, mtt); - -err_buf: - ib_umem_release(*umem); - return ret; -} - -static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) -{ - struct hns_roce_buf *buf = &hr_cq->buf; - struct hns_roce_mtt *mtt = &hr_cq->mtt; - int ret; - - ret = hns_roce_buf_alloc(hr_dev, hr_cq->buf_size, - (1 << hr_cq->page_shift) * 2, - buf, hr_cq->page_shift); - if (ret) - goto out; - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt->mtt_type = MTT_TYPE_CQE; - else - mtt->mtt_type = MTT_TYPE_WQE; - - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt); - if (ret) - goto err_buf; - - ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf); - if (ret) - goto err_mtt; - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, mtt); - -err_buf: - hns_roce_buf_free(hr_dev, buf); - -out: - return ret; + return err; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, &hr_cq->buf); + hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr); } -static int create_user_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp) +static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata, unsigned long addr, + struct hns_roce_ib_create_cq_resp *resp) { - struct hns_roce_ib_create_cq ucmd; - struct device *dev = hr_dev->dev; - int ret; - struct hns_roce_ucontext *context = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); + bool has_db = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB; + struct hns_roce_ucontext *uctx; + int err; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - dev_err(dev, "Failed to copy_from_udata.\n"); - return -EFAULT; - } - - /* Get user space address, write it into mtt table */ - ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata); - if (ret) { - dev_err(dev, "Failed to get_cq_umem.\n"); - return ret; - } - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && - udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { - ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, - &hr_cq->db); - if (ret) { - dev_err(dev, "cq record doorbell map failed!\n"); - goto err_mtt; + if (udata) { + if (has_db && + udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { + uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext); + err = hns_roce_db_map_user(uctx, udata, addr, + &hr_cq->db); + if (err) + return err; + hr_cq->db_en = 1; + resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; } - hr_cq->db_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; + } else { + if (has_db) { + err = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); + if (err) + return err; + hr_cq->set_ci_db = hr_cq->db.db_record; + *hr_cq->set_ci_db = 0; + hr_cq->db_en = 1; + } + hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + + DB_REG_OFFSET * hr_dev->priv_uar.index; } return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); - - return ret; } -static int create_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { - struct device *dev = hr_dev->dev; - int ret; + struct hns_roce_ucontext *uctx; - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { - ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); - if (ret) - return ret; + if (!hr_cq->db_en) + return; - hr_cq->set_ci_db = hr_cq->db.db_record; - *hr_cq->set_ci_db = 0; - hr_cq->db_en = 1; - } - - /* Init mtt table and write buff address to mtt table */ - ret = alloc_cq_buf(hr_dev, hr_cq); - if (ret) { - dev_err(dev, "Failed to alloc_cq_buf.\n"); - goto err_db; - } - - hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * hr_dev->priv_uar.index; - - return 0; - -err_db: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_cq->db); - - return ret; -} - -static void destroy_user_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp) -{ - struct hns_roce_ucontext *context = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && - udata->outlen >= offsetofend(typeof(*resp), cap_flags)) - hns_roce_db_unmap_user(context, &hr_cq->db); - - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); -} - -static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) -{ - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - free_cq_buf(hr_dev, hr_cq); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) + hr_cq->db_en = 0; + if (udata) { + uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, + ibucontext); + hns_roce_db_unmap_user(uctx, &hr_cq->db); + } else { hns_roce_free_db(hr_dev, &hr_cq->db); + } } int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, @@ -345,20 +230,21 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ib_create_cq ucmd = {}; int vector = attr->comp_vector; u32 cq_entries = attr->cqe; int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - dev_err(dev, "Create CQ failed. entries=%d, max=%d\n", - cq_entries, hr_dev->caps.max_cqes); + ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "Create CQ failed, vector=%d, max=%d\n", - vector, hr_dev->caps.num_comp_vectors); + ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -367,30 +253,35 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ hr_cq->cq_depth = cq_entries; hr_cq->vector = vector; - hr_cq->buf_size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; - hr_cq->page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); INIT_LIST_HEAD(&hr_cq->sq_list); INIT_LIST_HEAD(&hr_cq->rq_list); if (udata) { - ret = create_user_cq(hr_dev, hr_cq, udata, &resp); + ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (ret) { - dev_err(dev, "Create cq failed in user mode!\n"); - goto err_cq; - } - } else { - ret = create_kernel_cq(hr_dev, hr_cq); - if (ret) { - dev_err(dev, "Create cq failed in kernel mode!\n"); - goto err_cq; + ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ret); + return ret; } } - ret = hns_roce_alloc_cqc(hr_dev, hr_cq); + ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { - dev_err(dev, "Alloc CQ failed(%d).\n", ret); - goto err_dbmap; + ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + return ret; + } + + ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); + if (ret) { + ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); + goto err_cq_buf; + } + + ret = alloc_cqc(hr_dev, hr_cq); + if (ret) { + ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + goto err_cq_db; } /* @@ -412,15 +303,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, return 0; err_cqc: - hns_roce_free_cqc(hr_dev, hr_cq); - -err_dbmap: - if (udata) - destroy_user_cq(hr_dev, hr_cq, udata, &resp); - else - destroy_kernel_cq(hr_dev, hr_cq); - -err_cq: + free_cqc(hr_dev, hr_cq); +err_cq_db: + free_cq_db(hr_dev, hr_cq, udata); +err_cq_buf: + free_cq_buf(hr_dev, hr_cq); return ret; } @@ -429,28 +316,12 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - if (hr_dev->hw->destroy_cq) { + if (hr_dev->hw->destroy_cq) hr_dev->hw->destroy_cq(ib_cq, udata); - return; - } - hns_roce_free_cqc(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - - ib_umem_release(hr_cq->umem); - if (udata) { - if (hr_cq->db_en == 1) - hns_roce_db_unmap_user(rdma_udata_to_drv_context( - udata, - struct hns_roce_ucontext, - ibucontext), - &hr_cq->db); - } else { - /* Free the buff of stored cq */ - free_cq_buf(hr_dev, hr_cq); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_cq->db); - } + free_cq_buf(hr_dev, hr_cq); + free_cq_db(hr_dev, hr_cq, udata); + free_cqc(hr_dev, hr_cq); } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 39af7366126a..ecbfeb6dbdd4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -503,14 +503,10 @@ struct hns_roce_db { struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_buf buf; - struct hns_roce_mtt mtt; + struct hns_roce_mtr mtr; struct hns_roce_db db; u8 db_en; spinlock_t lock; - struct ib_umem *umem; - u32 buf_size; - int page_shift; u32 cq_depth; u32 cons_index; u32 *set_ci_db; @@ -1294,8 +1290,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); - int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index ddf2a454b525..a1f053cd30b9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1972,7 +1972,8 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, + n * HNS_ROCE_V1_CQE_ENTRY_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -3644,8 +3645,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) u32 cqe_cnt_cur; int wait_time = 0; - hns_roce_free_cqc(hr_dev, hr_cq); - /* * Before freeing cq buffer, we need to ensure that the outstanding CQE * have been written by checking the CQE counter. @@ -3668,14 +3667,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) } wait_time++; } - - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - - ib_umem_release(hr_cq->umem); - if (!udata) { - /* Free the buff of stored cq */ - hns_roce_buf_free(hr_dev, &hr_cq->buf); - } } static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1aed542fbdc5..833e9298f0dc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2680,7 +2680,8 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, + n * HNS_ROCE_V2_CQE_ENTRY_SIZE); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) @@ -2801,30 +2802,30 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); - cq_context->cqe_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[0]))); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_HOP_NUM_M, V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num); - cq_context->cqe_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); + cq_context->cqe_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S, - mtts[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BA_PG_SZ_M, V2_CQC_BYTE_24_CQE_BA_PG_SZ_S, - hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift)); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S, - hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift)); cq_context->cqe_ba = cpu_to_le32(dma_handle >> 3); From 322f3d45a17f64494152bd5583b68c8855b539c0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 19 Apr 2020 16:20:46 +0300 Subject: [PATCH 104/562] RDMA/bnxt: Delete 'nq_ptr' variable which is not used The variable "nq_ptr" is set but never used, this generates the following warning while compiling kernel with W=1 option. drivers/infiniband/hw/bnxt_re/qplib_fp.c: In function 'bnxt_qplib_service_nq': drivers/infiniband/hw/bnxt_re/qplib_fp.c:303:25: warning: variable 'nq_ptr' set but not used [-Wunused-but-set-variable] 303 | struct nq_base *nqe, **nq_ptr; | Fixes: fddcbbb02af4 ("RDMA/bnxt_re: Simplify obtaining queue entry from hw ring") Link: https://lore.kernel.org/r/20200419132046.123887-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index a4de56bdd6e8..c5e29577cd43 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -300,12 +300,12 @@ static void bnxt_qplib_service_nq(unsigned long data) { struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; struct bnxt_qplib_hwq *hwq = &nq->hwq; - struct nq_base *nqe, **nq_ptr; int num_srqne_processed = 0; int num_cqne_processed = 0; struct bnxt_qplib_cq *cq; int budget = nq->budget; u32 sw_cons, raw_cons; + struct nq_base *nqe; uintptr_t q_handle; u16 type; @@ -314,7 +314,6 @@ static void bnxt_qplib_service_nq(unsigned long data) raw_cons = hwq->cons; while (budget--) { sw_cons = HWQ_CMP(raw_cons, hwq); - nq_ptr = (struct nq_base **)hwq->pbl_ptr; nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) break; From 9784c9963feccd8dede924dbc0f8b1c64506e646 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 23 Apr 2020 13:34:16 -0700 Subject: [PATCH 105/562] gpiolib: devprop: Warn if gpio-line-names is too long Some DT authors (including myself) have messed up the length of gpio-line-names and made it longer than it should be. Add a warning here so that developers can figure out that they've messed up their DT and should fix it. Cc: Alexandru M Stan Signed-off-by: Stephen Boyd Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-devprop.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c index 53781b253986..26741032fa9e 100644 --- a/drivers/gpio/gpiolib-devprop.c +++ b/drivers/gpio/gpiolib-devprop.c @@ -37,8 +37,11 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip, if (count < 0) return; - if (count > gdev->ngpio) + if (count > gdev->ngpio) { + dev_warn(&gdev->dev, "gpio-line-names is length %d but should be at most length %d", + count, gdev->ngpio); count = gdev->ngpio; + } names = kcalloc(count, sizeof(*names), GFP_KERNEL); if (!names) From 9976ea27b526365c4a9a6d3336a8b06f839ec26d Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 15 Apr 2020 16:14:30 +0800 Subject: [PATCH 106/562] RDMA/hns: Optimize hns_roce_config_link_table() Remove the unnecessary memset operation and adjust style of some lines in hns_roce_config_link_table(). Link: https://lore.kernel.org/r/1586938475-37049-2-git-send-email-liweihang@huawei.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 53 +++++++++------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 833e9298f0dc..dbbc5d42d59f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2040,8 +2040,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, page_num = link_tbl->npages; entry = link_tbl->table.buf; - memset(req_a, 0, sizeof(*req_a)); - memset(req_b, 0, sizeof(*req_b)); for (i = 0; i < 2; i++) { hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); @@ -2050,39 +2048,30 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); else desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - - if (i == 0) { - req_a->base_addr_l = - cpu_to_le32(link_tbl->table.map & 0xffffffff); - req_a->base_addr_h = - cpu_to_le32(link_tbl->table.map >> 32); - roce_set_field(req_a->depth_pgsz_init_en, - CFG_LLM_QUE_DEPTH_M, CFG_LLM_QUE_DEPTH_S, - link_tbl->npages); - roce_set_field(req_a->depth_pgsz_init_en, - CFG_LLM_QUE_PGSZ_M, CFG_LLM_QUE_PGSZ_S, - link_tbl->pg_sz); - req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0); - req_a->head_ba_h_nxtptr = - cpu_to_le32(entry[0].blk_ba1_nxt_ptr); - roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, - CFG_LLM_HEAD_PTR_S, 0); - } else { - req_b->tail_ba_l = - cpu_to_le32(entry[page_num - 1].blk_ba0); - roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M, - CFG_LLM_TAIL_BA_H_S, - entry[page_num - 1].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_BA1_M); - roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M, - CFG_LLM_TAIL_PTR_S, - (entry[page_num - 2].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> - HNS_ROCE_LINK_TABLE_NXT_PTR_S); - } } + + req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff); + req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); + roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_DEPTH_M, + CFG_LLM_QUE_DEPTH_S, link_tbl->npages); + roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_PGSZ_M, + CFG_LLM_QUE_PGSZ_S, link_tbl->pg_sz); roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1); + req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0); + req_a->head_ba_h_nxtptr = cpu_to_le32(entry[0].blk_ba1_nxt_ptr); + roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, CFG_LLM_HEAD_PTR_S, + 0); + + req_b->tail_ba_l = cpu_to_le32(entry[page_num - 1].blk_ba0); + roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M, + CFG_LLM_TAIL_BA_H_S, + entry[page_num - 1].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_BA1_M); + roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M, CFG_LLM_TAIL_PTR_S, + (entry[page_num - 2].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> + HNS_ROCE_LINK_TABLE_NXT_PTR_S); return hns_roce_cmq_send(hr_dev, desc, 2); } From 375898e83d26d6da7414f129ed417ad7cef2728f Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 15 Apr 2020 16:14:31 +0800 Subject: [PATCH 107/562] RDMA/hns: Optimize hns_roce_v2_set_mac() Removes the unnecessary memset opertaion and adjust style of some lines in hns_roce_v2_set_mac(). Link: https://lore.kernel.org/r/1586938475-37049-3-git-send-email-liweihang@huawei.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index dbbc5d42d59f..0624d5ae15d0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2427,12 +2427,9 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, reg_smac_l = *(u32 *)(&addr[0]); reg_smac_h = *(u16 *)(&addr[4]); - memset(smac_tb, 0, sizeof(*smac_tb)); - roce_set_field(smac_tb->tb_idx_rsv, - CFG_SMAC_TB_IDX_M, + roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M, CFG_SMAC_TB_IDX_S, phy_port); - roce_set_field(smac_tb->vf_smac_h_rsv, - CFG_SMAC_TB_VF_SMAC_H_M, + roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M, CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l); From a3de9e83810ced41ad7dece44c03f2338e44129d Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 15 Apr 2020 16:14:32 +0800 Subject: [PATCH 108/562] RDMA/hns: Simplify the qp state convert code Use type map table to reduce the cyclomatic complexity. Link: https://lore.kernel.org/r/1586938475-37049-4-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0624d5ae15d0..a1c819db3496 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4539,19 +4539,20 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, return ret; } -static inline enum ib_qp_state to_ib_qp_st(enum hns_roce_v2_qp_state state) +static int to_ib_qp_st(enum hns_roce_v2_qp_state state) { - switch (state) { - case HNS_ROCE_QP_ST_RST: return IB_QPS_RESET; - case HNS_ROCE_QP_ST_INIT: return IB_QPS_INIT; - case HNS_ROCE_QP_ST_RTR: return IB_QPS_RTR; - case HNS_ROCE_QP_ST_RTS: return IB_QPS_RTS; - case HNS_ROCE_QP_ST_SQ_DRAINING: - case HNS_ROCE_QP_ST_SQD: return IB_QPS_SQD; - case HNS_ROCE_QP_ST_SQER: return IB_QPS_SQE; - case HNS_ROCE_QP_ST_ERR: return IB_QPS_ERR; - default: return -1; - } + static const enum ib_qp_state map[] = { + [HNS_ROCE_QP_ST_RST] = IB_QPS_RESET, + [HNS_ROCE_QP_ST_INIT] = IB_QPS_INIT, + [HNS_ROCE_QP_ST_RTR] = IB_QPS_RTR, + [HNS_ROCE_QP_ST_RTS] = IB_QPS_RTS, + [HNS_ROCE_QP_ST_SQD] = IB_QPS_SQD, + [HNS_ROCE_QP_ST_SQER] = IB_QPS_SQE, + [HNS_ROCE_QP_ST_ERR] = IB_QPS_ERR, + [HNS_ROCE_QP_ST_SQ_DRAINING] = IB_QPS_SQD + }; + + return (state < ARRAY_SIZE(map)) ? map[state] : -1; } static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, From 7c044adca272768d821921f11d3da4587dcec68a Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 15 Apr 2020 16:14:33 +0800 Subject: [PATCH 109/562] RDMA/hns: Simplify the cqe code of poll cq Encapsulate codes to get status of cqe into a function and use map table instead of switch-case to reduce cyclomatic complexity of hns_roce_v2_poll_one(). Link: https://lore.kernel.org/r/1586938475-37049-5-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 130 +++++++++------------ 1 file changed, 57 insertions(+), 73 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a1c819db3496..9b86c8eeccf2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2955,6 +2955,61 @@ static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries, return npolled; } +static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + struct hns_roce_v2_cqe *cqe, struct ib_wc *wc) +{ + static const struct { + u32 cqe_status; + enum ib_wc_status wc_status; + } map[] = { + { HNS_ROCE_CQE_V2_SUCCESS, IB_WC_SUCCESS }, + { HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR, IB_WC_LOC_LEN_ERR }, + { HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR }, + { HNS_ROCE_CQE_V2_LOCAL_PROT_ERR, IB_WC_LOC_PROT_ERR }, + { HNS_ROCE_CQE_V2_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR }, + { HNS_ROCE_CQE_V2_MW_BIND_ERR, IB_WC_MW_BIND_ERR }, + { HNS_ROCE_CQE_V2_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR }, + { HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR, IB_WC_REM_INV_REQ_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR, IB_WC_REM_ACCESS_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_OP_ERR, IB_WC_REM_OP_ERR }, + { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR, + IB_WC_RETRY_EXC_ERR }, + { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR }, + }; + + u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, + V2_CQE_BYTE_4_STATUS_S); + int i; + + wc->status = IB_WC_GENERAL_ERR; + for (i = 0; i < ARRAY_SIZE(map); i++) + if (cqe_status == map[i].cqe_status) { + wc->status = map[i].wc_status; + break; + } + + if (wc->status == IB_WC_SUCCESS || wc->status == IB_WC_WR_FLUSH_ERR) + return; + + ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, + sizeof(*cqe), false); + + /* + * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets + * into errored mode. Hence, as a workaround to this hardware + * limitation, driver needs to assist in flushing. But the flushing + * operation uses mailbox to convey the QP state to the hardware and + * which can sleep due to the mutex protection around the mailbox calls. + * Hence, use the deferred flush for now. Once wc error detected, the + * flushing operation is needed. + */ + if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag)) + init_flush_work(hr_dev, qp); +} + static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { @@ -2966,7 +3021,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, int is_send; u16 wqe_ctr; u32 opcode; - u32 status; int qpn; int ret; @@ -2996,7 +3050,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, *cur_qp = hr_qp; } - hr_qp = *cur_qp; wc->qp = &(*cur_qp)->ibqp; wc->vendor_err = 0; @@ -3031,77 +3084,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, ++wq->tail; } - status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, - V2_CQE_BYTE_4_STATUS_S); - switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) { - case HNS_ROCE_CQE_V2_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR: - wc->status = IB_WC_LOC_QP_OP_ERR; - break; - case HNS_ROCE_CQE_V2_LOCAL_PROT_ERR: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case HNS_ROCE_CQE_V2_WR_FLUSH_ERR: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - case HNS_ROCE_CQE_V2_MW_BIND_ERR: - wc->status = IB_WC_MW_BIND_ERR; - break; - case HNS_ROCE_CQE_V2_BAD_RESP_ERR: - wc->status = IB_WC_BAD_RESP_ERR; - break; - case HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR: - wc->status = IB_WC_REM_INV_REQ_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR: - wc->status = IB_WC_REM_ACCESS_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_OP_ERR: - wc->status = IB_WC_REM_OP_ERR; - break; - case HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR: - wc->status = IB_WC_RETRY_EXC_ERR; - break; - case HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR: - wc->status = IB_WC_RNR_RETRY_EXC_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR: - wc->status = IB_WC_REM_ABORT_ERR; - break; - default: - wc->status = IB_WC_GENERAL_ERR; - break; - } - - /* - * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets - * into errored mode. Hence, as a workaround to this hardware - * limitation, driver needs to assist in flushing. But the flushing - * operation uses mailbox to convey the QP state to the hardware and - * which can sleep due to the mutex protection around the mailbox calls. - * Hence, use the deferred flush for now. Once wc error detected, the - * flushing operation is needed. - */ - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) { - ibdev_err(&hr_dev->ib_dev, "error cqe status is: 0x%x\n", - status & HNS_ROCE_V2_CQE_STATUS_MASK); - - if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) - init_flush_work(hr_dev, hr_qp); - - return 0; - } - - if (wc->status == IB_WC_WR_FLUSH_ERR) + get_cqe_status(hr_dev, *cur_qp, cqe, wc); + if (wc->status != IB_WC_SUCCESS) return 0; if (is_send) { From 357f342946860b323d1981cc42370516dbb209d2 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 15 Apr 2020 16:14:34 +0800 Subject: [PATCH 110/562] RDMA/hns: Simplify the state judgment code of qp Use state table to make the qp state migrate code more readable. Link: https://lore.kernel.org/r/1586938475-37049-6-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 54 +++++++++++----------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 9b86c8eeccf2..2a8c3893bccd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4077,21 +4077,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } -static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state, - enum ib_qp_state new_state) -{ - - if ((cur_state != IB_QPS_RESET && - (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) || - ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) && - (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS)) - return true; - - return false; - -} - static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4195,6 +4180,28 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, return 0; } +static bool check_qp_state(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + static const bool sm[][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true }, + [IB_QPS_INIT] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true, + [IB_QPS_RTR] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTR] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_SQD] = {}, + [IB_QPS_SQE] = {}, + [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } + }; + + return sm[cur_state][new_state]; +} + static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4206,6 +4213,11 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); int ret = 0; + if (!check_qp_state(cur_state, new_state)) { + ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); + return -EINVAL; + } + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, sizeof(*qpc_mask)); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, @@ -4216,23 +4228,11 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, qpc_mask); - if (ret) - goto out; } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context, qpc_mask); - if (ret) - goto out; - } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) { - /* Nothing */ - ; - } else { - ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); - ret = -EINVAL; - goto out; } -out: return ret; } From a97bf49f824e357f1cc5d292e247d05271d32afe Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 15 Apr 2020 16:14:35 +0800 Subject: [PATCH 111/562] RDMA/hns: Simplify the status judgment code of hns_roce_v1_m_qp() Use status table to reduce cyclomatic complexity. Link: https://lore.kernel.org/r/1586938475-37049-7-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 42 +++++++++++++++------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index a1f053cd30b9..49775cda83dc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2720,6 +2720,28 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return -EINVAL; } +static bool check_qp_state(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + static const bool sm[][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true }, + [IB_QPS_INIT] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true, + [IB_QPS_RTR] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTR] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_SQD] = {}, + [IB_QPS_SQE] = {}, + [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } + }; + + return sm[cur_state][new_state]; +} + static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -2741,6 +2763,13 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, u8 *dmac; u8 *smac; + if (!check_qp_state(cur_state, new_state)) { + ibdev_err(ibqp->device, + "not support QP(%u) status from %d to %d\n", + ibqp->qp_num, cur_state, new_state); + return -EINVAL; + } + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; @@ -3072,8 +3101,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_156_SL_S, rdma_ah_get_sl(&attr->ah_attr)); hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - } else if (cur_state == IB_QPS_RTR && - new_state == IB_QPS_RTS) { + } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { /* If exist optional param, return error */ if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || @@ -3245,16 +3273,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M, QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S, 0); - } else if (!((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR))) { - dev_err(dev, "not support this status migration\n"); - goto out; } /* Every status migrate must change state */ From cbb01c2f2f630f1497f703c51ff21538ae2d86b8 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Tue, 31 Mar 2020 03:40:13 -0700 Subject: [PATCH 112/562] scsi: qla2xxx: Fix MPI failure AEN (8200) handling Today, upon an MPI failure AEN, on top of collecting an MPI dump, a regular firmware dump is also taken and then chip reset. This is disruptive to IOs and not required. Make the firmware dump collection, followed by chip reset, optional (not done by default). Firmware dump buffer and MPI dump buffer are independent of each other with this change and each can have dump that was taken at two different times for two different issues. The MPI dump is saved in a separate buffer and is retrieved differently from firmware dump. To collect full dump on MPI failure AEN, a module parameter is introduced: ql2xfulldump_on_mpifail (default: 0) Link: https://lore.kernel.org/r/20200331104015.24868-2-njavali@marvell.com Reported-by: kbuild test robot Reviewed-by: Himanshu Madhani Signed-off-by: Arun Easi Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 30 +++++++- drivers/scsi/qla2xxx/qla_def.h | 13 +++- drivers/scsi/qla2xxx/qla_gbl.h | 3 + drivers/scsi/qla2xxx/qla_init.c | 2 + drivers/scsi/qla2xxx/qla_isr.c | 54 ++++++++++----- drivers/scsi/qla2xxx/qla_os.c | 6 ++ drivers/scsi/qla2xxx/qla_tmpl.c | 119 ++++++++++++++++++++++++++------ 7 files changed, 185 insertions(+), 42 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 97cabd7e0014..3a5f6f27587e 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -26,7 +26,8 @@ qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj, struct qla_hw_data *ha = vha->hw; int rval = 0; - if (!(ha->fw_dump_reading || ha->mctp_dump_reading)) + if (!(ha->fw_dump_reading || ha->mctp_dump_reading || + ha->mpi_fw_dump_reading)) return 0; mutex_lock(&ha->optrom_mutex); @@ -42,6 +43,10 @@ qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj, } else if (ha->mctp_dumped && ha->mctp_dump_reading) { rval = memory_read_from_buffer(buf, count, &off, ha->mctp_dump, MCTP_DUMP_SIZE); + } else if (ha->mpi_fw_dumped && ha->mpi_fw_dump_reading) { + rval = memory_read_from_buffer(buf, count, &off, + ha->mpi_fw_dump, + ha->mpi_fw_dump_len); } else if (ha->fw_dump_reading) { rval = memory_read_from_buffer(buf, count, &off, ha->fw_dump, ha->fw_dump_len); @@ -103,7 +108,6 @@ qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj, qla82xx_set_reset_owner(vha); qla8044_idc_unlock(ha); } else { - ha->fw_dump_mpi = 1; qla2x00_system_error(vha); } break; @@ -137,6 +141,22 @@ qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj, vha->host_no); } break; + case 8: + if (!ha->mpi_fw_dump_reading) + break; + ql_log(ql_log_info, vha, 0x70e7, + "MPI firmware dump cleared on (%ld).\n", vha->host_no); + ha->mpi_fw_dump_reading = 0; + ha->mpi_fw_dumped = 0; + break; + case 9: + if (ha->mpi_fw_dumped && !ha->mpi_fw_dump_reading) { + ha->mpi_fw_dump_reading = 1; + ql_log(ql_log_info, vha, 0x70e8, + "Raw MPI firmware dump ready for read on (%ld).\n", + vha->host_no); + } + break; } return count; } @@ -706,7 +726,8 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, scsi_unblock_requests(vha->host); break; case 0x2025d: - if (!IS_QLA81XX(ha) && !IS_QLA83XX(ha)) + if (!IS_QLA81XX(ha) && !IS_QLA83XX(ha) && + !IS_QLA27XX(ha) && !IS_QLA28XX(ha)) return -EPERM; ql_log(ql_log_info, vha, 0x706f, @@ -724,6 +745,8 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, qla83xx_idc_audit(vha, IDC_AUDIT_TIMESTAMP); qla83xx_idc_unlock(vha, 0); break; + } else if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) { + qla27xx_reset_mpi(vha); } else { /* Make sure FC side is not in reset */ WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) != @@ -737,6 +760,7 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, scsi_unblock_requests(vha->host); break; } + break; case 0x2025e: if (!IS_P3P_TYPE(ha) || vha != base_vha) { ql_log(ql_log_info, vha, 0x7071, diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 47c7a56438b5..daa9e936887b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3223,6 +3223,7 @@ struct isp_operations { uint32_t); void (*fw_dump) (struct scsi_qla_host *, int); + void (*mpi_fw_dump)(struct scsi_qla_host *, int); int (*beacon_on) (struct scsi_qla_host *); int (*beacon_off) (struct scsi_qla_host *); @@ -3748,6 +3749,11 @@ struct qlt_hw_data { #define LEAK_EXCHG_THRESH_HOLD_PERCENT 75 /* 75 percent */ +struct qla_hw_data_stat { + u32 num_fw_dump; + u32 num_mpi_reset; +}; + /* * Qlogic host adapter specific data structure. */ @@ -4230,7 +4236,6 @@ struct qla_hw_data { uint32_t fw_dump_len; u32 fw_dump_alloc_len; bool fw_dumped; - bool fw_dump_mpi; unsigned long fw_dump_cap_flags; #define RISC_PAUSE_CMPL 0 #define DMA_SHUTDOWN_CMPL 1 @@ -4241,6 +4246,10 @@ struct qla_hw_data { #define ISP_MBX_RDY 6 #define ISP_SOFT_RESET_CMPL 7 int fw_dump_reading; + void *mpi_fw_dump; + u32 mpi_fw_dump_len; + int mpi_fw_dump_reading:1; + int mpi_fw_dumped:1; int prev_minidump_failed; dma_addr_t eft_dma; void *eft; @@ -4454,6 +4463,8 @@ struct qla_hw_data { uint16_t last_zio_threshold; #define DEFAULT_ZIO_THRESHOLD 5 + + struct qla_hw_data_stat stat; }; struct active_regions { diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 1b93f5b4d77d..b20c5fa122fb 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -173,6 +173,7 @@ extern int ql2xenablemsix; extern int qla2xuseresexchforels; extern int ql2xexlogins; extern int ql2xdifbundlinginternalbuffers; +extern int ql2xfulldump_on_mpifail; extern int qla2x00_loop_reset(scsi_qla_host_t *); extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int); @@ -645,6 +646,7 @@ extern void qla82xx_fw_dump(scsi_qla_host_t *, int); extern void qla8044_fw_dump(scsi_qla_host_t *, int); extern void qla27xx_fwdump(scsi_qla_host_t *, int); +extern void qla27xx_mpi_fwdump(scsi_qla_host_t *, int); extern ulong qla27xx_fwdt_calculate_dump_size(struct scsi_qla_host *, void *); extern int qla27xx_fwdt_template_valid(void *); extern ulong qla27xx_fwdt_template_size(void *); @@ -933,5 +935,6 @@ extern void qla24xx_process_purex_list(struct purex_list *); /* nvme.c */ void qla_nvme_unregister_remote_port(struct fc_port *fcport); +void qla27xx_reset_mpi(scsi_qla_host_t *vha); void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea); #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 80390d3f3236..95b6166ae0cc 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3339,6 +3339,8 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) dump_size / 1024); if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) { + ha->mpi_fw_dump = (char *)fw_dump + + ha->fwdt[1].dump_size; mutex_unlock(&ha->optrom_mutex); return; } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 8d7a905f6247..a9e8513e1cf1 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -757,6 +757,39 @@ qla2x00_find_fcport_by_nportid(scsi_qla_host_t *vha, port_id_t *id, return NULL; } +/* Shall be called only on supported adapters. */ +static void +qla27xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb) +{ + struct qla_hw_data *ha = vha->hw; + bool reset_isp_needed = 0; + + ql_log(ql_log_warn, vha, 0x02f0, + "MPI Heartbeat stop. MPI reset is%s needed. " + "MB0[%xh] MB1[%xh] MB2[%xh] MB3[%xh]\n", + mb[0] & BIT_8 ? "" : " not", + mb[0], mb[1], mb[2], mb[3]); + + if ((mb[1] & BIT_8) == 0) + return; + + ql_log(ql_log_warn, vha, 0x02f1, + "MPI Heartbeat stop. FW dump needed\n"); + + if (ql2xfulldump_on_mpifail) { + ha->isp_ops->fw_dump(vha, 1); + reset_isp_needed = 1; + } + + ha->isp_ops->mpi_fw_dump(vha, 1); + + if (reset_isp_needed) { + vha->hw->flags.fw_init_done = 0; + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } +} + /** * qla2x00_async_event() - Process aynchronous events. * @vha: SCSI driver HA context @@ -872,9 +905,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) "ISP System Error - mbx1=%xh mbx2=%xh mbx3=%xh.\n ", mb[1], mb[2], mb[3]); - ha->fw_dump_mpi = - (IS_QLA27XX(ha) || IS_QLA28XX(ha)) && - RD_REG_WORD(®24->mailbox7) & BIT_8; + if ((IS_QLA27XX(ha) || IS_QLA28XX(ha)) && + RD_REG_WORD(®24->mailbox7) & BIT_8) + ha->isp_ops->mpi_fw_dump(vha, 1); ha->isp_ops->fw_dump(vha, 1); ha->flags.fw_init_done = 0; QLA_FW_STOPPED(ha); @@ -1375,20 +1408,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) case MBA_IDC_AEN: if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) { - ha->flags.fw_init_done = 0; - ql_log(ql_log_warn, vha, 0xffff, - "MPI Heartbeat stop. Chip reset needed. MB0[%xh] MB1[%xh] MB2[%xh] MB3[%xh]\n", - mb[0], mb[1], mb[2], mb[3]); - - if ((mb[1] & BIT_8) || - (mb[2] & BIT_8)) { - ql_log(ql_log_warn, vha, 0xd013, - "MPI Heartbeat stop. FW dump needed\n"); - ha->fw_dump_mpi = 1; - ha->isp_ops->fw_dump(vha, 1); - } - set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); + qla27xx_handle_8200_aen(vha, mb); } else if (IS_QLA83XX(ha)) { mb[4] = RD_REG_WORD(®24->mailbox4); mb[5] = RD_REG_WORD(®24->mailbox5); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d190db5ea7d9..4c645d568cf7 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -35,6 +35,11 @@ static int apidev_major; */ struct kmem_cache *srb_cachep; +int ql2xfulldump_on_mpifail; +module_param(ql2xfulldump_on_mpifail, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(ql2xfulldump_on_mpifail, + "Set this to take full dump on MPI hang."); + /* * CT6 CTX allocation cache */ @@ -2518,6 +2523,7 @@ static struct isp_operations qla27xx_isp_ops = { .read_nvram = NULL, .write_nvram = NULL, .fw_dump = qla27xx_fwdump, + .mpi_fw_dump = qla27xx_mpi_fwdump, .beacon_on = qla24xx_beacon_on, .beacon_off = qla24xx_beacon_off, .beacon_blink = qla83xx_beacon_blink, diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 6aeb1c3fb7a8..342363862434 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -12,6 +12,33 @@ #define IOBASE(vha) IOBAR(ISPREG(vha)) #define INVALID_ENTRY ((struct qla27xx_fwdt_entry *)0xffffffffffffffffUL) +/* hardware_lock assumed held. */ +static void +qla27xx_write_remote_reg(struct scsi_qla_host *vha, + u32 addr, u32 data) +{ + char *reg = (char *)ISPREG(vha); + + ql_dbg(ql_dbg_misc, vha, 0xd300, + "%s: addr/data = %xh/%xh\n", __func__, addr, data); + + WRT_REG_DWORD(reg + IOBASE(vha), 0x40); + WRT_REG_DWORD(reg + 0xc4, data); + WRT_REG_DWORD(reg + 0xc0, addr); +} + +void +qla27xx_reset_mpi(scsi_qla_host_t *vha) +{ + ql_dbg(ql_dbg_misc + ql_dbg_verbose, vha, 0xd301, + "Entered %s.\n", __func__); + + qla27xx_write_remote_reg(vha, 0x104050, 0x40004); + qla27xx_write_remote_reg(vha, 0x10405c, 0x4); + + vha->hw->stat.num_mpi_reset++; +} + static inline void qla27xx_insert16(uint16_t value, void *buf, ulong *len) { @@ -997,6 +1024,62 @@ qla27xx_fwdt_template_valid(void *p) return true; } +void +qla27xx_mpi_fwdump(scsi_qla_host_t *vha, int hardware_locked) +{ + ulong flags = 0; + bool need_mpi_reset = 1; + +#ifndef __CHECKER__ + if (!hardware_locked) + spin_lock_irqsave(&vha->hw->hardware_lock, flags); +#endif + if (!vha->hw->mpi_fw_dump) { + ql_log(ql_log_warn, vha, 0x02f3, "-> mpi_fwdump no buffer\n"); + } else if (vha->hw->mpi_fw_dumped) { + ql_log(ql_log_warn, vha, 0x02f4, + "-> MPI firmware already dumped (%p) -- ignoring request\n", + vha->hw->mpi_fw_dump); + } else { + struct fwdt *fwdt = &vha->hw->fwdt[1]; + ulong len; + void *buf = vha->hw->mpi_fw_dump; + + ql_log(ql_log_warn, vha, 0x02f5, "-> fwdt1 running...\n"); + if (!fwdt->template) { + ql_log(ql_log_warn, vha, 0x02f6, + "-> fwdt1 no template\n"); + goto bailout; + } + len = qla27xx_execute_fwdt_template(vha, fwdt->template, buf); + if (len == 0) { + goto bailout; + } else if (len != fwdt->dump_size) { + ql_log(ql_log_warn, vha, 0x02f7, + "-> fwdt1 fwdump residual=%+ld\n", + fwdt->dump_size - len); + } else { + need_mpi_reset = 0; + } + + vha->hw->mpi_fw_dump_len = len; + vha->hw->mpi_fw_dumped = 1; + + ql_log(ql_log_warn, vha, 0x02f8, + "-> MPI firmware dump saved to buffer (%lu/%p)\n", + vha->host_no, vha->hw->mpi_fw_dump); + qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP); + } + +bailout: + if (need_mpi_reset) + qla27xx_reset_mpi(vha); +#ifndef __CHECKER__ + if (!hardware_locked) + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); +#endif +} + void qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) { @@ -1015,30 +1098,25 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) vha->hw->fw_dump); } else { struct fwdt *fwdt = vha->hw->fwdt; - uint j; ulong len; void *buf = vha->hw->fw_dump; - uint count = vha->hw->fw_dump_mpi ? 2 : 1; - for (j = 0; j < count; j++, fwdt++, buf += len) { - ql_log(ql_log_warn, vha, 0xd011, - "-> fwdt%u running...\n", j); - if (!fwdt->template) { - ql_log(ql_log_warn, vha, 0xd012, - "-> fwdt%u no template\n", j); - break; - } - len = qla27xx_execute_fwdt_template(vha, - fwdt->template, buf); - if (len == 0) { - goto bailout; - } else if (len != fwdt->dump_size) { - ql_log(ql_log_warn, vha, 0xd013, - "-> fwdt%u fwdump residual=%+ld\n", - j, fwdt->dump_size - len); - } + ql_log(ql_log_warn, vha, 0xd011, "-> fwdt0 running...\n"); + if (!fwdt->template) { + ql_log(ql_log_warn, vha, 0xd012, + "-> fwdt0 no template\n"); + goto bailout; } - vha->hw->fw_dump_len = buf - (void *)vha->hw->fw_dump; + len = qla27xx_execute_fwdt_template(vha, fwdt->template, buf); + if (len == 0) { + goto bailout; + } else if (len != fwdt->dump_size) { + ql_log(ql_log_warn, vha, 0xd013, + "-> fwdt0 fwdump residual=%+ld\n", + fwdt->dump_size - len); + } + + vha->hw->fw_dump_len = len; vha->hw->fw_dumped = 1; ql_log(ql_log_warn, vha, 0xd015, @@ -1048,7 +1126,6 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) } bailout: - vha->hw->fw_dump_mpi = 0; #ifndef __CHECKER__ if (!hardware_locked) spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); From f8e25f97407c2bcfaf54704938a5e5120e190002 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Thu, 2 Apr 2020 19:08:32 +0800 Subject: [PATCH 113/562] scsi: cxgb4i: Remove superfluous null check In do_abort_rpl_rss, the null check of 'clk' is not needed. Link: https://lore.kernel.org/r/20200402110832.12712-1-vulab@iscas.ac.cn Signed-off-by: Xu Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index bc1086ae6835..8ce8592f6a64 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1127,10 +1127,9 @@ static void do_abort_rpl_rss(struct cxgbi_device *cdev, struct sk_buff *skb) if (!csk) goto rel_skb; - if (csk) - pr_info_ipaddr("csk 0x%p,%u,0x%lx,%u, status %u.\n", - (&csk->saddr), (&csk->daddr), csk, - csk->state, csk->flags, csk->tid, rpl->status); + pr_info_ipaddr("csk 0x%p,%u,0x%lx,%u, status %u.\n", + (&csk->saddr), (&csk->daddr), csk, + csk->state, csk->flags, csk->tid, rpl->status); if (rpl->status == CPL_ERR_ABORT_FAILED) goto rel_skb; From 9cd7d494bd2b1fb53884a797b460b4f1741b82cc Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 3 Apr 2020 17:36:10 +0100 Subject: [PATCH 114/562] scsi: aic7xxx: Use kzalloc() instead of kmalloc()+memset() There are a couple of places where kzalloc() could be used directly instead of calling kmalloc() then memset(). Replace them. Link: https://lore.kernel.org/r/20200403163611.46756-1-alex.dewar@gmx.co.uk Signed-off-by: Alex Dewar Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic79xx_core.c | 3 +-- drivers/scsi/aic7xxx/aic7xxx_core.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index a336a458c978..c38163707259 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -6054,14 +6054,13 @@ ahd_alloc(void *platform_arg, char *name) { struct ahd_softc *ahd; - ahd = kmalloc(sizeof(*ahd), GFP_ATOMIC); + ahd = kzalloc(sizeof(*ahd), GFP_ATOMIC); if (!ahd) { printk("aic7xxx: cannot malloc softc!\n"); kfree(name); return NULL; } - memset(ahd, 0, sizeof(*ahd)); ahd->seep_config = kmalloc(sizeof(*ahd->seep_config), GFP_ATOMIC); if (ahd->seep_config == NULL) { kfree(ahd); diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 84fc499cb1e6..36a7ea1ba7da 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -4384,13 +4384,13 @@ ahc_alloc(void *platform_arg, char *name) struct ahc_softc *ahc; int i; - ahc = kmalloc(sizeof(*ahc), GFP_ATOMIC); + ahc = kzalloc(sizeof(*ahc), GFP_ATOMIC); if (!ahc) { printk("aic7xxx: cannot malloc softc!\n"); kfree(name); return NULL; } - memset(ahc, 0, sizeof(*ahc)); + ahc->seep_config = kmalloc(sizeof(*ahc->seep_config), GFP_ATOMIC); if (ahc->seep_config == NULL) { kfree(ahc); From d1ec20a55f6c8ce485fd298a015ec83d7e5326d8 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 3 Apr 2020 17:47:11 +0100 Subject: [PATCH 115/562] scsi: aic7xxx: Remove unnecessary NULL checks before kfree There are a number of places in the aic7xxx driver where a NULL check is performed before a kfree(). However, kfree() already performs NULL checks so this is unnecessary. Remove the checks. Issue identified with Coccinelle. Link: https://lore.kernel.org/r/20200403164712.49579-1-alex.dewar@gmx.co.uk Signed-off-by: Alex Dewar Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic79xx_core.c | 15 +++++---------- drivers/scsi/aic7xxx/aic7xxx_core.c | 15 +++++---------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index c38163707259..e4a09b93d00c 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -3662,8 +3662,7 @@ ahd_free_tstate(struct ahd_softc *ahd, u_int scsi_id, char channel, int force) return; tstate = ahd->enabled_targets[scsi_id]; - if (tstate != NULL) - kfree(tstate); + kfree(tstate); ahd->enabled_targets[scsi_id] = NULL; } #endif @@ -6119,8 +6118,7 @@ ahd_set_unit(struct ahd_softc *ahd, int unit) void ahd_set_name(struct ahd_softc *ahd, char *name) { - if (ahd->name != NULL) - kfree(ahd->name); + kfree(ahd->name); ahd->name = name; } @@ -6181,12 +6179,9 @@ ahd_free(struct ahd_softc *ahd) kfree(ahd->black_hole); } #endif - if (ahd->name != NULL) - kfree(ahd->name); - if (ahd->seep_config != NULL) - kfree(ahd->seep_config); - if (ahd->saved_stack != NULL) - kfree(ahd->saved_stack); + kfree(ahd->name); + kfree(ahd->seep_config); + kfree(ahd->saved_stack); kfree(ahd); return; } diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 36a7ea1ba7da..3d4df906fa4f 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -2178,8 +2178,7 @@ ahc_free_tstate(struct ahc_softc *ahc, u_int scsi_id, char channel, int force) if (channel == 'B') scsi_id += 8; tstate = ahc->enabled_targets[scsi_id]; - if (tstate != NULL) - kfree(tstate); + kfree(tstate); ahc->enabled_targets[scsi_id] = NULL; } #endif @@ -4453,8 +4452,7 @@ ahc_set_unit(struct ahc_softc *ahc, int unit) void ahc_set_name(struct ahc_softc *ahc, char *name) { - if (ahc->name != NULL) - kfree(ahc->name); + kfree(ahc->name); ahc->name = name; } @@ -4515,10 +4513,8 @@ ahc_free(struct ahc_softc *ahc) kfree(ahc->black_hole); } #endif - if (ahc->name != NULL) - kfree(ahc->name); - if (ahc->seep_config != NULL) - kfree(ahc->seep_config); + kfree(ahc->name); + kfree(ahc->seep_config); kfree(ahc); return; } @@ -4927,8 +4923,7 @@ ahc_fini_scbdata(struct ahc_softc *ahc) case 0: break; } - if (scb_data->scbarray != NULL) - kfree(scb_data->scbarray); + kfree(scb_data->scbarray); } static void From bc834e074e36262d62a9e26742ed5a5246a469eb Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sat, 11 Apr 2020 01:19:31 +0100 Subject: [PATCH 116/562] scsi: bnx2fc: Add missing annotation for bnx2fc_abts_cleanup() Sparse reports the following warning: warning: context imbalance in bnx2fc_abts_cleanup() - unexpected unlock The root cause is the missing annotation at bnx2fc_abts_cleanup(). Add the missing __must_hold(&tgt->tgt_lock) annotation. Link: https://lore.kernel.org/r/20200411001933.10072-8-jbi.octave@gmail.com Signed-off-by: Jules Irenge Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 2b070f0835df..1aba5897ccb0 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1081,6 +1081,7 @@ int bnx2fc_eh_device_reset(struct scsi_cmnd *sc_cmd) } static int bnx2fc_abts_cleanup(struct bnx2fc_cmd *io_req) + __must_hold(&tgt->tgt_lock) { struct bnx2fc_rport *tgt = io_req->tgt; unsigned int time_left; From ba27c5cf286df03a98fcdc6800b99c35e3cbdd2f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Apr 2020 03:23:12 -0400 Subject: [PATCH 117/562] scsi: mpt3sas: Don't change the DMA coherent mask after allocations The DMA layer does not allow changing the DMA coherent mask after there are outstanding allocations. Link: https://lore.kernel.org/r/1587626596-1044-2-git-send-email-suganath-prabu.subramani@broadcom.com Reported-by: Abdul Haleem Signed-off-by: Christoph Hellwig Signed-off-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 67 ++++++++--------------------- drivers/scsi/mpt3sas/mpt3sas_base.h | 2 - 2 files changed, 19 insertions(+), 50 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 920a3a25a7a2..58d607b422d2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2806,58 +2806,38 @@ _base_build_sg_ieee(struct MPT3SAS_ADAPTER *ioc, void *psge, static int _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) { - u64 required_mask, coherent_mask; struct sysinfo s; + int dma_mask; + + if (ioc->is_mcpu_endpoint || + sizeof(dma_addr_t) == 4 || + dma_get_required_mask(&pdev->dev) <= 32) + dma_mask = 32; /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ - int dma_mask = (ioc->hba_mpi_version_belonged > MPI2_VERSION) ? 63 : 64; - - if (ioc->is_mcpu_endpoint) - goto try_32bit; - - required_mask = dma_get_required_mask(&pdev->dev); - if (sizeof(dma_addr_t) == 4 || required_mask == 32) - goto try_32bit; - - if (ioc->dma_mask) - coherent_mask = DMA_BIT_MASK(dma_mask); + else if (ioc->hba_mpi_version_belonged > MPI2_VERSION) + dma_mask = 63; else - coherent_mask = DMA_BIT_MASK(32); + dma_mask = 64; if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(dma_mask)) || - dma_set_coherent_mask(&pdev->dev, coherent_mask)) - goto try_32bit; - - ioc->base_add_sg_single = &_base_add_sg_single_64; - ioc->sge_size = sizeof(Mpi2SGESimple64_t); - ioc->dma_mask = dma_mask; - goto out; - - try_32bit: - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(dma_mask))) return -ENODEV; - ioc->base_add_sg_single = &_base_add_sg_single_32; - ioc->sge_size = sizeof(Mpi2SGESimple32_t); - ioc->dma_mask = 32; - out: + if (dma_mask > 32) { + ioc->base_add_sg_single = &_base_add_sg_single_64; + ioc->sge_size = sizeof(Mpi2SGESimple64_t); + } else { + ioc->base_add_sg_single = &_base_add_sg_single_32; + ioc->sge_size = sizeof(Mpi2SGESimple32_t); + } + si_meminfo(&s); ioc_info(ioc, "%d BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (%ld kB)\n", - ioc->dma_mask, convert_to_kb(s.totalram)); + dma_mask, convert_to_kb(s.totalram)); return 0; } -static int -_base_change_consistent_dma_mask(struct MPT3SAS_ADAPTER *ioc, - struct pci_dev *pdev) -{ - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(ioc->dma_mask))) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) - return -ENODEV; - } - return 0; -} - /** * _base_check_enable_msix - checks MSIX capabable. * @ioc: per adapter object @@ -5169,14 +5149,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) total_sz += sz; } while (ioc->rdpq_array_enable && (++i < ioc->reply_queue_count)); - if (ioc->dma_mask > 32) { - if (_base_change_consistent_dma_mask(ioc, ioc->pdev) != 0) { - ioc_warn(ioc, "no suitable consistent DMA mask for %s\n", - pci_name(ioc->pdev)); - goto out; - } - } - ioc->scsiio_depth = ioc->hba_queue_depth - ioc->hi_priority_depth - ioc->internal_depth; @@ -7158,7 +7130,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->smp_affinity_enable = smp_affinity_enable; ioc->rdpq_array_enable_assigned = 0; - ioc->dma_mask = 0; if (ioc->is_aero_ioc) ioc->base_readl = &_base_readl_aero; else diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index e7197150721f..caae04086539 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1026,7 +1026,6 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); * @ir_firmware: IR firmware present * @bars: bitmask of BAR's that must be configured * @mask_interrupts: ignore interrupt - * @dma_mask: used to set the consistent dma mask * @pci_access_mutex: Mutex to synchronize ioctl, sysfs show path and * pci resource handling * @fault_reset_work_q_name: fw fault work queue @@ -1205,7 +1204,6 @@ struct MPT3SAS_ADAPTER { u8 ir_firmware; int bars; u8 mask_interrupts; - int dma_mask; /* fw fault handler */ char fault_reset_work_q_name[20]; From 550dc875cfbc0b1847096f07d0964ce569e41579 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Thu, 23 Apr 2020 03:23:13 -0400 Subject: [PATCH 118/562] scsi: mpt3sas: Rename function name is_MSB_are_same Rename is_MSB_are_same() to mpt3sas_check_same_4gb_region() for better readability. Link: https://lore.kernel.org/r/1587626596-1044-3-git-send-email-suganath-prabu.subramani@broadcom.com Reviewed-by: Christoph Hellwig Signed-off-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 58d607b422d2..f0eace3517f4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -4915,7 +4915,7 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) } /** - * is_MSB_are_same - checks whether all reply queues in a set are + * mpt3sas_check_same_4gb_region - checks whether all reply queues in a set are * having same upper 32bits in their base memory address. * @reply_pool_start_address: Base address of a reply queue set * @pool_sz: Size of single Reply Descriptor Post Queues pool size @@ -4925,7 +4925,7 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) */ static int -is_MSB_are_same(long reply_pool_start_address, u32 pool_sz) +mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) { long reply_pool_end_address; @@ -5377,7 +5377,7 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) * Actual requirement is not alignment, but we need start and end of * DMA address must have same upper 32 bit address. */ - if (!is_MSB_are_same((long)ioc->sense, sz)) { + if (!mpt3sas_check_same_4gb_region((long)ioc->sense, sz)) { //Release Sense pool & Reallocate dma_pool_free(ioc->sense_dma_pool, ioc->sense, ioc->sense_dma); dma_pool_destroy(ioc->sense_dma_pool); From 85896421df0cf66347003cb4603613780e81621b Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Thu, 23 Apr 2020 03:23:14 -0400 Subject: [PATCH 119/562] scsi: mpt3sas: Separate out RDPQ allocation to new function For readability separate out RDPQ allocations to new function base_alloc_rdpq_dma_pool(). Link: https://lore.kernel.org/r/1587626596-1044-4-git-send-email-suganath-prabu.subramani@broadcom.com Reviewed-by: Christoph Hellwig Signed-off-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 79 ++++++++++++++++------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index f0eace3517f4..cabbdc286c1e 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -4938,6 +4938,49 @@ mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) return 0; } +/** + * base_alloc_rdpq_dma_pool - Allocating DMA'able memory + * for reply queues. + * @ioc: per adapter object + * @sz: DMA Pool size + * Return: 0 for success, non-zero for failure. + */ +static int +base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) +{ + int i; + int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1; + + ioc->reply_post = kcalloc(count, sizeof(struct reply_post_struct), + GFP_KERNEL); + if (!ioc->reply_post) + return -ENOMEM; + ioc->reply_post_free_dma_pool = + dma_pool_create("reply_post_free pool", + &ioc->pdev->dev, sz, 16, 0); + if (!ioc->reply_post_free_dma_pool) + return -ENOMEM; + i = 0; + do { + ioc->reply_post[i].reply_post_free = + dma_pool_zalloc(ioc->reply_post_free_dma_pool, + GFP_KERNEL, + &ioc->reply_post[i].reply_post_free_dma); + if (!ioc->reply_post[i].reply_post_free) + return -ENOMEM; + dinitprintk(ioc, + ioc_info(ioc, "reply post free pool (0x%p): depth(%d)," + "element_size(%d), pool_size(%d kB)\n", + ioc->reply_post[i].reply_post_free, + ioc->reply_post_queue_depth, 8, sz / 1024)); + dinitprintk(ioc, + ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n", + (u64)ioc->reply_post[i].reply_post_free_dma)); + + } while (ioc->rdpq_array_enable && ++i < ioc->reply_queue_count); + return 0; +} + /** * _base_allocate_memory_pools - allocate start of day memory pools * @ioc: per adapter object @@ -5113,41 +5156,9 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) sz = reply_post_free_sz; if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) sz *= ioc->reply_queue_count; - - ioc->reply_post = kcalloc((ioc->rdpq_array_enable) ? - (ioc->reply_queue_count):1, - sizeof(struct reply_post_struct), GFP_KERNEL); - - if (!ioc->reply_post) { - ioc_err(ioc, "reply_post_free pool: kcalloc failed\n"); + if (base_alloc_rdpq_dma_pool(ioc, sz)) goto out; - } - ioc->reply_post_free_dma_pool = dma_pool_create("reply_post_free pool", - &ioc->pdev->dev, sz, 16, 0); - if (!ioc->reply_post_free_dma_pool) { - ioc_err(ioc, "reply_post_free pool: dma_pool_create failed\n"); - goto out; - } - i = 0; - do { - ioc->reply_post[i].reply_post_free = - dma_pool_zalloc(ioc->reply_post_free_dma_pool, - GFP_KERNEL, - &ioc->reply_post[i].reply_post_free_dma); - if (!ioc->reply_post[i].reply_post_free) { - ioc_err(ioc, "reply_post_free pool: dma_pool_alloc failed\n"); - goto out; - } - dinitprintk(ioc, - ioc_info(ioc, "reply post free pool (0x%p): depth(%d), element_size(%d), pool_size(%d kB)\n", - ioc->reply_post[i].reply_post_free, - ioc->reply_post_queue_depth, - 8, sz / 1024)); - dinitprintk(ioc, - ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n", - (u64)ioc->reply_post[i].reply_post_free_dma)); - total_sz += sz; - } while (ioc->rdpq_array_enable && (++i < ioc->reply_queue_count)); + total_sz += sz * (!ioc->rdpq_array_enable ? 1 : ioc->reply_queue_count); ioc->scsiio_depth = ioc->hba_queue_depth - ioc->hi_priority_depth - ioc->internal_depth; From 8012209eb26b7819385a6ec6eae4b1d0a0dbe585 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Thu, 23 Apr 2020 03:23:15 -0400 Subject: [PATCH 120/562] scsi: mpt3sas: Handle RDPQ DMA allocation in same 4G region For INVADER_SERIES, each set of 8 reply queues (0 - 7, 8 - 15,..), and for VENTURA_SERIES, each set of 16 reply queues (0 - 15, 16 - 31,..) need to be within the same 4 GB boundary. Driver uses limitation of VENTURA_SERIES to manage INVADER_SERIES as well. The driver is allocating the DMA able memory for RDPQs accordingly. 1) At driver load, set DMA mask to 64 and allocate memory for RDPQs 2) Check if allocated resources for RDPQ are in the same 4GB range 3) If #2 is true, continue with 64 bit DMA and go to #6 4) If #2 is false, then free all the resources from #1 5) Set DMA mask to 32 and allocate RDPQs 6) Proceed with driver loading and other allocations Link: https://lore.kernel.org/r/1587626596-1044-5-git-send-email-suganath-prabu.subramani@broadcom.com Reviewed-by: Christoph Hellwig Signed-off-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 152 +++++++++++++++++++--------- drivers/scsi/mpt3sas/mpt3sas_base.h | 3 + 2 files changed, 109 insertions(+), 46 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index cabbdc286c1e..e9471e501ea7 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2810,7 +2810,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) int dma_mask; if (ioc->is_mcpu_endpoint || - sizeof(dma_addr_t) == 4 || + sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || dma_get_required_mask(&pdev->dev) <= 32) dma_mask = 32; /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ @@ -4807,8 +4807,8 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) { int i = 0; int j = 0; + int dma_alloc_count = 0; struct chain_tracker *ct; - struct reply_post_struct *rps; dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); @@ -4850,29 +4850,34 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) } if (ioc->reply_post) { - do { - rps = &ioc->reply_post[i]; - if (rps->reply_post_free) { - dma_pool_free( - ioc->reply_post_free_dma_pool, - rps->reply_post_free, - rps->reply_post_free_dma); - dexitprintk(ioc, - ioc_info(ioc, "reply_post_free_pool(0x%p): free\n", - rps->reply_post_free)); - rps->reply_post_free = NULL; + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, + RDPQ_MAX_INDEX_IN_ONE_CHUNK); + for (i = 0; i < ioc->reply_queue_count; i++) { + if (i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0 + && dma_alloc_count) { + if (ioc->reply_post[i].reply_post_free) { + dma_pool_free( + ioc->reply_post_free_dma_pool, + ioc->reply_post[i].reply_post_free, + ioc->reply_post[i].reply_post_free_dma); + dexitprintk(ioc, ioc_info(ioc, + "reply_post_free_pool(0x%p): free\n", + ioc->reply_post[i].reply_post_free)); + ioc->reply_post[i].reply_post_free = + NULL; + } + --dma_alloc_count; } - } while (ioc->rdpq_array_enable && - (++i < ioc->reply_queue_count)); + } + dma_pool_destroy(ioc->reply_post_free_dma_pool); if (ioc->reply_post_free_array && ioc->rdpq_array_enable) { dma_pool_free(ioc->reply_post_free_array_dma_pool, - ioc->reply_post_free_array, - ioc->reply_post_free_array_dma); + ioc->reply_post_free_array, + ioc->reply_post_free_array_dma); ioc->reply_post_free_array = NULL; } dma_pool_destroy(ioc->reply_post_free_array_dma_pool); - dma_pool_destroy(ioc->reply_post_free_dma_pool); kfree(ioc->reply_post); } @@ -4948,36 +4953,75 @@ mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) static int base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) { - int i; + int i = 0; + u32 dma_alloc_count = 0; + int reply_post_free_sz = ioc->reply_post_queue_depth * + sizeof(Mpi2DefaultReplyDescriptor_t); int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1; ioc->reply_post = kcalloc(count, sizeof(struct reply_post_struct), GFP_KERNEL); if (!ioc->reply_post) return -ENOMEM; + /* + * For INVADER_SERIES each set of 8 reply queues(0-7, 8-15, ..) and + * VENTURA_SERIES each set of 16 reply queues(0-15, 16-31, ..) should + * be within 4GB boundary i.e reply queues in a set must have same + * upper 32-bits in their memory address. so here driver is allocating + * the DMA'able memory for reply queues according. + * Driver uses limitation of + * VENTURA_SERIES to manage INVADER_SERIES as well. + */ + dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, + RDPQ_MAX_INDEX_IN_ONE_CHUNK); ioc->reply_post_free_dma_pool = - dma_pool_create("reply_post_free pool", - &ioc->pdev->dev, sz, 16, 0); + dma_pool_create("reply_post_free pool", + &ioc->pdev->dev, sz, 16, 0); if (!ioc->reply_post_free_dma_pool) return -ENOMEM; - i = 0; - do { - ioc->reply_post[i].reply_post_free = - dma_pool_zalloc(ioc->reply_post_free_dma_pool, - GFP_KERNEL, - &ioc->reply_post[i].reply_post_free_dma); - if (!ioc->reply_post[i].reply_post_free) - return -ENOMEM; - dinitprintk(ioc, - ioc_info(ioc, "reply post free pool (0x%p): depth(%d)," - "element_size(%d), pool_size(%d kB)\n", - ioc->reply_post[i].reply_post_free, - ioc->reply_post_queue_depth, 8, sz / 1024)); - dinitprintk(ioc, - ioc_info(ioc, "reply_post_free_dma = (0x%llx)\n", - (u64)ioc->reply_post[i].reply_post_free_dma)); + for (i = 0; i < ioc->reply_queue_count; i++) { + if ((i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0) && dma_alloc_count) { + ioc->reply_post[i].reply_post_free = + dma_pool_alloc(ioc->reply_post_free_dma_pool, + GFP_KERNEL, + &ioc->reply_post[i].reply_post_free_dma); + if (!ioc->reply_post[i].reply_post_free) + return -ENOMEM; + /* + * Each set of RDPQ pool must satisfy 4gb boundary + * restriction. + * 1) Check if allocated resources for RDPQ pool are in + * the same 4GB range. + * 2) If #1 is true, continue with 64 bit DMA. + * 3) If #1 is false, return 1. which means free all the + * resources and set DMA mask to 32 and allocate. + */ + if (!mpt3sas_check_same_4gb_region( + (long)ioc->reply_post[i].reply_post_free, sz)) { + dinitprintk(ioc, + ioc_err(ioc, "bad Replypost free pool(0x%p)" + "reply_post_free_dma = (0x%llx)\n", + ioc->reply_post[i].reply_post_free, + (unsigned long long) + ioc->reply_post[i].reply_post_free_dma)); + return -EAGAIN; + } + memset(ioc->reply_post[i].reply_post_free, 0, + RDPQ_MAX_INDEX_IN_ONE_CHUNK * + reply_post_free_sz); + dma_alloc_count--; - } while (ioc->rdpq_array_enable && ++i < ioc->reply_queue_count); + } else { + ioc->reply_post[i].reply_post_free = + (Mpi2ReplyDescriptorsUnion_t *) + ((long)ioc->reply_post[i-1].reply_post_free + + reply_post_free_sz); + ioc->reply_post[i].reply_post_free_dma = + (dma_addr_t) + (ioc->reply_post[i-1].reply_post_free_dma + + reply_post_free_sz); + } + } return 0; } @@ -4995,10 +5039,12 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) u16 chains_needed_per_io; u32 sz, total_sz, reply_post_free_sz, reply_post_free_array_sz; u32 retry_sz; + u32 rdpq_sz = 0; u16 max_request_credit, nvme_blocks_needed; unsigned short sg_tablesize; u16 sge_size; int i, j; + int ret = 0; struct chain_tracker *ct; dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); @@ -5152,14 +5198,28 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) /* reply post queue, 16 byte align */ reply_post_free_sz = ioc->reply_post_queue_depth * sizeof(Mpi2DefaultReplyDescriptor_t); - - sz = reply_post_free_sz; + rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK; if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) - sz *= ioc->reply_queue_count; - if (base_alloc_rdpq_dma_pool(ioc, sz)) - goto out; - total_sz += sz * (!ioc->rdpq_array_enable ? 1 : ioc->reply_queue_count); - + rdpq_sz = reply_post_free_sz * ioc->reply_queue_count; + ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz); + if (ret == -EAGAIN) { + /* + * Free allocated bad RDPQ memory pools. + * Change dma coherent mask to 32 bit and reallocate RDPQ + */ + _base_release_memory_pools(ioc); + ioc->use_32bit_dma = true; + if (_base_config_dma_addressing(ioc, ioc->pdev) != 0) { + ioc_err(ioc, + "32 DMA mask failed %s\n", pci_name(ioc->pdev)); + return -ENODEV; + } + if (base_alloc_rdpq_dma_pool(ioc, rdpq_sz)) + return -ENOMEM; + } else if (ret == -ENOMEM) + return -ENOMEM; + total_sz = rdpq_sz * (!ioc->rdpq_array_enable ? 1 : + DIV_ROUND_UP(ioc->reply_queue_count, RDPQ_MAX_INDEX_IN_ONE_CHUNK)); ioc->scsiio_depth = ioc->hba_queue_depth - ioc->hi_priority_depth - ioc->internal_depth; @@ -5171,7 +5231,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) ioc_info(ioc, "scsi host: can_queue depth (%d)\n", ioc->shost->can_queue)); - /* contiguous pool for request and chains, 16 byte align, one extra " * "frame for smid=0 */ @@ -7141,6 +7200,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->smp_affinity_enable = smp_affinity_enable; ioc->rdpq_array_enable_assigned = 0; + ioc->use_32bit_dma = 0; if (ioc->is_aero_ioc) ioc->base_readl = &_base_readl_aero; else diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index caae04086539..5a83971179f2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -367,6 +367,7 @@ struct mpt3sas_nvme_cmd { #define MPT3SAS_HIGH_IOPS_REPLY_QUEUES 8 #define MPT3SAS_HIGH_IOPS_BATCH_COUNT 16 #define MPT3SAS_GEN35_MAX_MSIX_QUEUES 128 +#define RDPQ_MAX_INDEX_IN_ONE_CHUNK 16 /* OEM Specific Flags will come from OEM specific header files */ struct Mpi2ManufacturingPage10_t { @@ -1063,6 +1064,7 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); * @thresh_hold: Max number of reply descriptors processed * before updating Host Index * @drv_support_bitmap: driver's supported feature bit map + * @use_32bit_dma: Flag to use 32 bit consistent dma mask * @scsi_io_cb_idx: shost generated commands * @tm_cb_idx: task management commands * @scsih_cb_idx: scsih internal commands @@ -1252,6 +1254,7 @@ struct MPT3SAS_ADAPTER { u8 high_iops_queues; u32 drv_support_bitmap; bool enable_sdev_max_qd; + bool use_32bit_dma; /* internal commands, callback index */ u8 scsi_io_cb_idx; From ce4c43065c0180b7cb44f549591967fa00a627c3 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Thu, 23 Apr 2020 03:23:16 -0400 Subject: [PATCH 121/562] scsi: mpt3sas: Update mpt3sas version to 33.101.00.00 Update mpt3sas driver version from 33.100.00.00 to 33.101.00.00. Link: https://lore.kernel.org/r/1587626596-1044-6-git-send-email-suganath-prabu.subramani@broadcom.com Signed-off-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 5a83971179f2..c5743792bbfb 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -76,9 +76,9 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "33.100.00.00" +#define MPT3SAS_DRIVER_VERSION "33.101.00.00" #define MPT3SAS_MAJOR_VERSION 33 -#define MPT3SAS_MINOR_VERSION 100 +#define MPT3SAS_MINOR_VERSION 101 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 From 1a5d1d940ba8448ac9a17d5961f587ca36239dad Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:41:11 +0800 Subject: [PATCH 122/562] scsi: megaraid: Use true, false for bool variables Fix the following coccicheck warning: drivers/scsi/megaraid/megaraid_sas_fusion.c:4242:6-16: WARNING: Assignment of 0/1 to bool variable drivers/scsi/megaraid/megaraid_sas_fusion.c:4786:1-29: WARNING: Assignment of 0/1 to bool variable drivers/scsi/megaraid/megaraid_sas_fusion.c:4791:1-29: WARNING: Assignment of 0/1 to bool variable drivers/scsi/megaraid/megaraid_sas_fusion.c:4716:1-29: WARNING: Assignment of 0/1 to bool variable drivers/scsi/megaraid/megaraid_sas_fusion.c:4721:1-29: WARNING: Assignment of 0/1 to bool variable Link: https://lore.kernel.org/r/20200421034111.28353-1-yanaijie@huawei.com Acked-by: Chandrakanth Patil Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index bec3d4cca74f..e0f923b8cc50 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4239,7 +4239,7 @@ static void megasas_refire_mgmt_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd_mfi; union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc; u16 smid; - bool refire_cmd = 0; + bool refire_cmd = false; u8 result; u32 opcode = 0; @@ -4713,12 +4713,12 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd) "attempting task abort! scmd(0x%p) tm_dev_handle 0x%x\n", scmd, devhandle); - mr_device_priv_data->tm_busy = 1; + mr_device_priv_data->tm_busy = true; ret = megasas_issue_tm(instance, devhandle, scmd->device->channel, scmd->device->id, smid, MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, mr_device_priv_data); - mr_device_priv_data->tm_busy = 0; + mr_device_priv_data->tm_busy = false; mutex_unlock(&instance->reset_mutex); scmd_printk(KERN_INFO, scmd, "task abort %s!! scmd(0x%p)\n", @@ -4783,12 +4783,12 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd) sdev_printk(KERN_INFO, scmd->device, "attempting target reset! scmd(0x%p) tm_dev_handle: 0x%x\n", scmd, devhandle); - mr_device_priv_data->tm_busy = 1; + mr_device_priv_data->tm_busy = true; ret = megasas_issue_tm(instance, devhandle, scmd->device->channel, scmd->device->id, 0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, mr_device_priv_data); - mr_device_priv_data->tm_busy = 0; + mr_device_priv_data->tm_busy = false; mutex_unlock(&instance->reset_mutex); scmd_printk(KERN_NOTICE, scmd, "target reset %s!!\n", (ret == SUCCESS) ? "SUCCESS" : "FAILED"); From 3fa65812c20297bf71724c6138c0aed8531012f9 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:41:20 +0800 Subject: [PATCH 123/562] scsi: BusLogic: Remove conversion to bool in blogic_inquiry() The '!=' expression itself is bool, no need to convert it to bool again. This fixes the following coccicheck warning: drivers/scsi/BusLogic.c:2240:46-51: WARNING: conversion to bool not needed here Link: https://lore.kernel.org/r/20200421034120.28433-1-yanaijie@huawei.com Acked-by: Khalid Aziz Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/BusLogic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index b5b3154e2c28..bb49d83cadc7 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -2237,7 +2237,7 @@ static bool __init blogic_inquiry(struct blogic_adapter *adapter) "INQUIRE INSTALLED DEVICES ID 0 TO 7"); for (tgt_id = 0; tgt_id < 8; tgt_id++) adapter->tgt_flags[tgt_id].tgt_exists = - (installed_devs0to7[tgt_id] != 0 ? true : false); + installed_devs0to7[tgt_id] != 0; } /* Issue the Inquire Setup Information command. From f983622ae60516d634008c7b1ff9ffff4f7bb8ae Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 23 Apr 2020 10:07:13 +0800 Subject: [PATCH 124/562] scsi: core: Avoid calling synchronize_rcu() for each device in scsi_host_block() scsi_host_block() calls scsi_internal_device_block() for each scsi_device and scsi_internal_device_block() calls blk_mq_quiesce_queue() for each LUN. Since synchronize_rcu() is called from blk_mq_quiesce_queue(), this can cause substantial slowdowns on systems with many LUNs. Use scsi_internal_device_block_nowait() to implement scsi_host_block() so it is sufficient to run synchronize_rcu() once. This is safe since SCSI does not set the BLK_MQ_F_BLOCKING flag. [mkp: commit desc and comment tweaks] Link: https://lore.kernel.org/r/20200423020713.332743-1-ming.lei@redhat.com Cc: Steffen Maier Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Dexuan Cui Cc: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 47835c4b4ee0..ff0449fbe3a0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2841,11 +2841,27 @@ scsi_host_block(struct Scsi_Host *shost) struct scsi_device *sdev; int ret = 0; + /* + * Call scsi_internal_device_block_nowait so we can avoid + * calling synchronize_rcu() for each LUN. + */ shost_for_each_device(sdev, shost) { - ret = scsi_internal_device_block(sdev); + mutex_lock(&sdev->state_mutex); + ret = scsi_internal_device_block_nowait(sdev); + mutex_unlock(&sdev->state_mutex); if (ret) break; } + + /* + * SCSI never enables blk-mq's BLK_MQ_F_BLOCKING flag so + * calling synchronize_rcu() once is enough. + */ + WARN_ON_ONCE(shost->tag_set.flags & BLK_MQ_F_BLOCKING); + + if (!ret) + synchronize_rcu(); + return ret; } EXPORT_SYMBOL_GPL(scsi_host_block); From 7c59dace7e8e8cc75815f11882805368e5d2a807 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 24 Apr 2020 13:39:13 +0200 Subject: [PATCH 125/562] scsi: target: iscsi: Remove the iscsi_data_count structure This patch removes the iscsi_data_count structure and the iscsit_do_rx_data() function because they are used only by rx_data() Link: https://lore.kernel.org/r/20200424113913.17237-1-mlombard@redhat.com Reviewed-by: Mike Christie Signed-off-by: Maurizio Lombardi Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_util.c | 30 +++++------------------- include/target/iscsi/iscsi_target_core.h | 10 -------- 2 files changed, 6 insertions(+), 34 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 89183b3b178f..45ba07c6ec27 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1228,18 +1228,20 @@ void iscsit_print_session_params(struct iscsi_session *sess) iscsi_dump_sess_ops(sess->sess_ops); } -static int iscsit_do_rx_data( +int rx_data( struct iscsi_conn *conn, - struct iscsi_data_count *count) + struct kvec *iov, + int iov_count, + int data) { - int data = count->data_length, rx_loop = 0, total_rx = 0; + int rx_loop = 0, total_rx = 0; struct msghdr msg; if (!conn || !conn->sock || !conn->conn_ops) return -1; memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data); + iov_iter_kvec(&msg.msg_iter, READ, iov, iov_count, data); while (msg_data_left(&msg)) { rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL); @@ -1256,26 +1258,6 @@ static int iscsit_do_rx_data( return total_rx; } -int rx_data( - struct iscsi_conn *conn, - struct kvec *iov, - int iov_count, - int data) -{ - struct iscsi_data_count c; - - if (!conn || !conn->sock || !conn->conn_ops) - return -1; - - memset(&c, 0, sizeof(struct iscsi_data_count)); - c.iov = iov; - c.iov_count = iov_count; - c.data_length = data; - c.type = ISCSI_RX_DATA; - - return iscsit_do_rx_data(conn, &c); -} - int tx_data( struct iscsi_conn *conn, struct kvec *iov, diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 591cd9e4692c..4fda324f4b35 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -301,16 +301,6 @@ struct iscsi_queue_req { struct list_head qr_list; }; -struct iscsi_data_count { - int data_length; - int sync_and_steering; - enum data_count_type type; - u32 iov_count; - u32 ss_iov_count; - u32 ss_marker_count; - struct kvec *iov; -}; - struct iscsi_param_list { bool iser; struct list_head param_list; From f8f794a15adcea250c85cfa51716d09b03507375 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Sat, 18 Apr 2020 16:07:21 +0800 Subject: [PATCH 126/562] scsi: pmcraid: Replace dma_pool_malloc with dma_pool_zalloc Replace dma_pool_malloc with dma_pool_zalloc to make the code more concise in pmcraid_allocate_control_blocks() function. Link: https://lore.kernel.org/r/1587197241-274646-1-git-send-email-wubo40@huawei.com Signed-off-by: Wu Bo Signed-off-by: Martin K. Petersen --- drivers/scsi/pmcraid.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 7eb88fe1eb0b..aa9ae2ae8579 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -4652,7 +4652,7 @@ static int pmcraid_allocate_control_blocks(struct pmcraid_instance *pinstance) for (i = 0; i < PMCRAID_MAX_CMD; i++) { pinstance->cmd_list[i]->ioa_cb = - dma_pool_alloc( + dma_pool_zalloc( pinstance->control_pool, GFP_KERNEL, &(pinstance->cmd_list[i]->ioa_cb_bus_addr)); @@ -4661,8 +4661,6 @@ static int pmcraid_allocate_control_blocks(struct pmcraid_instance *pinstance) pmcraid_release_control_blocks(pinstance, i); return -ENOMEM; } - memset(pinstance->cmd_list[i]->ioa_cb, 0, - sizeof(struct pmcraid_control_block)); } return 0; } From f166021c0f51ffdb1661330ef8c8a5bb273560b8 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 15:05:53 +0800 Subject: [PATCH 127/562] scsi: bfa: Remove unneeded semicolon in bfa_fcs_lport_ns_sm_online() Fix the following coccicheck warning: drivers/scsi/bfa/bfa_fcs_lport.c:4361:3-4: Unneeded semicolon Link: https://lore.kernel.org/r/20200418070553.11262-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_fcs_lport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfa_fcs_lport.c b/drivers/scsi/bfa/bfa_fcs_lport.c index e09bf0729deb..297a77f5806c 100644 --- a/drivers/scsi/bfa/bfa_fcs_lport.c +++ b/drivers/scsi/bfa/bfa_fcs_lport.c @@ -4358,7 +4358,7 @@ bfa_fcs_lport_ns_sm_online(struct bfa_fcs_lport_ns_s *ns, bfa_sm_set_state(ns, bfa_fcs_lport_ns_sm_sending_gid_ft); bfa_fcs_lport_ns_send_gid_ft(ns, NULL); - }; + } break; default: From 2e9ef0fcac01a871ab1afcaab9398beeb4d76ea9 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 17:59:03 +0800 Subject: [PATCH 128/562] scsi: ipr: Remove NULL check before freeing function Fix the following coccicheck warning: drivers/scsi/ipr.c:9533:2-18: WARNING: NULL check before some freeing functions is not needed. Link: https://lore.kernel.org/r/20200418095903.35118-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 0db37b4f7265..7d77997d26d4 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9529,8 +9529,7 @@ static void ipr_free_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) } } - if (ioa_cfg->ipr_cmd_pool) - dma_pool_destroy(ioa_cfg->ipr_cmd_pool); + dma_pool_destroy(ioa_cfg->ipr_cmd_pool); kfree(ioa_cfg->ipr_cmnd_list); kfree(ioa_cfg->ipr_cmnd_list_dma); From baf3fbf26cc003eaf7612ea215d6cc191bd6f850 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 17:58:50 +0800 Subject: [PATCH 129/562] scsi: mpt3sas: Remove NULL check before freeing function Fix the following coccicheck warning: drivers/scsi/mpt3sas/mpt3sas_base.c:4906:3-19: WARNING: NULL check before some freeing functions is not needed. Link: https://lore.kernel.org/r/20200418095850.34883-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index e9471e501ea7..7416242624f0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -4887,8 +4887,7 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) ioc->pcie_sg_lookup[i].pcie_sgl, ioc->pcie_sg_lookup[i].pcie_sgl_dma); } - if (ioc->pcie_sgl_dma_pool) - dma_pool_destroy(ioc->pcie_sgl_dma_pool); + dma_pool_destroy(ioc->pcie_sgl_dma_pool); } if (ioc->config_page) { From 6942d531e2d22a72492502be010fee9030c58993 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 15:06:15 +0800 Subject: [PATCH 130/562] scsi: snic: Make snic_io_exch_ver_cmpl_handler() return void This function does not need a return value since no callers depend on it. Make it return void. This also fixes the coccicheck warning: drivers/scsi/snic/snic_ctl.c:163:5-8: Unneeded variable: "ret". Return "0" on line 228 Link: https://lore.kernel.org/r/20200418070615.11603-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/snic/snic.h | 2 +- drivers/scsi/snic/snic_ctl.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/snic/snic.h b/drivers/scsi/snic/snic.h index de0ab5fc8474..f4c666285bba 100644 --- a/drivers/scsi/snic/snic.h +++ b/drivers/scsi/snic/snic.h @@ -399,7 +399,7 @@ void snic_handle_link_event(struct snic *); void snic_handle_link(struct work_struct *); int snic_queue_exch_ver_req(struct snic *); -int snic_io_exch_ver_cmpl_handler(struct snic *, struct snic_fw_req *); +void snic_io_exch_ver_cmpl_handler(struct snic *, struct snic_fw_req *); int snic_queue_wq_desc(struct snic *, void *os_buf, u16 len); diff --git a/drivers/scsi/snic/snic_ctl.c b/drivers/scsi/snic/snic_ctl.c index 449b03f3bbd3..4cd86115cfb2 100644 --- a/drivers/scsi/snic/snic_ctl.c +++ b/drivers/scsi/snic/snic_ctl.c @@ -151,7 +151,7 @@ snic_queue_exch_ver_req(struct snic *snic) /* * snic_io_exch_ver_cmpl_handler */ -int +void snic_io_exch_ver_cmpl_handler(struct snic *snic, struct snic_fw_req *fwreq) { struct snic_req_info *rqi = NULL; @@ -160,7 +160,6 @@ snic_io_exch_ver_cmpl_handler(struct snic *snic, struct snic_fw_req *fwreq) u32 cmnd_id, hid, max_sgs; ulong ctx = 0; unsigned long flags; - int ret = 0; SNIC_HOST_INFO(snic->shost, "Exch Ver Compl Received.\n"); snic_io_hdr_dec(&fwreq->hdr, &typ, &hdr_stat, &cmnd_id, &hid, &ctx); @@ -224,8 +223,6 @@ snic_io_exch_ver_cmpl_handler(struct snic *snic, struct snic_fw_req *fwreq) snic_release_untagged_req(snic, rqi); SNIC_HOST_INFO(snic->shost, "Exch_cmpl Done, hdr_stat %d.\n", hdr_stat); - - return ret; } /* end of snic_io_exch_ver_cmpl_handler */ /* From 0745c834f7937bee2fe7de5c500ea4d6aa38ce35 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 18 Apr 2020 07:10:57 +0000 Subject: [PATCH 131/562] scsi: bfa: Remove set but not used variable 'fchs' Fixes gcc '-Wunused-but-set-variable' warning: drivers/scsi/bfa/bfa_svc.c: In function 'uf_recv': drivers/scsi/bfa/bfa_svc.c:5520:17: warning: variable 'fchs' set but not used [-Wunused-but-set-variable] struct fchs_s *fchs; ^ Link: https://lore.kernel.org/r/20200418071057.96699-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_svc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/bfa/bfa_svc.c b/drivers/scsi/bfa/bfa_svc.c index d8a9e40fa257..0b7d2e8f4a66 100644 --- a/drivers/scsi/bfa/bfa_svc.c +++ b/drivers/scsi/bfa/bfa_svc.c @@ -5517,7 +5517,6 @@ uf_recv(struct bfa_s *bfa, struct bfi_uf_frm_rcvd_s *m) struct bfa_uf_s *uf = &ufm->uf_list[uf_tag]; struct bfa_uf_buf_s *uf_buf; uint8_t *buf; - struct fchs_s *fchs; uf_buf = (struct bfa_uf_buf_s *) bfa_mem_get_dmabuf_kva(ufm, uf_tag, uf->pb_len); @@ -5526,8 +5525,6 @@ uf_recv(struct bfa_s *bfa, struct bfi_uf_frm_rcvd_s *m) m->frm_len = be16_to_cpu(m->frm_len); m->xfr_len = be16_to_cpu(m->xfr_len); - fchs = (struct fchs_s *)uf_buf; - list_del(&uf->qe); /* dequeue from posted queue */ uf->data_ptr = buf; From f71ded01cc3ff8da997b6e910fe3eb59e2336e39 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:39:57 +0800 Subject: [PATCH 132/562] scsi: bfa: Remove unneeded semicolon in bfa_fcs_rport.c Fix the following coccicheck warning: drivers/scsi/bfa/bfa_fcs_rport.c:2452:2-3: Unneeded semicolon drivers/scsi/bfa/bfa_fcs_rport.c:1578:3-4: Unneeded semicolon Link: https://lore.kernel.org/r/20200421033957.27783-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfa_fcs_rport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bfa/bfa_fcs_rport.c b/drivers/scsi/bfa/bfa_fcs_rport.c index 82801b366500..fc294e1950a6 100644 --- a/drivers/scsi/bfa/bfa_fcs_rport.c +++ b/drivers/scsi/bfa/bfa_fcs_rport.c @@ -1575,7 +1575,7 @@ bfa_fcs_rport_sm_nsdisc_sent(struct bfa_fcs_rport_s *rport, bfa_timer_start(rport->fcs->bfa, &rport->timer, bfa_fcs_rport_timeout, rport, bfa_fcs_rport_del_timeout); - }; + } break; case RPSM_EVENT_DELETE: @@ -2449,7 +2449,7 @@ bfa_fcs_rport_hal_online_action(struct bfa_fcs_rport_s *rport) bfa_fcs_itnim_brp_online(rport->itnim); if (!BFA_FCS_PID_IS_WKA(rport->pid)) bfa_fcs_rpf_rport_online(rport); - }; + } wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port)); wwn2str(rpwwn_buf, rport->pwwn); From acfcb728bd57f3f04ae82050c9c5903cee077b08 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:40:19 +0800 Subject: [PATCH 133/562] scsi: bnx2fc: Remove unneeded semicolon in bnx2fc_fcoe.c Fix the following coccicheck warning: drivers/scsi/bnx2fc/bnx2fc_fcoe.c:948:4-5: Unneeded semicolon drivers/scsi/bnx2fc/bnx2fc_fcoe.c:968:4-5: Unneeded semicolon Link: https://lore.kernel.org/r/20200421034019.27949-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 1cbb431fa682..0e33324e16f5 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -945,7 +945,7 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, */ if (interface->enabled) fcoe_ctlr_link_up(ctlr); - }; + } } else if (fcoe_ctlr_link_down(ctlr)) { switch (cdev->enabled) { case FCOE_CTLR_DISABLED: @@ -965,7 +965,7 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, put_cpu(); fcoe_clean_pending_queue(lport); wait_for_upload = 1; - }; + } } } mutex_unlock(&bnx2fc_dev_lock); From 8d5e202802a5de1f81c445cacbb9359fcd4e6f23 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:40:50 +0800 Subject: [PATCH 134/562] scsi: isci: Use true, false for bool variables Fix the following coccicheck warning: drivers/scsi/isci/isci.h:515:1-12: WARNING: Assignment of 0/1 to bool variable drivers/scsi/isci/isci.h:503:1-12: WARNING: Assignment of 0/1 to bool variable drivers/scsi/isci/isci.h:509:1-12: WARNING: Assignment of 0/1 to bool variable Link: https://lore.kernel.org/r/20200421034050.28193-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/isci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/isci/isci.h b/drivers/scsi/isci/isci.h index 680e30947671..4e6b1decbca7 100644 --- a/drivers/scsi/isci/isci.h +++ b/drivers/scsi/isci/isci.h @@ -500,19 +500,19 @@ struct sci_timer { static inline void sci_init_timer(struct sci_timer *tmr, void (*fn)(struct timer_list *t)) { - tmr->cancel = 0; + tmr->cancel = false; timer_setup(&tmr->timer, fn, 0); } static inline void sci_mod_timer(struct sci_timer *tmr, unsigned long msec) { - tmr->cancel = 0; + tmr->cancel = false; mod_timer(&tmr->timer, jiffies + msecs_to_jiffies(msec)); } static inline void sci_del_timer(struct sci_timer *tmr) { - tmr->cancel = 1; + tmr->cancel = true; del_timer(&tmr->timer); } From 9b77c9da6a1f0bd89f130710671b2396239b1ee4 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:40:38 +0800 Subject: [PATCH 135/562] scsi: qla4xxx: Remove unneeded semicolon in ql4_os.c Fix the following coccicheck warning: drivers/scsi/qla4xxx/ql4_os.c:969:3-4: Unneeded semicolon Link: https://lore.kernel.org/r/20200421034038.28113-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 5504ab11decc..5dc697ce8b5d 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -966,7 +966,7 @@ static int qla4xxx_set_chap_entry(struct Scsi_Host *shost, void *data, int len) "%s: No such sysfs attribute\n", __func__); rc = -ENOSYS; goto exit_set_chap; - }; + } } if (chap_rec.chap_type == CHAP_TYPE_IN) From f371d53453772bcbb9ac2201029b66281f713c07 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 21 Apr 2020 11:40:29 +0800 Subject: [PATCH 136/562] scsi: sgiwd93: Remove unneeded semicolon in sgiwd93.c Fix the following coccicheck warning: drivers/scsi/sgiwd93.c:190:2-3: Unneeded semicolon Link: https://lore.kernel.org/r/20200421034029.28030-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/sgiwd93.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c index 713bce998b0e..3bdf0deb8f15 100644 --- a/drivers/scsi/sgiwd93.c +++ b/drivers/scsi/sgiwd93.c @@ -187,7 +187,7 @@ static inline void init_hpc_chain(struct ip22_hostdata *hdata) hcp++; dma += sizeof(struct hpc_chunk); start += sizeof(struct hpc_chunk); - }; + } hcp--; hcp->desc.pnext = hdata->dma; } From f7854c382240c1686900b2f098b36430c6f5047e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 12 Apr 2020 11:40:39 +0200 Subject: [PATCH 137/562] scsi: aacraid: Fix error handling paths in aac_probe_one() If 'scsi_host_alloc()' or 'kcalloc()' fail, 'error' is known to be 0. Set it explicitly to -ENOMEM before branching to the error handling path. While at it, remove 2 useless assignments to 'error'. These values are overwridden a few lines later. Link: https://lore.kernel.org/r/20200412094039.8822-1-christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/linit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 83a60b0a8cd8..a0a2b3abc512 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1632,7 +1632,7 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) struct Scsi_Host *shost; struct aac_dev *aac; struct list_head *insert = &aac_devices; - int error = -ENODEV; + int error; int unique_id = 0; u64 dmamask; int mask_bits = 0; @@ -1657,7 +1657,6 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) error = pci_enable_device(pdev); if (error) goto out; - error = -ENODEV; if (!(aac_drivers[index].quirks & AAC_QUIRK_SRC)) { error = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); @@ -1689,8 +1688,10 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); shost = scsi_host_alloc(&aac_driver_template, sizeof(struct aac_dev)); - if (!shost) + if (!shost) { + error = -ENOMEM; goto out_disable_pdev; + } shost->irq = pdev->irq; shost->unique_id = unique_id; @@ -1714,8 +1715,11 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) aac->fibs = kcalloc(shost->can_queue + AAC_NUM_MGT_FIB, sizeof(struct fib), GFP_KERNEL); - if (!aac->fibs) + if (!aac->fibs) { + error = -ENOMEM; goto out_free_host; + } + spin_lock_init(&aac->fib_lock); mutex_init(&aac->ioctl_mutex); From 655da8e57a46c009a07373c4fdcf9221b5a9b9a6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 26 Apr 2020 18:48:44 -0700 Subject: [PATCH 138/562] scsi: sr: Use {get,put}_unaligned_be*() instead of open-coding these functions This patch makes the sr code slightly easier to read. Link: https://lore.kernel.org/r/20200427014844.12109-1-bvanassche@acm.org Cc: Merlijn Wajer Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index d2fe3fa470f9..7727893238c7 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -51,6 +51,8 @@ #include #include +#include + #include #include #include @@ -344,10 +346,8 @@ static int sr_done(struct scsi_cmnd *SCpnt) case ILLEGAL_REQUEST: if (!(SCpnt->sense_buffer[0] & 0x90)) break; - error_sector = (SCpnt->sense_buffer[3] << 24) | - (SCpnt->sense_buffer[4] << 16) | - (SCpnt->sense_buffer[5] << 8) | - SCpnt->sense_buffer[6]; + error_sector = + get_unaligned_be32(&SCpnt->sense_buffer[3]); if (SCpnt->request->bio != NULL) block_sectors = bio_sectors(SCpnt->request->bio); @@ -495,13 +495,9 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) SCpnt->sdb.length = this_count * s_size; } - SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; - SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff; - SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff; - SCpnt->cmnd[5] = (unsigned char) block & 0xff; + put_unaligned_be32(block, &SCpnt->cmnd[2]); SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0; - SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff; - SCpnt->cmnd[8] = (unsigned char) this_count & 0xff; + put_unaligned_be16(this_count, &SCpnt->cmnd[7]); /* * We shouldn't disconnect in the middle of a sector, so with a dumb @@ -854,8 +850,7 @@ static void get_sectorsize(struct scsi_cd *cd) } else { long last_written; - cd->capacity = 1 + ((buffer[0] << 24) | (buffer[1] << 16) | - (buffer[2] << 8) | buffer[3]); + cd->capacity = 1 + get_unaligned_be32(&buffer[0]); /* * READ_CAPACITY doesn't return the correct size on * certain UDF media. If last_written is larger, use @@ -866,8 +861,7 @@ static void get_sectorsize(struct scsi_cd *cd) if (!cdrom_get_last_written(&cd->cdi, &last_written)) cd->capacity = max_t(long, cd->capacity, last_written); - sector_size = (buffer[4] << 24) | - (buffer[5] << 16) | (buffer[6] << 8) | buffer[7]; + sector_size = get_unaligned_be32(&buffer[4]); switch (sector_size) { /* * HP 4020i CD-Recorder reports 2340 byte sectors @@ -955,13 +949,13 @@ static void get_capabilities(struct scsi_cd *cd) } n = data.header_length + data.block_descriptor_length; - cd->cdi.speed = ((buffer[n + 8] << 8) + buffer[n + 9]) / 176; + cd->cdi.speed = get_unaligned_be16(&buffer[n + 8]) / 176; cd->readcd_known = 1; cd->readcd_cdda = buffer[n + 5] & 0x01; /* print some capability bits */ sr_printk(KERN_INFO, cd, "scsi3-mmc drive: %dx/%dx %s%s%s%s%s%s\n", - ((buffer[n + 14] << 8) + buffer[n + 15]) / 176, + get_unaligned_be16(&buffer[n + 14]) / 176, cd->cdi.speed, buffer[n + 3] & 0x01 ? "writer " : "", /* CD Writer */ buffer[n + 3] & 0x20 ? "dvd-ram " : "", From 7dfdcc393dcdd63f4ea15d06c97a719451cfbb77 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sun, 26 Apr 2020 17:43:05 +0800 Subject: [PATCH 139/562] scsi: ufs: Use true for bool variables in ufshcd_complete_dev_init() Fix the following coccicheck warning: drivers/scsi/ufs/ufshcd.c:4140:6-14: WARNING: Assignment of 0/1 to bool variable. Link: https://lore.kernel.org/r/20200426094305.24083-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 7d1fa1349d40..0291117dd987 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4137,7 +4137,7 @@ static int ufshcd_complete_dev_init(struct ufs_hba *hba) { int i; int err; - bool flag_res = 1; + bool flag_res = true; err = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, QUERY_FLAG_IDN_FDEVICEINIT, NULL); From 3d17b9b5ab11556b2fea07d4f24154095a685ad2 Mon Sep 17 00:00:00 2001 From: Asutosh Das Date: Wed, 22 Apr 2020 14:41:42 -0700 Subject: [PATCH 140/562] scsi: ufs: Add write booster feature support The write performance of TLC NAND is considerably lower than SLC NAND. Using SLC NAND as a WriteBooster Buffer enables the write request to be processed with lower latency and improves the overall write performance. Adds support for shared-buffer mode WriteBooster. WriteBooster enable: SW enables it when clocks are scaled up, thus it's enabled only in high load conditions. WriteBooster disable: SW will disable the feature, when clocks are scaled down. Thus writes would go as normal writes. To keep the endurance of the WriteBooster Buffer at a maximum, this load-based toggling is adopted. Link: https://lore.kernel.org/r/2871444d9083b0e9323ef6d8ff1b544b7784adc9.1587591527.git.asutoshd@codeaurora.org Reviewed-by: Avri Altman Signed-off-by: Asutosh Das Signed-off-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 31 ++++- drivers/scsi/ufs/ufshcd.c | 252 +++++++++++++++++++++++++++++++++++++- drivers/scsi/ufs/ufshcd.h | 14 +++ 3 files changed, 291 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 990cb48e2403..d77512d964bc 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -140,6 +140,9 @@ enum flag_idn { QUERY_FLAG_IDN_BUSY_RTC = 0x09, QUERY_FLAG_IDN_RESERVED3 = 0x0A, QUERY_FLAG_IDN_PERMANENTLY_DISABLE_FW_UPDATE = 0x0B, + QUERY_FLAG_IDN_WB_EN = 0x0E, + QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN = 0x0F, + QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8 = 0x10, }; /* Attribute idn for Query requests */ @@ -168,6 +171,10 @@ enum attr_idn { QUERY_ATTR_IDN_PSA_STATE = 0x15, QUERY_ATTR_IDN_PSA_DATA_SIZE = 0x16, QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME = 0x17, + QUERY_ATTR_IDN_WB_FLUSH_STATUS = 0x1C, + QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D, + QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E, + QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F, }; /* Descriptor idn for Query requests */ @@ -191,9 +198,9 @@ enum desc_header_offset { }; enum ufs_desc_def_size { - QUERY_DESC_DEVICE_DEF_SIZE = 0x40, + QUERY_DESC_DEVICE_DEF_SIZE = 0x59, QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90, - QUERY_DESC_UNIT_DEF_SIZE = 0x23, + QUERY_DESC_UNIT_DEF_SIZE = 0x2D, QUERY_DESC_INTERCONNECT_DEF_SIZE = 0x06, QUERY_DESC_GEOMETRY_DEF_SIZE = 0x48, QUERY_DESC_POWER_DEF_SIZE = 0x62, @@ -219,6 +226,7 @@ enum unit_desc_param { UNIT_DESC_PARAM_PHY_MEM_RSRC_CNT = 0x18, UNIT_DESC_PARAM_CTX_CAPABILITIES = 0x20, UNIT_DESC_PARAM_LARGE_UNIT_SIZE_M1 = 0x22, + UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS = 0x29, }; /* Device descriptor parameters offsets in bytes*/ @@ -258,6 +266,10 @@ enum device_desc_param { DEVICE_DESC_PARAM_PSA_MAX_DATA = 0x25, DEVICE_DESC_PARAM_PSA_TMT = 0x29, DEVICE_DESC_PARAM_PRDCT_REV = 0x2A, + DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP = 0x4F, + DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN = 0x53, + DEVICE_DESC_PARAM_WB_TYPE = 0x54, + DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS = 0x55, }; /* Interconnect descriptor parameters offsets in bytes*/ @@ -333,6 +345,11 @@ enum { UFSHCD_AMP = 3, }; +/* Possible values for dExtendedUFSFeaturesSupport */ +enum { + UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), +}; + #define POWER_DESC_MAX_SIZE 0x62 #define POWER_DESC_MAX_ACTV_ICC_LVLS 16 @@ -447,6 +464,11 @@ enum ufs_dev_pwr_mode { UFS_POWERDOWN_PWR_MODE = 3, }; +enum ufs_dev_wb_buf_avail_size { + UFS_WB_10_PERCENT_BUF_REMAIN = 0x1, + UFS_WB_40_PERCENT_BUF_REMAIN = 0x4, +}; + /** * struct utp_cmd_rsp - Response UPIU structure * @residual_transfer_count: Residual transfer count DW-3 @@ -537,6 +559,11 @@ struct ufs_dev_info { u8 *model; u16 wspecversion; u32 clk_gating_wait_us; + u32 d_ext_ufs_feature_sup; + u8 b_wb_buffer_type; + u32 d_wb_alloc_units; + bool keep_vcc_on; + u8 b_presrv_uspc_en; }; /** diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 0291117dd987..1827b57eb7db 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -48,6 +48,8 @@ #include "unipro.h" #include "ufs-sysfs.h" #include "ufs_bsg.h" +#include +#include #define CREATE_TRACE_POINTS #include @@ -251,6 +253,13 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up); static irqreturn_t ufshcd_intr(int irq, void *__hba); static int ufshcd_change_power_mode(struct ufs_hba *hba, struct ufs_pa_layer_attr *pwr_mode); +static bool ufshcd_wb_sup(struct ufs_hba *hba); +static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba); +static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba); +static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable); +static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set); +static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable); + static inline bool ufshcd_valid_tag(struct ufs_hba *hba, int tag) { return tag >= 0 && tag < hba->nutrs; @@ -272,6 +281,25 @@ static inline void ufshcd_disable_irq(struct ufs_hba *hba) } } +static inline void ufshcd_wb_config(struct ufs_hba *hba) +{ + int ret; + + if (!ufshcd_wb_sup(hba)) + return; + + ret = ufshcd_wb_ctrl(hba, true); + if (ret) + dev_err(hba->dev, "%s: Enable WB failed: %d\n", __func__, ret); + else + dev_info(hba->dev, "%s: Write Booster Configured\n", __func__); + ret = ufshcd_wb_toggle_flush_during_h8(hba, true); + if (ret) + dev_err(hba->dev, "%s: En WB flush during H8: failed: %d\n", + __func__, ret); + ufshcd_wb_toggle_flush(hba, true); +} + static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba) { if (atomic_dec_and_test(&hba->scsi_block_reqs_cnt)) @@ -1150,10 +1178,17 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) /* scale up the gear after scaling up clocks */ if (scale_up) { ret = ufshcd_scale_gear(hba, true); - if (ret) + if (ret) { ufshcd_scale_clks(hba, false); + goto out_unprepare; + } } + /* Enable Write Booster if we have scaled up else disable it */ + up_write(&hba->clk_scaling_lock); + ufshcd_wb_ctrl(hba, scale_up); + down_write(&hba->clk_scaling_lock); + out_unprepare: ufshcd_clock_scaling_unprepare(hba); out: @@ -5161,6 +5196,166 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } +static bool ufshcd_wb_sup(struct ufs_hba *hba) +{ + return ufshcd_is_wb_allowed(hba); +} + +static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) +{ + int ret; + enum query_opcode opcode; + + if (!ufshcd_wb_sup(hba)) + return 0; + + if (!(enable ^ hba->wb_enabled)) + return 0; + if (enable) + opcode = UPIU_QUERY_OPCODE_SET_FLAG; + else + opcode = UPIU_QUERY_OPCODE_CLEAR_FLAG; + + ret = ufshcd_query_flag_retry(hba, opcode, + QUERY_FLAG_IDN_WB_EN, NULL); + if (ret) { + dev_err(hba->dev, "%s write booster %s failed %d\n", + __func__, enable ? "enable" : "disable", ret); + return ret; + } + + hba->wb_enabled = enable; + dev_dbg(hba->dev, "%s write booster %s %d\n", + __func__, enable ? "enable" : "disable", ret); + + return ret; +} + +static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) +{ + int val; + + if (set) + val = UPIU_QUERY_OPCODE_SET_FLAG; + else + val = UPIU_QUERY_OPCODE_CLEAR_FLAG; + + return ufshcd_query_flag_retry(hba, val, + QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8, + NULL); +} + +static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) +{ + if (enable) + ufshcd_wb_buf_flush_enable(hba); + else + ufshcd_wb_buf_flush_disable(hba); + +} + +static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba) +{ + int ret; + + if (!ufshcd_wb_sup(hba) || hba->wb_buf_flush_enabled) + return 0; + + ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, + QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, NULL); + if (ret) + dev_err(hba->dev, "%s WB - buf flush enable failed %d\n", + __func__, ret); + else + hba->wb_buf_flush_enabled = true; + + dev_dbg(hba->dev, "WB - Flush enabled: %d\n", ret); + return ret; +} + +static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba) +{ + int ret; + + if (!ufshcd_wb_sup(hba) || !hba->wb_buf_flush_enabled) + return 0; + + ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_CLEAR_FLAG, + QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, NULL); + if (ret) { + dev_warn(hba->dev, "%s: WB - buf flush disable failed %d\n", + __func__, ret); + } else { + hba->wb_buf_flush_enabled = false; + dev_dbg(hba->dev, "WB - Flush disabled: %d\n", ret); + } + + return ret; +} + +static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, + u32 avail_buf) +{ + u32 cur_buf; + int ret; + + ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE, + 0, 0, &cur_buf); + if (ret) { + dev_err(hba->dev, "%s dCurWriteBoosterBufferSize read failed %d\n", + __func__, ret); + return false; + } + + if (!cur_buf) { + dev_info(hba->dev, "dCurWBBuf: %d WB disabled until free-space is available\n", + cur_buf); + return false; + } + /* Let it continue to flush when >60% full */ + if (avail_buf < UFS_WB_40_PERCENT_BUF_REMAIN) + return true; + + return false; +} + +static bool ufshcd_wb_keep_vcc_on(struct ufs_hba *hba) +{ + int ret; + u32 avail_buf; + + if (!ufshcd_wb_sup(hba)) + return false; + /* + * The ufs device needs the vcc to be ON to flush. + * With user-space reduction enabled, it's enough to enable flush + * by checking only the available buffer. The threshold + * defined here is > 90% full. + * With user-space preserved enabled, the current-buffer + * should be checked too because the wb buffer size can reduce + * when disk tends to be full. This info is provided by current + * buffer (dCurrentWriteBoosterBufferSize). There's no point in + * keeping vcc on when current buffer is empty. + */ + ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE, + 0, 0, &avail_buf); + if (ret) { + dev_warn(hba->dev, "%s dAvailableWriteBoosterBufferSize read failed %d\n", + __func__, ret); + return false; + } + + if (!hba->dev_info.b_presrv_uspc_en) { + if (avail_buf <= UFS_WB_10_PERCENT_BUF_REMAIN) + return true; + return false; + } + + return ufshcd_wb_presrv_usrspc_keep_vcc_on(hba, avail_buf); +} + /** * ufshcd_exception_event_handler - handle exceptions raised by device * @work: pointer to work data @@ -6603,6 +6798,33 @@ static int ufshcd_scsi_add_wlus(struct ufs_hba *hba) return ret; } +static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) +{ + hba->dev_info.d_ext_ufs_feature_sup = + get_unaligned_be32(desc_buf + + DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP); + /* + * WB may be supported but not configured while provisioning. + * The spec says, in dedicated wb buffer mode, + * a max of 1 lun would have wb buffer configured. + * Now only shared buffer mode is supported. + */ + hba->dev_info.b_wb_buffer_type = + desc_buf[DEVICE_DESC_PARAM_WB_TYPE]; + + hba->dev_info.d_wb_alloc_units = + get_unaligned_be32(desc_buf + + DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS); + hba->dev_info.b_presrv_uspc_en = + desc_buf[DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN]; + + if (!((hba->dev_info.d_ext_ufs_feature_sup & + UFS_DEV_WRITE_BOOSTER_SUP) && + hba->dev_info.b_wb_buffer_type && + hba->dev_info.d_wb_alloc_units)) + hba->caps &= ~UFSHCD_CAP_WB_EN; +} + static int ufs_get_device_desc(struct ufs_hba *hba) { int err; @@ -6639,6 +6861,11 @@ static int ufs_get_device_desc(struct ufs_hba *hba) desc_buf[DEVICE_DESC_PARAM_SPEC_VER + 1]; model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; + + /* Enable WB only for UFS-3.1 */ + if (dev_info->wspecversion >= 0x310) + ufshcd_wb_probe(hba, desc_buf); + err = ufshcd_read_string_desc(hba, model_index, &dev_info->model, SD_ASCII_STD); if (err < 0) { @@ -7149,6 +7376,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) /* set the state as operational after switching to desired gear */ hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; + ufshcd_wb_config(hba); /* Enable Auto-Hibernate if configured */ ufshcd_auto_hibern8_enable(hba); @@ -7809,12 +8037,16 @@ static void ufshcd_vreg_set_lpm(struct ufs_hba *hba) * * Ignore the error returned by ufshcd_toggle_vreg() as device is anyway * in low power state which would save some power. + * + * If Write Booster is enabled and the device needs to flush the WB + * buffer OR if bkops status is urgent for WB, keep Vcc on. */ if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba) && !hba->dev_info.is_lu_power_on_wp) { ufshcd_setup_vreg(hba, false); } else if (!ufshcd_is_ufs_dev_active(hba)) { - ufshcd_toggle_vreg(hba->dev, hba->vreg_info.vcc, false); + if (!hba->dev_info.keep_vcc_on) + ufshcd_toggle_vreg(hba->dev, hba->vreg_info.vcc, false); if (!ufshcd_is_link_active(hba)) { ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq); ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq2); @@ -7938,11 +8170,23 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) /* make sure that auto bkops is disabled */ ufshcd_disable_auto_bkops(hba); } + /* + * With wb enabled, if the bkops is enabled or if the + * configured WB type is 70% full, keep vcc ON + * for the device to flush the wb buffer + */ + if ((hba->auto_bkops_enabled && ufshcd_wb_sup(hba)) || + ufshcd_wb_keep_vcc_on(hba)) + hba->dev_info.keep_vcc_on = true; + else + hba->dev_info.keep_vcc_on = false; + } else if (!ufshcd_is_runtime_pm(pm_op)) { + hba->dev_info.keep_vcc_on = false; } if ((req_dev_pwr_mode != hba->curr_dev_pwr_mode) && - ((ufshcd_is_runtime_pm(pm_op) && !hba->auto_bkops_enabled) || - !ufshcd_is_runtime_pm(pm_op))) { + ((ufshcd_is_runtime_pm(pm_op) && !hba->auto_bkops_enabled) || + !ufshcd_is_runtime_pm(pm_op))) { /* ensure that bkops is disabled */ ufshcd_disable_auto_bkops(hba); ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 6ffc08ad85f6..056537e52c19 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -555,6 +555,13 @@ enum ufshcd_caps { * for userspace to control the power management. */ UFSHCD_CAP_RPM_AUTOSUSPEND = 1 << 6, + + /* + * This capability allows the host controller driver to turn-on + * WriteBooster, if the underlying device supports it and is + * provisioned to be used. This would increase the write performance. + */ + UFSHCD_CAP_WB_EN = 1 << 7, }; /** @@ -727,6 +734,8 @@ struct ufs_hba { struct device bsg_dev; struct request_queue *bsg_queue; + bool wb_buf_flush_enabled; + bool wb_enabled; }; /* Returns true if clocks can be gated. Otherwise false */ @@ -775,6 +784,11 @@ static inline bool ufshcd_is_auto_hibern8_enabled(struct ufs_hba *hba) return FIELD_GET(UFSHCI_AHIBERN8_TIMER_MASK, hba->ahit) ? true : false; } +static inline bool ufshcd_is_wb_allowed(struct ufs_hba *hba) +{ + return hba->caps & UFSHCD_CAP_WB_EN; +} + #define ufshcd_writel(hba, val, reg) \ writel((val), (hba)->mmio_base + (reg)) #define ufshcd_readl(hba, reg) \ From c14e7adf3a6a8d0ce4d16f839f9737ad3481e239 Mon Sep 17 00:00:00 2001 From: Asutosh Das Date: Wed, 22 Apr 2020 14:41:43 -0700 Subject: [PATCH 141/562] scsi: ufs: sysfs: Add sysfs entries for write booster Adds unit, device, geometry descriptor sysfs entries. Adds flags sysfs entries for write booster. Link: https://lore.kernel.org/r/98987ef17844292bd42c57613990a3a26c6de2b8.1587591527.git.asutoshd@codeaurora.org Reviewed-by: Avri Altman Signed-off-by: Asutosh Das Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-sysfs.c | 39 +++++++++++++++++++++++++++++++++++- drivers/scsi/ufs/ufs.h | 5 +++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c index 92a63eebdca9..93484408bc40 100644 --- a/drivers/scsi/ufs/ufs-sysfs.c +++ b/drivers/scsi/ufs/ufs-sysfs.c @@ -276,6 +276,10 @@ UFS_DEVICE_DESC_PARAM(device_version, _DEV_VER, 2); UFS_DEVICE_DESC_PARAM(number_of_secure_wpa, _NUM_SEC_WPA, 1); UFS_DEVICE_DESC_PARAM(psa_max_data_size, _PSA_MAX_DATA, 4); UFS_DEVICE_DESC_PARAM(psa_state_timeout, _PSA_TMT, 1); +UFS_DEVICE_DESC_PARAM(ext_feature_sup, _EXT_UFS_FEATURE_SUP, 4); +UFS_DEVICE_DESC_PARAM(wb_presv_us_en, _WB_PRESRV_USRSPC_EN, 1); +UFS_DEVICE_DESC_PARAM(wb_type, _WB_TYPE, 1); +UFS_DEVICE_DESC_PARAM(wb_shared_alloc_units, _WB_SHARED_ALLOC_UNITS, 4); static struct attribute *ufs_sysfs_device_descriptor[] = { &dev_attr_device_type.attr, @@ -304,6 +308,10 @@ static struct attribute *ufs_sysfs_device_descriptor[] = { &dev_attr_number_of_secure_wpa.attr, &dev_attr_psa_max_data_size.attr, &dev_attr_psa_state_timeout.attr, + &dev_attr_ext_feature_sup.attr, + &dev_attr_wb_presv_us_en.attr, + &dev_attr_wb_type.attr, + &dev_attr_wb_shared_alloc_units.attr, NULL, }; @@ -373,6 +381,12 @@ UFS_GEOMETRY_DESC_PARAM(enh4_memory_max_alloc_units, _ENM4_MAX_NUM_UNITS, 4); UFS_GEOMETRY_DESC_PARAM(enh4_memory_capacity_adjustment_factor, _ENM4_CAP_ADJ_FCTR, 2); +UFS_GEOMETRY_DESC_PARAM(wb_max_alloc_units, _WB_MAX_ALLOC_UNITS, 4); +UFS_GEOMETRY_DESC_PARAM(wb_max_wb_luns, _WB_MAX_WB_LUNS, 1); +UFS_GEOMETRY_DESC_PARAM(wb_buff_cap_adj, _WB_BUFF_CAP_ADJ, 1); +UFS_GEOMETRY_DESC_PARAM(wb_sup_red_type, _WB_SUP_RED_TYPE, 1); +UFS_GEOMETRY_DESC_PARAM(wb_sup_wb_type, _WB_SUP_WB_TYPE, 1); + static struct attribute *ufs_sysfs_geometry_descriptor[] = { &dev_attr_raw_device_capacity.attr, @@ -404,6 +418,11 @@ static struct attribute *ufs_sysfs_geometry_descriptor[] = { &dev_attr_enh3_memory_capacity_adjustment_factor.attr, &dev_attr_enh4_memory_max_alloc_units.attr, &dev_attr_enh4_memory_capacity_adjustment_factor.attr, + &dev_attr_wb_max_alloc_units.attr, + &dev_attr_wb_max_wb_luns.attr, + &dev_attr_wb_buff_cap_adj.attr, + &dev_attr_wb_sup_red_type.attr, + &dev_attr_wb_sup_wb_type.attr, NULL, }; @@ -616,7 +635,7 @@ static ssize_t _name##_show(struct device *dev, \ pm_runtime_put_sync(hba->dev); \ if (ret) \ return -EINVAL; \ - return sprintf(buf, "%s\n", flag ? "true" : "false"); \ + return sprintf(buf, "%s\n", flag ? "true" : "false"); \ } \ static DEVICE_ATTR_RO(_name) @@ -628,6 +647,9 @@ UFS_FLAG(life_span_mode_enable, _LIFE_SPAN_MODE_ENABLE); UFS_FLAG(phy_resource_removal, _FPHYRESOURCEREMOVAL); UFS_FLAG(busy_rtc, _BUSY_RTC); UFS_FLAG(disable_fw_update, _PERMANENTLY_DISABLE_FW_UPDATE); +UFS_FLAG(wb_enable, _WB_EN); +UFS_FLAG(wb_flush_en, _WB_BUFF_FLUSH_EN); +UFS_FLAG(wb_flush_during_h8, _WB_BUFF_FLUSH_DURING_HIBERN8); static struct attribute *ufs_sysfs_device_flags[] = { &dev_attr_device_init.attr, @@ -638,6 +660,9 @@ static struct attribute *ufs_sysfs_device_flags[] = { &dev_attr_phy_resource_removal.attr, &dev_attr_busy_rtc.attr, &dev_attr_disable_fw_update.attr, + &dev_attr_wb_enable.attr, + &dev_attr_wb_flush_en.attr, + &dev_attr_wb_flush_during_h8.attr, NULL, }; @@ -679,6 +704,11 @@ UFS_ATTRIBUTE(exception_event_status, _EE_STATUS); UFS_ATTRIBUTE(ffu_status, _FFU_STATUS); UFS_ATTRIBUTE(psa_state, _PSA_STATE); UFS_ATTRIBUTE(psa_data_size, _PSA_DATA_SIZE); +UFS_ATTRIBUTE(wb_flush_status, _WB_FLUSH_STATUS); +UFS_ATTRIBUTE(wb_avail_buf, _AVAIL_WB_BUFF_SIZE); +UFS_ATTRIBUTE(wb_life_time_est, _WB_BUFF_LIFE_TIME_EST); +UFS_ATTRIBUTE(wb_cur_buf, _CURR_WB_BUFF_SIZE); + static struct attribute *ufs_sysfs_attributes[] = { &dev_attr_boot_lun_enabled.attr, @@ -697,6 +727,10 @@ static struct attribute *ufs_sysfs_attributes[] = { &dev_attr_ffu_status.attr, &dev_attr_psa_state.attr, &dev_attr_psa_data_size.attr, + &dev_attr_wb_flush_status.attr, + &dev_attr_wb_avail_buf.attr, + &dev_attr_wb_life_time_est.attr, + &dev_attr_wb_cur_buf.attr, NULL, }; @@ -748,6 +782,8 @@ UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1); UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8); UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2); UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1); +UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4); + static struct attribute *ufs_sysfs_unit_descriptor[] = { &dev_attr_boot_lun_id.attr, @@ -763,6 +799,7 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = { &dev_attr_physical_memory_resourse_count.attr, &dev_attr_context_capabilities.attr, &dev_attr_large_unit_granularity.attr, + &dev_attr_wb_buf_alloc_units.attr, NULL, }; diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index d77512d964bc..daac5053b850 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -314,6 +314,11 @@ enum geometry_desc_param { GEOMETRY_DESC_PARAM_ENM4_MAX_NUM_UNITS = 0x3E, GEOMETRY_DESC_PARAM_ENM4_CAP_ADJ_FCTR = 0x42, GEOMETRY_DESC_PARAM_OPT_LOG_BLK_SIZE = 0x44, + GEOMETRY_DESC_PARAM_WB_MAX_ALLOC_UNITS = 0x4F, + GEOMETRY_DESC_PARAM_WB_MAX_WB_LUNS = 0x53, + GEOMETRY_DESC_PARAM_WB_BUFF_CAP_ADJ = 0x54, + GEOMETRY_DESC_PARAM_WB_SUP_RED_TYPE = 0x55, + GEOMETRY_DESC_PARAM_WB_SUP_WB_TYPE = 0x56, }; /* Health descriptor parameters offsets in bytes*/ From 04ee8a01abf8aa89d1bcdacffefc9a8879336a2c Mon Sep 17 00:00:00 2001 From: Asutosh Das Date: Wed, 22 Apr 2020 14:41:44 -0700 Subject: [PATCH 142/562] scsi: ufs-qcom: Configure write booster type Enable WriteBooster for Qualcomm platform. Link: https://lore.kernel.org/r/cd4cf745ea0b3a59c2075036e17316b97494fe65.1587591527.git.asutoshd@codeaurora.org Reviewed-by: Avri Altman Signed-off-by: Asutosh Das Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 701e9184adff..00ce8d6cbb36 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1070,6 +1070,7 @@ static void ufs_qcom_set_caps(struct ufs_hba *hba) hba->caps |= UFSHCD_CAP_CLK_GATING | UFSHCD_CAP_HIBERN8_WITH_CLK_GATING; hba->caps |= UFSHCD_CAP_CLK_SCALING; hba->caps |= UFSHCD_CAP_AUTO_BKOPS_SUSPEND; + hba->caps |= UFSHCD_CAP_WB_EN; if (host->hw_ver.major >= 0x2) { host->caps = UFS_QCOM_CAP_QUNIPRO | From ea941016abf7e6f81b130f8eb792e9ad0971237a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Sun, 19 Apr 2020 02:01:48 -0300 Subject: [PATCH 143/562] scsi: core: doc: Change function comments to kernel-doc style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite of functions being documented, they are not in the kernel-doc specification, and could not be included in kernel documentation. Change the style of functions comments to be compliant to the kernel-doc style. When the function comments are outdated, update then. [mkp: a few edits] Link: https://lore.kernel.org/r/20200419050148.33371-1-andrealmeid@collabora.com Signed-off-by: André Almeida Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 159 ++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 102 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ff0449fbe3a0..53b9ff12e030 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -202,24 +202,17 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) blk_mq_requeue_request(cmd->request, true); } -/* - * Function: scsi_queue_insert() +/** + * scsi_queue_insert - Reinsert a command in the queue. + * @cmd: command that we are adding to queue. + * @reason: why we are inserting command to queue. * - * Purpose: Insert a command in the midlevel queue. + * We do this for one of two cases. Either the host is busy and it cannot accept + * any more commands for the time being, or the device returned QUEUE_FULL and + * can accept no more commands. * - * Arguments: cmd - command that we are adding to queue. - * reason - why we are inserting command to queue. - * - * Lock status: Assumed that lock is not held upon entry. - * - * Returns: Nothing. - * - * Notes: We do this for one of two cases. Either the host is busy - * and it cannot accept any more commands for the time being, - * or the device returned QUEUE_FULL and can accept no more - * commands. - * Notes: This could be called either from an interrupt context or a - * normal process context. + * Context: This could be called either from an interrupt context or a normal + * process context. */ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) { @@ -301,16 +294,12 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, } EXPORT_SYMBOL(__scsi_execute); -/* - * Function: scsi_init_cmd_errh() +/** + * scsi_init_cmd_errh - Initialize cmd fields related to error handling. + * @cmd: command that is ready to be queued. * - * Purpose: Initialize cmd fields related to error handling. - * - * Arguments: cmd - command that is ready to be queued. - * - * Notes: This function has the job of initializing a number of - * fields related to error handling. Typically this will - * be called once for each command, as required. + * This function has the job of initializing a number of fields related to error + * handling. Typically this will be called once for each command, as required. */ static void scsi_init_cmd_errh(struct scsi_cmnd *cmd) { @@ -496,17 +485,11 @@ static void scsi_starved_list_run(struct Scsi_Host *shost) spin_unlock_irqrestore(shost->host_lock, flags); } -/* - * Function: scsi_run_queue() +/** + * scsi_run_queue - Select a proper request queue to serve next. + * @q: last request's queue * - * Purpose: Select a proper request queue to serve next - * - * Arguments: q - last request's queue - * - * Returns: Nothing - * - * Notes: The previous command was completely finished, start - * a new one if possible. + * The previous command was completely finished, start a new one if possible. */ static void scsi_run_queue(struct request_queue *q) { @@ -896,34 +879,27 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, return result; } -/* - * Function: scsi_io_completion() +/** + * scsi_io_completion - Completion processing for SCSI commands. + * @cmd: command that is finished. + * @good_bytes: number of processed bytes. * - * Purpose: Completion processing for block device I/O requests. + * We will finish off the specified number of sectors. If we are done, the + * command block will be released and the queue function will be goosed. If we + * are not done then we have to figure out what to do next: * - * Arguments: cmd - command that is finished. + * a) We can call scsi_io_completion_reprep(). The request will be + * unprepared and put back on the queue. Then a new command will + * be created for it. This should be used if we made forward + * progress, or if we want to switch from READ(10) to READ(6) for + * example. * - * Lock status: Assumed that no lock is held upon entry. + * b) We can call scsi_io_completion_action(). The request will be + * put back on the queue and retried using the same command as + * before, possibly after a delay. * - * Returns: Nothing - * - * Notes: We will finish off the specified number of sectors. If we - * are done, the command block will be released and the queue - * function will be goosed. If we are not done then we have to - * figure out what to do next: - * - * a) We can call scsi_requeue_command(). The request - * will be unprepared and put back on the queue. Then - * a new command will be created for it. This should - * be used if we made forward progress, or if we want - * to switch from READ(10) to READ(6) for example. - * - * b) We can call __scsi_queue_insert(). The request will - * be put back on the queue and retried using the same - * command as before, possibly after a delay. - * - * c) We can call scsi_end_request() with blk_stat other than - * BLK_STS_OK, to fail the remainder of the request. + * c) We can call scsi_end_request() with blk_stat other than + * BLK_STS_OK, to fail the remainder of the request. */ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { @@ -951,8 +927,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) blk_rq_sectors(req), good_bytes)); /* - * Next deal with any sectors which we were able to correctly - * handle. Failed, zero length commands always need to drop down + * Failed, zero length commands always need to drop down * to retry code. Fast path should return in this block. */ if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK)) { @@ -1002,16 +977,14 @@ static blk_status_t scsi_init_sgtable(struct request *req, return BLK_STS_OK; } -/* - * Function: scsi_init_io() +/** + * scsi_init_io - SCSI I/O initialization function. + * @cmd: command descriptor we wish to initialize * - * Purpose: SCSI I/O initialize function. - * - * Arguments: cmd - Command descriptor we wish to initialize - * - * Returns: BLK_STS_OK on success - * BLK_STS_RESOURCE if the failure is retryable - * BLK_STS_IOERR if the failure is fatal + * Returns: + * * BLK_STS_OK - on success + * * BLK_STS_RESOURCE - if the failure is retryable + * * BLK_STS_IOERR - if the failure is fatal */ blk_status_t scsi_init_io(struct scsi_cmnd *cmd) { @@ -1921,21 +1894,13 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q) } EXPORT_SYMBOL_GPL(scsi_device_from_queue); -/* - * Function: scsi_block_requests() +/** + * scsi_block_requests - Utility function used by low-level drivers to prevent + * further commands from being queued to the device. + * @shost: host in question * - * Purpose: Utility function used by low-level drivers to prevent further - * commands from being queued to the device. - * - * Arguments: shost - Host in question - * - * Returns: Nothing - * - * Lock status: No locks are assumed held. - * - * Notes: There is no timer nor any other means by which the requests - * get unblocked other than the low-level driver calling - * scsi_unblock_requests(). + * There is no timer nor any other means by which the requests get unblocked + * other than the low-level driver calling scsi_unblock_requests(). */ void scsi_block_requests(struct Scsi_Host *shost) { @@ -1943,25 +1908,15 @@ void scsi_block_requests(struct Scsi_Host *shost) } EXPORT_SYMBOL(scsi_block_requests); -/* - * Function: scsi_unblock_requests() +/** + * scsi_unblock_requests - Utility function used by low-level drivers to allow + * further commands to be queued to the device. + * @shost: host in question * - * Purpose: Utility function used by low-level drivers to allow further - * commands from being queued to the device. - * - * Arguments: shost - Host in question - * - * Returns: Nothing - * - * Lock status: No locks are assumed held. - * - * Notes: There is no timer nor any other means by which the requests - * get unblocked other than the low-level driver calling - * scsi_unblock_requests(). - * - * This is done as an API function so that changes to the - * internals of the scsi mid-layer won't require wholesale - * changes to drivers that use this feature. + * There is no timer nor any other means by which the requests get unblocked + * other than the low-level driver calling scsi_unblock_requests(). This is done + * as an API function so that changes to the internals of the scsi mid-layer + * won't require wholesale changes to drivers that use this feature. */ void scsi_unblock_requests(struct Scsi_Host *shost) { From 1afbc80c3d2b7831ec2027938876ebffbfd20abb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Apr 2020 14:06:53 +0300 Subject: [PATCH 144/562] gpio: dwapb: Get rid of unnecessary conjunction over 32-bit value When we mask interrupts before sleep, there is no need to have a conjunction with 0xffffffff since the accepted by dwapb_write() value is 32-bit. Signed-off-by: Andy Shevchenko Acked-by: Serge Semin Cc: Serge Semin Link: https://lore.kernel.org/r/20200422110654.23442-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 31d29ec6ab5c..9d8476afaba3 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -743,8 +743,7 @@ static int dwapb_gpio_suspend(struct device *dev) ctx->int_deb = dwapb_read(gpio, GPIO_PORTA_DEBOUNCE); /* Mask out interrupts */ - dwapb_write(gpio, GPIO_INTMASK, - 0xffffffff & ~ctx->wake_en); + dwapb_write(gpio, GPIO_INTMASK, ~ctx->wake_en); } } spin_unlock_irqrestore(&gc->bgpio_lock, flags); From 1475b6293af2e240294c09182872b18e47495f37 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Apr 2020 14:06:54 +0300 Subject: [PATCH 145/562] gpio: dwapb: Amend indentation in some cases In some cases indentation makes code harder to read. Amend indentation in those cases despite of lines go a bit over 80 character limit. Signed-off-by: Andy Shevchenko Acked-by: Serge Semin Cc: Serge Semin Link: https://lore.kernel.org/r/20200422110654.23442-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 9d8476afaba3..8639c4a7f469 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -437,7 +437,7 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, } } - for (hwirq = 0 ; hwirq < ngpio ; hwirq++) + for (hwirq = 0; hwirq < ngpio; hwirq++) irq_create_mapping(gpio->domain, hwirq); port->gc.to_irq = dwapb_gpio_to_irq; @@ -453,7 +453,7 @@ static void dwapb_irq_teardown(struct dwapb_gpio *gpio) if (!gpio->domain) return; - for (hwirq = 0 ; hwirq < ngpio ; hwirq++) + for (hwirq = 0; hwirq < ngpio; hwirq++) irq_dispose_mapping(irq_find_mapping(gpio->domain, hwirq)); irq_domain_remove(gpio->domain); @@ -478,10 +478,9 @@ static int dwapb_gpio_add_port(struct dwapb_gpio *gpio, return -ENOMEM; #endif - dat = gpio->regs + GPIO_EXT_PORTA + (pp->idx * GPIO_EXT_PORT_STRIDE); - set = gpio->regs + GPIO_SWPORTA_DR + (pp->idx * GPIO_SWPORT_DR_STRIDE); - dirout = gpio->regs + GPIO_SWPORTA_DDR + - (pp->idx * GPIO_SWPORT_DDR_STRIDE); + dat = gpio->regs + GPIO_EXT_PORTA + pp->idx * GPIO_EXT_PORT_STRIDE; + set = gpio->regs + GPIO_SWPORTA_DR + pp->idx * GPIO_SWPORT_DR_STRIDE; + dirout = gpio->regs + GPIO_SWPORTA_DDR + pp->idx * GPIO_SWPORT_DDR_STRIDE; /* This registers 32 GPIO lines per port */ err = bgpio_init(&port->gc, gpio->dev, 4, dat, set, NULL, dirout, @@ -588,8 +587,7 @@ static struct dwapb_platform_data *dwapb_gpio_get_pdata(struct device *dev) return ERR_PTR(-EINVAL); } - if (fwnode_property_read_u32(fwnode, "snps,nr-gpios", - &pp->ngpio)) { + if (fwnode_property_read_u32(fwnode, "snps,nr-gpios", &pp->ngpio)) { dev_info(dev, "failed to get number of gpios for port%d\n", i); From 262b90116619ecab0734a3d931851e1fa1e89a7d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Apr 2020 16:14:32 +0200 Subject: [PATCH 146/562] gpiolib: Improve kernel messages Simplify the printing of kernel messages and make the messages more accurate by using the most appropriate {dev,chip,gpiod}_*() helpers. Sample impact: -gpiochip_setup_dev: registered GPIOs 496 to 511 on device: gpiochip0 (e6050000.gpio) +gpio gpiochip0: registered GPIOs 496 to 511 on e6050000.gpio -no flags found for gpios +gpio-953 (?): no flags found for gpios -GPIO line 355 (PCIE/SATA switch) hogged as output/low +gpio-355 (PCIE/SATA switch): hogged as output/low Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200424141432.11400-1-geert+renesas@glider.be Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 40f2d7f69be2..48b4f70e2898 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1508,9 +1508,8 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) /* From this point, the .release() function cleans up gpio_device */ gdev->dev.release = gpiodevice_release; - pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n", - __func__, gdev->base, gdev->base + gdev->ngpio - 1, - dev_name(&gdev->dev), gdev->chip->label ? : "generic"); + dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base, + gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic"); return 0; @@ -1526,8 +1525,8 @@ static void gpiochip_machine_hog(struct gpio_chip *gc, struct gpiod_hog *hog) desc = gpiochip_get_desc(gc, hog->chip_hwnum); if (IS_ERR(desc)) { - pr_err("%s: unable to get GPIO desc: %ld\n", - __func__, PTR_ERR(desc)); + chip_err(gc, "%s: unable to get GPIO desc: %ld\n", __func__, + PTR_ERR(desc)); return; } @@ -1536,8 +1535,8 @@ static void gpiochip_machine_hog(struct gpio_chip *gc, struct gpiod_hog *hog) rv = gpiod_hog(desc, hog->line_name, hog->lflags, hog->dflags); if (rv) - pr_err("%s: unable to hog GPIO line (%s:%u): %d\n", - __func__, gc->label, hog->chip_hwnum, rv); + gpiod_err(desc, "%s: unable to hog GPIO line (%s:%u): %d\n", + __func__, gc->label, hog->chip_hwnum, rv); } static void machine_gpiochip_add(struct gpio_chip *gc) @@ -1562,8 +1561,8 @@ static void gpiochip_setup_devs(void) list_for_each_entry(gdev, &gpio_devices, list) { ret = gpiochip_setup_dev(gdev); if (ret) - pr_err("%s: Failed to initialize gpio device (%d)\n", - dev_name(&gdev->dev), ret); + dev_err(&gdev->dev, + "Failed to initialize gpio device (%d)\n", ret); } } @@ -2672,7 +2671,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gc, return -EINVAL; if (!gc->parent) { - pr_err("missing gpiochip .dev parent pointer\n"); + chip_err(gc, "missing gpiochip .dev parent pointer\n"); return -EINVAL; } gc->irq.threaded = threaded; @@ -4842,7 +4841,7 @@ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, /* No particular flag request, return here... */ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) { - pr_debug("no flags found for %s\n", con_id); + gpiod_dbg(desc, "no flags found for %s\n", con_id); return 0; } @@ -5067,8 +5066,7 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, /* Mark GPIO as hogged so it can be identified and removed later */ set_bit(FLAG_IS_HOGGED, &desc->flags); - pr_info("GPIO line %d (%s) hogged as %s%s\n", - desc_to_gpio(desc), name, + gpiod_info(desc, "hogged as %s%s\n", (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ? "output" : "input", (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ? (dflags & GPIOD_FLAGS_BIT_DIR_VAL) ? "/high" : "/low" : ""); From a5e93436e18c8f05a4e777ab125e140335d0ade8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Apr 2020 16:15:17 +0200 Subject: [PATCH 147/562] gpiolib: Rename "chip" variables to "gc" in core header file Consistently use "gc" for "struct gpio *" variables. This follows the spirit of commit a0b66a73785ccc8f ("gpio: Rename variable in core APIs"). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200424141517.11582-1-geert+renesas@glider.be Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.h | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 853ce681b4a4..9ed242316414 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -81,8 +81,7 @@ struct gpio_array { unsigned long invert_mask[]; }; -struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, - unsigned int hwnum); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); int gpiod_get_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, @@ -163,18 +162,18 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc) /* With chip prefix */ -#define chip_emerg(chip, fmt, ...) \ - dev_emerg(&chip->gpiodev->dev, "(%s): " fmt, chip->label, ##__VA_ARGS__) -#define chip_crit(chip, fmt, ...) \ - dev_crit(&chip->gpiodev->dev, "(%s): " fmt, chip->label, ##__VA_ARGS__) -#define chip_err(chip, fmt, ...) \ - dev_err(&chip->gpiodev->dev, "(%s): " fmt, chip->label, ##__VA_ARGS__) -#define chip_warn(chip, fmt, ...) \ - dev_warn(&chip->gpiodev->dev, "(%s): " fmt, chip->label, ##__VA_ARGS__) -#define chip_info(chip, fmt, ...) \ - dev_info(&chip->gpiodev->dev, "(%s): " fmt, chip->label, ##__VA_ARGS__) -#define chip_dbg(chip, fmt, ...) \ - dev_dbg(&chip->gpiodev->dev, "(%s): " fmt, chip->label, ##__VA_ARGS__) +#define chip_emerg(gc, fmt, ...) \ + dev_emerg(&gc->gpiodev->dev, "(%s): " fmt, gc->label, ##__VA_ARGS__) +#define chip_crit(gc, fmt, ...) \ + dev_crit(&gc->gpiodev->dev, "(%s): " fmt, gc->label, ##__VA_ARGS__) +#define chip_err(gc, fmt, ...) \ + dev_err(&gc->gpiodev->dev, "(%s): " fmt, gc->label, ##__VA_ARGS__) +#define chip_warn(gc, fmt, ...) \ + dev_warn(&gc->gpiodev->dev, "(%s): " fmt, gc->label, ##__VA_ARGS__) +#define chip_info(gc, fmt, ...) \ + dev_info(&gc->gpiodev->dev, "(%s): " fmt, gc->label, ##__VA_ARGS__) +#define chip_dbg(gc, fmt, ...) \ + dev_dbg(&gc->gpiodev->dev, "(%s): " fmt, gc->label, ##__VA_ARGS__) #ifdef CONFIG_GPIO_SYSFS From 1c12857c6722579cd4aca67b21d3e80870a290b1 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Fri, 24 Apr 2020 23:41:02 +0800 Subject: [PATCH 148/562] gpio: ftgpio010: Fix small typo Fix a spelling typo in gpio-ftgpio010.c by codespell s/desireable/desirable/ Cc: Andy Shevchenko Signed-off-by: Dejin Zheng Link: https://lore.kernel.org/r/20200424154103.10311-2-zhengdejin5@gmail.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-ftgpio010.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-ftgpio010.c b/drivers/gpio/gpio-ftgpio010.c index fbddb1662428..4031164780f7 100644 --- a/drivers/gpio/gpio-ftgpio010.c +++ b/drivers/gpio/gpio-ftgpio010.c @@ -193,7 +193,7 @@ static int ftgpio_gpio_set_config(struct gpio_chip *gc, unsigned int offset, if (val == deb_div) { /* * The debounce timer happens to already be set to the - * desireable value, what a coincidence! We can just enable + * desirable value, what a coincidence! We can just enable * debounce on this GPIO line and return. This happens more * often than you think, for example when all GPIO keys * on a system are requesting the same debounce interval. From 66d8ad67aab3bc6f55e7de81565cd0d4875bd851 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 27 Apr 2020 11:08:29 +0000 Subject: [PATCH 149/562] gpio: mlxbf2: fix return value check in mlxbf2_gpio_get_lock_res() In case of error, the function devm_ioremap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20200427110829.154785-1-weiyongjun1@huawei.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mlxbf2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index 240b488609ac..fca6a50d9308 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -109,8 +109,8 @@ static int mlxbf2_gpio_get_lock_res(struct platform_device *pdev) } yu_arm_gpio_lock_param.io = devm_ioremap(dev, res->start, size); - if (IS_ERR(yu_arm_gpio_lock_param.io)) - ret = PTR_ERR(yu_arm_gpio_lock_param.io); + if (!yu_arm_gpio_lock_param.io) + ret = -ENOMEM; exit: mutex_unlock(yu_arm_gpio_lock_param.lock); From dc5c1439376de8e107396ff5bdbc05af3b04ff0b Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Fri, 24 Apr 2020 23:41:03 +0800 Subject: [PATCH 150/562] gpio: mm-lantiq: Fix small typo Fix a spelling typo in gpio-mm-lantiq.c by codespell s/dont/don't/ Cc: Andy Shevchenko Signed-off-by: Dejin Zheng Link: https://lore.kernel.org/r/20200424154103.10311-3-zhengdejin5@gmail.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mm-lantiq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mm-lantiq.c b/drivers/gpio/gpio-mm-lantiq.c index f460d71b0c92..538e31fe8903 100644 --- a/drivers/gpio/gpio-mm-lantiq.c +++ b/drivers/gpio/gpio-mm-lantiq.c @@ -36,7 +36,7 @@ struct ltq_mm { * @chip: Pointer to our private data structure. * * Write the shadow value to the EBU to set the gpios. We need to set the - * global EBU lock to make sure that PCI/MTD dont break. + * global EBU lock to make sure that PCI/MTD don't break. */ static void ltq_mm_apply(struct ltq_mm *chip) { From 6eb7edffb28558aaa3a3e625ac9dcd40fc603bc6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:01 +0300 Subject: [PATCH 151/562] RDMA/mlx5: Organize QP types checks in one place Perform check if QP type is supported in one place at the beginning of the create_qp function instead of current implementation with checks buried inside of the code. Link: https://lore.kernel.org/r/20200427154636.381474-2-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 135 +++++++++++++++++--------------- 1 file changed, 71 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index af599c8b88aa..fdab5b6db1e5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2677,12 +2677,42 @@ static int set_mlx_qp_type(struct mlx5_ib_dev *dev, } } - if (!MLX5_CAP_GEN(dev->mdev, dct)) { - mlx5_ib_dbg(dev, "DC transport is not supported\n"); - return -EOPNOTSUPP; + return 0; +} + +static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct)) + goto out; + + switch (attr->qp_type) { + case IB_QPT_XRC_TGT: + case IB_QPT_XRC_INI: + if (!MLX5_CAP_GEN(dev->mdev, xrc)) + goto out; + fallthrough; + case IB_QPT_RAW_PACKET: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_SMI: + case MLX5_IB_QPT_HW_GSI: + case MLX5_IB_QPT_REG_UMR: + case IB_QPT_DRIVER: + case IB_QPT_GSI: + return 0; + case IB_QPT_RAW_IPV6: + case IB_QPT_RAW_ETHERTYPE: + case IB_QPT_MAX: + default: + goto out; } return 0; + +out: + mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type); + return -EOPNOTSUPP; } struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, @@ -2698,9 +2728,17 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - if (pd) { - dev = to_mdev(pd->device); + dev = pd ? to_mdev(pd->device) : + to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); + err = check_qp_type(dev, init_attr); + if (err) { + mlx5_ib_dbg(dev, "Unsupported QP type %d\n", + init_attr->qp_type); + return ERR_PTR(err); + } + + if (pd) { if (init_attr->qp_type == IB_QPT_RAW_PACKET) { if (!ucontext) { mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); @@ -2718,7 +2756,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, ib_qp_type_str(init_attr->qp_type)); return ERR_PTR(-EINVAL); } - dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); } if (init_attr->qp_type == IB_QPT_DRIVER) { @@ -2741,68 +2778,38 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } } - switch (init_attr->qp_type) { - case IB_QPT_XRC_TGT: - case IB_QPT_XRC_INI: - if (!MLX5_CAP_GEN(dev->mdev, xrc)) { - mlx5_ib_dbg(dev, "XRC not supported\n"); - return ERR_PTR(-ENOSYS); - } - init_attr->recv_cq = NULL; - if (init_attr->qp_type == IB_QPT_XRC_TGT) { - xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; - init_attr->send_cq = NULL; - } - - /* fall through */ - case IB_QPT_RAW_PACKET: - case IB_QPT_RC: - case IB_QPT_UC: - case IB_QPT_UD: - case IB_QPT_SMI: - case MLX5_IB_QPT_HW_GSI: - case MLX5_IB_QPT_REG_UMR: - case MLX5_IB_QPT_DCI: - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); - - err = create_qp_common(dev, pd, init_attr, udata, qp); - if (err) { - mlx5_ib_dbg(dev, "create_qp_common failed\n"); - kfree(qp); - return ERR_PTR(err); - } - - if (is_qp0(init_attr->qp_type)) - qp->ibqp.qp_num = 0; - else if (is_qp1(init_attr->qp_type)) - qp->ibqp.qp_num = 1; - else - qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; - - mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", - qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, - init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1, - init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1); - - qp->trans_qp.xrcdn = xrcdn; - - break; - - case IB_QPT_GSI: + if (init_attr->qp_type == IB_QPT_GSI) return mlx5_ib_gsi_create_qp(pd, init_attr); - case IB_QPT_RAW_IPV6: - case IB_QPT_RAW_ETHERTYPE: - case IB_QPT_MAX: - default: - mlx5_ib_dbg(dev, "unsupported qp type %d\n", - init_attr->qp_type); - /* Don't support raw QPs */ - return ERR_PTR(-EOPNOTSUPP); + if (init_attr->qp_type == IB_QPT_XRC_TGT) { + init_attr->recv_cq = NULL; + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = NULL; } + if (init_attr->qp_type == IB_QPT_XRC_INI) + init_attr->recv_cq = NULL; + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + err = create_qp_common(dev, pd, init_attr, udata, qp); + if (err) { + mlx5_ib_dbg(dev, "create_qp_common failed\n"); + kfree(qp); + return ERR_PTR(err); + } + + if (is_qp0(init_attr->qp_type)) + qp->ibqp.qp_num = 0; + else if (is_qp1(init_attr->qp_type)) + qp->ibqp.qp_num = 1; + else + qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; + + qp->trans_qp.xrcdn = xrcdn; + if (verbs_init_attr->qp_type == IB_QPT_DRIVER) qp->qp_sub_type = init_attr->qp_type; From 1265d9f7a522423ef5234457a6d2a2f2a3ccad13 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:02 +0300 Subject: [PATCH 152/562] RDMA/mlx5: Delete impossible GSI port check GSI QP is created in the kernel with very strict parameters, there is no possible way that port number will be wrong in such flow. Link: https://lore.kernel.org/r/20200427154636.381474-3-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/gsi.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 1ae6fd95acaa..1afbf03d1a98 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -123,15 +123,6 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; int ret; - mlx5_ib_dbg(dev, "creating GSI QP\n"); - - if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) { - mlx5_ib_warn(dev, - "invalid port number %d during GSI QP creation\n", - port_num); - return ERR_PTR(-EINVAL); - } - gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); if (!gsi) return ERR_PTR(-ENOMEM); From 2242cc25ce82058986ff7721e3d2464d775032df Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:03 +0300 Subject: [PATCH 153/562] RDMA/mlx5: Perform check if QP creation flow is valid Fast check that kernel and user flows provides enough data to create QP. Link: https://lore.kernel.org/r/20200427154636.381474-4-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 128 +++++++++++++++----------------- 1 file changed, 61 insertions(+), 67 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index fdab5b6db1e5..91d6151c349c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1666,9 +1666,6 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, size_t required_cmd_sz; u8 lb_flag = 0; - if (init_attr->qp_type != IB_QPT_RAW_PACKET) - return -EOPNOTSUPP; - if (init_attr->create_flags || init_attr->send_cq) return -EINVAL; @@ -2032,13 +2029,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (mlx5_st < 0) return -EINVAL; - if (init_attr->rwq_ind_tbl) { - if (!udata) - return -ENOSYS; - - err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); - return err; - } + if (init_attr->rwq_ind_tbl) + return create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { @@ -2565,39 +2557,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); } -static const char *ib_qp_type_str(enum ib_qp_type type) -{ - switch (type) { - case IB_QPT_SMI: - return "IB_QPT_SMI"; - case IB_QPT_GSI: - return "IB_QPT_GSI"; - case IB_QPT_RC: - return "IB_QPT_RC"; - case IB_QPT_UC: - return "IB_QPT_UC"; - case IB_QPT_UD: - return "IB_QPT_UD"; - case IB_QPT_RAW_IPV6: - return "IB_QPT_RAW_IPV6"; - case IB_QPT_RAW_ETHERTYPE: - return "IB_QPT_RAW_ETHERTYPE"; - case IB_QPT_XRC_INI: - return "IB_QPT_XRC_INI"; - case IB_QPT_XRC_TGT: - return "IB_QPT_XRC_TGT"; - case IB_QPT_RAW_PACKET: - return "IB_QPT_RAW_PACKET"; - case MLX5_IB_QPT_REG_UMR: - return "MLX5_IB_QPT_REG_UMR"; - case IB_QPT_DRIVER: - return "IB_QPT_DRIVER"; - case IB_QPT_MAX: - default: - return "Invalid QP type"; - } -} - static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, struct ib_qp_init_attr *attr, struct mlx5_ib_create_qp *ucmd, @@ -2655,9 +2614,6 @@ static int set_mlx_qp_type(struct mlx5_ib_dev *dev, enum { MLX_QP_FLAGS = MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI }; int err; - if (!udata) - return -EINVAL; - if (udata->inlen < sizeof(*ucmd)) { mlx5_ib_dbg(dev, "create_qp user command is smaller than expected\n"); return -EINVAL; @@ -2715,6 +2671,62 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr) return -EOPNOTSUPP; } +static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + if (!udata) { + /* Kernel create_qp callers */ + if (attr->rwq_ind_tbl) + return -EOPNOTSUPP; + + switch (attr->qp_type) { + case IB_QPT_RAW_PACKET: + case IB_QPT_DRIVER: + return -EOPNOTSUPP; + default: + return 0; + } + } + + /* Userspace create_qp callers */ + if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) { + mlx5_ib_dbg(dev, + "Raw Packet QP is only supported for CQE version > 0\n"); + return -EINVAL; + } + + if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) { + mlx5_ib_dbg(dev, + "Wrong QP type %d for the RWQ indirect table\n", + attr->qp_type); + return -EINVAL; + } + + switch (attr->qp_type) { + case IB_QPT_SMI: + case MLX5_IB_QPT_HW_GSI: + case MLX5_IB_QPT_REG_UMR: + case IB_QPT_GSI: + mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n", + attr->qp_type); + return -EINVAL; + default: + break; + } + + /* + * We don't need to see this warning, it means that kernel code + * missing ib_pd. Placed here to catch developer's mistakes. + */ + WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT, + "There is a missing PD pointer assignment\n"); + return 0; +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *verbs_init_attr, struct ib_udata *udata) @@ -2725,8 +2737,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, int err; struct ib_qp_init_attr mlx_init_attr; struct ib_qp_init_attr *init_attr = verbs_init_attr; - struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct mlx5_ib_ucontext, ibucontext); dev = pd ? to_mdev(pd->device) : to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); @@ -2738,25 +2748,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, return ERR_PTR(err); } - if (pd) { - if (init_attr->qp_type == IB_QPT_RAW_PACKET) { - if (!ucontext) { - mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); - return ERR_PTR(-EINVAL); - } else if (!ucontext->cqe_version) { - mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n"); - return ERR_PTR(-EINVAL); - } - } - } else { - /* being cautious here */ - if (init_attr->qp_type != IB_QPT_XRC_TGT && - init_attr->qp_type != MLX5_IB_QPT_REG_UMR) { - pr_warn("%s: no PD for transport %s\n", __func__, - ib_qp_type_str(init_attr->qp_type)); - return ERR_PTR(-EINVAL); - } - } + err = check_valid_flow(dev, pd, init_attr, udata); + if (err) + return ERR_PTR(err); if (init_attr->qp_type == IB_QPT_DRIVER) { struct mlx5_ib_create_qp ucmd; From 9c2ba4ede4c0166d4e3bdc15e3b32c9680309ca1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:04 +0300 Subject: [PATCH 154/562] RDMA/mlx5: Prepare QP allocation for future removal Unify the QP memory allocation across different paths, so it will be in one place. Link: https://lore.kernel.org/r/20200427154636.381474-5-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 45 +++++++++++++++------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 91d6151c349c..07df470e0d58 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2557,14 +2557,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); } -static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, +static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr, struct mlx5_ib_create_qp *ucmd, struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - struct mlx5_ib_qp *qp; int err = 0; u32 uidx = MLX5_IB_DEFAULT_UIDX; void *dctc; @@ -2576,15 +2575,9 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, if (err) return ERR_PTR(err); - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); - qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); - if (!qp->dct.in) { - err = -ENOMEM; - goto err_free; - } + if (!qp->dct.in) + return ERR_PTR(-ENOMEM); MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid); dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); @@ -2601,9 +2594,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, qp->state = IB_QPS_RESET; return &qp->ibqp; -err_free: - kfree(qp); - return ERR_PTR(err); } static int set_mlx_qp_type(struct mlx5_ib_dev *dev, @@ -2752,6 +2742,13 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (err) return ERR_PTR(err); + if (init_attr->qp_type == IB_QPT_GSI) + return mlx5_ib_gsi_create_qp(pd, init_attr); + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + if (init_attr->qp_type == IB_QPT_DRIVER) { struct mlx5_ib_create_qp ucmd; @@ -2759,22 +2756,21 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr)); err = set_mlx_qp_type(dev, init_attr, &ucmd, udata); if (err) - return ERR_PTR(err); + goto free_qp; if (init_attr->qp_type == MLX5_IB_QPT_DCI) { if (init_attr->cap.max_recv_wr || init_attr->cap.max_recv_sge) { mlx5_ib_dbg(dev, "DCI QP requires zero size receive queue\n"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto free_qp; } } else { - return mlx5_ib_create_dct(pd, init_attr, &ucmd, udata); + return mlx5_ib_create_dct(pd, qp, init_attr, &ucmd, + udata); } } - if (init_attr->qp_type == IB_QPT_GSI) - return mlx5_ib_gsi_create_qp(pd, init_attr); - if (init_attr->qp_type == IB_QPT_XRC_TGT) { init_attr->recv_cq = NULL; xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; @@ -2784,15 +2780,10 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (init_attr->qp_type == IB_QPT_XRC_INI) init_attr->recv_cq = NULL; - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); - err = create_qp_common(dev, pd, init_attr, udata, qp); if (err) { mlx5_ib_dbg(dev, "create_qp_common failed\n"); - kfree(qp); - return ERR_PTR(err); + goto free_qp; } if (is_qp0(init_attr->qp_type)) @@ -2808,6 +2799,10 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, qp->qp_sub_type = init_attr->qp_type; return &qp->ibqp; + +free_qp: + kfree(qp); + return ERR_PTR(err); } static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) From c86936e6eb13bf3759e4cc0629ccc0076dd763de Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:05 +0300 Subject: [PATCH 155/562] RDMA/mlx5: Avoid setting redundant NULL for XRC QPs There is no need to set NULL in recv_cq and send_cq, they are already set to NULL by the IB/core logic. Link: https://lore.kernel.org/r/20200427154636.381474-6-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 07df470e0d58..86933a2023dc 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2771,14 +2771,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } } - if (init_attr->qp_type == IB_QPT_XRC_TGT) { - init_attr->recv_cq = NULL; + if (init_attr->qp_type == IB_QPT_XRC_TGT) xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; - init_attr->send_cq = NULL; - } - - if (init_attr->qp_type == IB_QPT_XRC_INI) - init_attr->recv_cq = NULL; err = create_qp_common(dev, pd, init_attr, udata, qp); if (err) { From 318d2b06fbaa8fbce379a4e00901251b6368b4e3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:06 +0300 Subject: [PATCH 156/562] RDMA/mlx5: Set QP subtype immediately when it is known There is no need to delay QP subtype assignment to the end of the create_qp() function and it is better to move it to be immediately after it is checked so we would be able to rewrite later checks to be based on it and not on over-written struct ib_qp_init_attr. Link: https://lore.kernel.org/r/20200427154636.381474-7-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 86933a2023dc..d991c33c4d9b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2581,7 +2581,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid); dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); - qp->qp_sub_type = MLX5_IB_QPT_DCT; MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn); MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn); MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn); @@ -2765,7 +2764,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, err = -EINVAL; goto free_qp; } + qp->qp_sub_type = MLX5_IB_QPT_DCI; } else { + qp->qp_sub_type = MLX5_IB_QPT_DCT; return mlx5_ib_create_dct(pd, qp, init_attr, &ucmd, udata); } @@ -2789,9 +2790,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, qp->trans_qp.xrcdn = xrcdn; - if (verbs_init_attr->qp_type == IB_QPT_DRIVER) - qp->qp_sub_type = init_attr->qp_type; - return &qp->ibqp; free_qp: From 47c806121a515bfee3180cced40af25cbf2ac10c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:07 +0300 Subject: [PATCH 157/562] RDMA/mlx5: Separate create QP flows to be based on type Move driver QP creation flow to separate functions to simplify the create_qp() and allow future separation of create_qp_common() to subtypes. Link: https://lore.kernel.org/r/20200427154636.381474-8-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 54 ++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d991c33c4d9b..ae336c1eed74 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2557,10 +2557,9 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); } -static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, - struct ib_qp_init_attr *attr, - struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata) +static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr, + struct mlx5_ib_create_qp *ucmd, struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); @@ -2568,16 +2567,13 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, u32 uidx = MLX5_IB_DEFAULT_UIDX; void *dctc; - if (!attr->srq || !attr->recv_cq) - return ERR_PTR(-EINVAL); - err = get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &uidx); if (err) - return ERR_PTR(err); + return err; qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); if (!qp->dct.in) - return ERR_PTR(-ENOMEM); + return -ENOMEM; MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid); dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); @@ -2592,7 +2588,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, qp->state = IB_QPS_RESET; - return &qp->ibqp; + return 0; } static int set_mlx_qp_type(struct mlx5_ib_dev *dev, @@ -2716,10 +2712,36 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; } +static int create_driver_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr, + struct mlx5_ib_create_qp *ucmd, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *mdev = to_mdev(pd->device); + int ret = -EINVAL; + + switch (qp->qp_sub_type) { + case MLX5_IB_QPT_DCT: + if (!attr->srq || !attr->recv_cq) + goto out; + + ret = create_dct(pd, qp, attr, ucmd, udata); + break; + case MLX5_IB_QPT_DCI: + ret = create_qp_common(mdev, pd, attr, udata, qp); + break; + default: + return -EINVAL; + } + +out: return ret; +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *verbs_init_attr, struct ib_udata *udata) { + struct mlx5_ib_create_qp ucmd = {}; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; u16 xrcdn = 0; @@ -2749,8 +2771,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-ENOMEM); if (init_attr->qp_type == IB_QPT_DRIVER) { - struct mlx5_ib_create_qp ucmd; - init_attr = &mlx_init_attr; memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr)); err = set_mlx_qp_type(dev, init_attr, &ucmd, udata); @@ -2767,15 +2787,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, qp->qp_sub_type = MLX5_IB_QPT_DCI; } else { qp->qp_sub_type = MLX5_IB_QPT_DCT; - return mlx5_ib_create_dct(pd, qp, init_attr, &ucmd, - udata); } } if (init_attr->qp_type == IB_QPT_XRC_TGT) xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; - err = create_qp_common(dev, pd, init_attr, udata, qp); + switch (init_attr->qp_type) { + case IB_QPT_DRIVER: + err = create_driver_qp(pd, qp, init_attr, &ucmd, udata); + break; + default: + err = create_qp_common(dev, pd, init_attr, udata, qp); + } if (err) { mlx5_ib_dbg(dev, "create_qp_common failed\n"); goto free_qp; From fd9dab7edc590a52ed141265fa7c88cf938e9be0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:08 +0300 Subject: [PATCH 158/562] RDMA/mlx5: Split scatter CQE configuration for DCT QP DCT QPs have separate creation flow and can be easily extracted from configure_responder_scat_cqe(), this makes both updated functions more clear. Link: https://lore.kernel.org/r/20200427154636.381474-9-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ae336c1eed74..d0e8d27305e9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1907,13 +1907,6 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); - if (init_attr->qp_type == MLX5_IB_QPT_DCT) { - if (rcqe_sz == 128) - MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); - - return; - } - MLX5_SET(qpc, qpc, cs_res, rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : MLX5_RES_SCAT_DATA32_CQE); @@ -2583,8 +2576,12 @@ static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); MLX5_SET(dctc, dctc, user_index, uidx); - if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE) - configure_responder_scat_cqe(attr, dctc); + if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE) { + int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq); + + if (rcqe_sz == 128) + MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + } qp->state = IB_QPS_RESET; From 8bde2c509e4035fb4a200a60f82f85eec914145b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:09 +0300 Subject: [PATCH 159/562] RDMA/mlx5: Update all DRIVER QP places to use QP subtype Instead of overwriting QP init attributes with driver QP subtype, use that subtype directly. This change will allow us to remove logic which cached QP init attributes. Link: https://lore.kernel.org/r/20200427154636.381474-10-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 48 +++++++++++---------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d0e8d27305e9..0b2090bcb8e8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1232,7 +1232,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || - (attr->qp_type == MLX5_IB_QPT_DCI) || + (qp->qp_sub_type == MLX5_IB_QPT_DCI) || (attr->qp_type == IB_QPT_XRC_INI)) return MLX5_SRQ_RQ; else if (!qp->has_rq) @@ -1241,15 +1241,6 @@ static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) return MLX5_NON_ZERO_RQ; } -static int is_connected(enum ib_qp_type qp_type) -{ - if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC || - qp_type == MLX5_IB_QPT_DCI) - return 1; - - return 0; -} - static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_sq *sq, u32 tdn, @@ -1897,33 +1888,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } -static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, - void *qpc) -{ - int rcqe_sz; - - if (init_attr->qp_type == MLX5_IB_QPT_DCI) - return; - - rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); - - MLX5_SET(qpc, qpc, cs_res, - rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : - MLX5_RES_SCAT_DATA32_CQE); -} - static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_create_qp *ucmd, void *qpc) { - enum ib_qp_type qpt = init_attr->qp_type; int scqe_sz; bool allow_scat_cqe = false; - if (qpt == IB_QPT_UC || qpt == IB_QPT_UD) - return; - if (ucmd) allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE; @@ -2018,7 +1990,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); - mlx5_st = to_mlx5_st(init_attr->qp_type); + mlx5_st = to_mlx5_st((init_attr->qp_type != IB_QPT_DRIVER) ? + init_attr->qp_type : + qp->qp_sub_type); if (mlx5_st < 0) return -EINVAL; @@ -2240,12 +2214,20 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, cd_slave_receive, 1); if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT) MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); - if (qp->scat_cqe && is_connected(init_attr->qp_type)) { - configure_responder_scat_cqe(init_attr, qpc); + if (qp->scat_cqe && (init_attr->qp_type == IB_QPT_RC || + init_attr->qp_type == IB_QPT_UC)) { + int rcqe_sz = rcqe_sz = + mlx5_ib_get_cqe_size(init_attr->recv_cq); + + MLX5_SET(qpc, qpc, cs_res, + rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : + MLX5_RES_SCAT_DATA32_CQE); + } + if (qp->scat_cqe && (qp->qp_sub_type == MLX5_IB_QPT_DCI || + init_attr->qp_type == IB_QPT_RC)) configure_requester_scat_cqe(dev, init_attr, udata ? &ucmd : NULL, qpc); - } if (qp->rq.wqe_cnt) { MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); From 2fdddbd5c966c1ff7e35b0e4d1fa4b951d0f5542 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:10 +0300 Subject: [PATCH 160/562] RDMA/mlx5: Move DRIVER QP flags check into separate function Perform validation of DRIVER QP in relevant function. Link: https://lore.kernel.org/r/20200427154636.381474-11-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 91 ++++++++++++++++----------------- 1 file changed, 43 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0b2090bcb8e8..5e4c73c4a7b4 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2570,36 +2570,6 @@ static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, return 0; } -static int set_mlx_qp_type(struct mlx5_ib_dev *dev, - struct ib_qp_init_attr *init_attr, - struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata) -{ - enum { MLX_QP_FLAGS = MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI }; - int err; - - if (udata->inlen < sizeof(*ucmd)) { - mlx5_ib_dbg(dev, "create_qp user command is smaller than expected\n"); - return -EINVAL; - } - err = ib_copy_from_udata(ucmd, udata, sizeof(*ucmd)); - if (err) - return err; - - if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCI) { - init_attr->qp_type = MLX5_IB_QPT_DCI; - } else { - if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCT) { - init_attr->qp_type = MLX5_IB_QPT_DCT; - } else { - mlx5_ib_dbg(dev, "Invalid QP flags\n"); - return -EINVAL; - } - } - - return 0; -} - static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct)) @@ -2691,6 +2661,24 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; } +static int process_vendor_flags(struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr, + struct mlx5_ib_create_qp *ucmd) +{ + switch (ucmd->flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) { + case MLX5_QP_FLAG_TYPE_DCI: + qp->qp_sub_type = MLX5_IB_QPT_DCI; + break; + case MLX5_QP_FLAG_TYPE_DCT: + qp->qp_sub_type = MLX5_IB_QPT_DCT; + break; + default: + return -EINVAL; + } + + return 0; +} + static int create_driver_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr, struct mlx5_ib_create_qp *ucmd, @@ -2707,6 +2695,9 @@ static int create_driver_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, ret = create_dct(pd, qp, attr, ucmd, udata); break; case MLX5_IB_QPT_DCI: + if (attr->cap.max_recv_wr || attr->cap.max_recv_sge) + goto out; + ret = create_qp_common(mdev, pd, attr, udata, qp); break; default: @@ -2716,8 +2707,16 @@ static int create_driver_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, out: return ret; } +static size_t process_udata_size(struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + size_t ucmd = sizeof(struct mlx5_ib_create_qp); + + return (udata->inlen < ucmd) ? 0 : ucmd; +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *verbs_init_attr, + struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct mlx5_ib_create_qp ucmd = {}; @@ -2725,8 +2724,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp; u16 xrcdn = 0; int err; - struct ib_qp_init_attr mlx_init_attr; - struct ib_qp_init_attr *init_attr = verbs_init_attr; dev = pd ? to_mdev(pd->device) : to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); @@ -2745,28 +2742,26 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (init_attr->qp_type == IB_QPT_GSI) return mlx5_ib_gsi_create_qp(pd, init_attr); + if (udata && init_attr->qp_type == IB_QPT_DRIVER) { + size_t inlen = + process_udata_size(init_attr, udata); + + if (!inlen) + return ERR_PTR(-EINVAL); + + err = ib_copy_from_udata(&ucmd, udata, inlen); + if (err) + return ERR_PTR(err); + } + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); if (init_attr->qp_type == IB_QPT_DRIVER) { - init_attr = &mlx_init_attr; - memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr)); - err = set_mlx_qp_type(dev, init_attr, &ucmd, udata); + err = process_vendor_flags(qp, init_attr, &ucmd); if (err) goto free_qp; - - if (init_attr->qp_type == MLX5_IB_QPT_DCI) { - if (init_attr->cap.max_recv_wr || - init_attr->cap.max_recv_sge) { - mlx5_ib_dbg(dev, "DCI QP requires zero size receive queue\n"); - err = -EINVAL; - goto free_qp; - } - qp->qp_sub_type = MLX5_IB_QPT_DCI; - } else { - qp->qp_sub_type = MLX5_IB_QPT_DCT; - } } if (init_attr->qp_type == IB_QPT_XRC_TGT) From 2dfac92dbb5d6e7607d5a4d3dc9d750f45440d98 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:11 +0300 Subject: [PATCH 161/562] RDMA/mlx5: Remove second copy from user for non RSS RAW QPs Change the common code to use already copied user command buffer. Link: https://lore.kernel.org/r/20200427154636.381474-12-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 56 ++++++++++++++++----------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5e4c73c4a7b4..91a2c9994b59 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1967,6 +1967,7 @@ static inline bool check_flags_mask(uint64_t input, uint64_t supported) static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, + struct mlx5_ib_create_qp *ucmd, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; @@ -1979,7 +1980,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_cq *recv_cq; unsigned long flags; u32 uidx = MLX5_IB_DEFAULT_UIDX; - struct mlx5_ib_create_qp ucmd; struct mlx5_ib_qp_base *base; int mlx5_st; void *qpc; @@ -2056,12 +2056,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (udata) { - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - mlx5_ib_dbg(dev, "copy failed\n"); - return -EFAULT; - } - - if (!check_flags_mask(ucmd.flags, + if (!check_flags_mask(ucmd->flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE | MLX5_QP_FLAG_BFREG_INDEX | MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE | @@ -2075,14 +2070,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_QP_FLAG_TYPE_DCT)) return -EINVAL; - err = get_qp_user_index(ucontext, &ucmd, udata->inlen, &uidx); + err = get_qp_user_index(ucontext, ucmd, udata->inlen, &uidx); if (err) return err; - qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); + qp->wq_sig = !!(ucmd->flags & MLX5_QP_FLAG_SIGNATURE); if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) - qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); - if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + qp->scat_cqe = + !!(ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE); + if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { if (init_attr->qp_type != IB_QPT_RAW_PACKET || !tunnel_offload_supported(mdev)) { mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); @@ -2091,7 +2087,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS; } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) { + if (ucmd->flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) { if (init_attr->qp_type != IB_QPT_RAW_PACKET) { mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n"); return -EOPNOTSUPP; @@ -2099,7 +2095,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { + if (ucmd->flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { if (init_attr->qp_type != IB_QPT_RAW_PACKET) { mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n"); return -EOPNOTSUPP; @@ -2107,7 +2103,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; } - if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { + if (ucmd->flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { if (init_attr->qp_type != IB_QPT_RC || !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) { mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); @@ -2138,8 +2134,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, &qp->trans_qp.base; qp->has_rq = qp_has_rq(init_attr); - err = set_rq_size(dev, &init_attr->cap, qp->has_rq, - qp, udata ? &ucmd : NULL); + err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; @@ -2149,15 +2144,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (udata) { __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); - mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); - if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || - ucmd.rq_wqe_count != qp->rq.wqe_cnt) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", + ucmd->sq_wqe_count); + if (ucmd->rq_wqe_shift != qp->rq.wqe_shift || + ucmd->rq_wqe_count != qp->rq.wqe_cnt) { mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } - if (ucmd.sq_wqe_count > max_wqes) { + if (ucmd->sq_wqe_count > max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, max_wqes); + ucmd->sq_wqe_count, max_wqes); return -EINVAL; } if (init_attr->create_flags & @@ -2225,9 +2221,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (qp->scat_cqe && (qp->qp_sub_type == MLX5_IB_QPT_DCI || init_attr->qp_type == IB_QPT_RC)) - configure_requester_scat_cqe(dev, init_attr, - udata ? &ucmd : NULL, - qpc); + configure_requester_scat_cqe(dev, init_attr, ucmd, qpc); if (qp->rq.wqe_cnt) { MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); @@ -2308,7 +2302,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (init_attr->qp_type == IB_QPT_RAW_PACKET || qp->flags & MLX5_IB_QP_UNDERLAY) { - qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; + qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, &resp); @@ -2698,7 +2692,7 @@ static int create_driver_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, if (attr->cap.max_recv_wr || attr->cap.max_recv_sge) goto out; - ret = create_qp_common(mdev, pd, attr, udata, qp); + ret = create_qp_common(mdev, pd, attr, ucmd, udata, qp); break; default: return -EINVAL; @@ -2712,7 +2706,10 @@ static size_t process_udata_size(struct ib_qp_init_attr *attr, { size_t ucmd = sizeof(struct mlx5_ib_create_qp); - return (udata->inlen < ucmd) ? 0 : ucmd; + if (attr->qp_type == IB_QPT_DRIVER) + return (udata->inlen < ucmd) ? 0 : ucmd; + + return ucmd; } struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, @@ -2742,7 +2739,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (init_attr->qp_type == IB_QPT_GSI) return mlx5_ib_gsi_create_qp(pd, init_attr); - if (udata && init_attr->qp_type == IB_QPT_DRIVER) { + if (udata && !init_attr->rwq_ind_tbl) { size_t inlen = process_udata_size(init_attr, udata); @@ -2772,7 +2769,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, err = create_driver_qp(pd, qp, init_attr, &ucmd, udata); break; default: - err = create_qp_common(dev, pd, init_attr, udata, qp); + err = create_qp_common(dev, pd, init_attr, + (udata) ? &ucmd : NULL, udata, qp); } if (err) { mlx5_ib_dbg(dev, "create_qp_common failed\n"); From 5d0dc3d96c7b3bc6bc175754abcb132a1c94d02b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:12 +0300 Subject: [PATCH 162/562] RDMA/mlx5: Initial separation of RAW_PACKET QP from common flow Create initial function for IB_QPT_RAW_PACKET flow. Link: https://lore.kernel.org/r/20200427154636.381474-13-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 91a2c9994b59..a514b4eca06e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1634,13 +1634,13 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *q to_mpd(qp->ibqp.pd)->uid); } -static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct ib_pd *pd, +static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_qp_resp resp = {}; int inlen; int outlen; @@ -1996,9 +1996,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (mlx5_st < 0) return -EINVAL; - if (init_attr->rwq_ind_tbl) - return create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); @@ -2712,6 +2709,18 @@ static size_t process_udata_size(struct ib_qp_init_attr *attr, return ucmd; } +static int create_raw_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr, + struct mlx5_ib_create_qp *ucmd, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + + if (attr->rwq_ind_tbl) + return create_rss_raw_qp_tir(pd, qp, attr, udata); + + return create_qp_common(dev, pd, attr, ucmd, udata, qp); +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -2768,6 +2777,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, case IB_QPT_DRIVER: err = create_driver_qp(pd, qp, init_attr, &ucmd, udata); break; + case IB_QPT_RAW_PACKET: + err = create_raw_qp(pd, qp, init_attr, &ucmd, udata); + break; default: err = create_qp_common(dev, pd, init_attr, (udata) ? &ucmd : NULL, udata, qp); From 2be08c308f102eeaee7ffc4a0d08ecee82b77f9d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:13 +0300 Subject: [PATCH 163/562] RDMA/mlx5: Delete create QP flags obfuscation There is no point in redefinition of stable and exposed to users create flags. Their values won't be changed and it is equal to used by the mlx5. Delete the mlx5 definitions and use IB/core fields. Link: https://lore.kernel.org/r/20200427154636.381474-14-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/flow.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 6 +-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 21 +------- drivers/infiniband/hw/mlx5/qp.c | 80 ++++++++++++++-------------- 5 files changed, 47 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 35b98c2d64d5..1d7feed6d3cb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -615,7 +615,7 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, enum ib_qp_type qp_type = qp->ibqp.qp_type; if (qp_type == IB_QPT_RAW_PACKET || - (qp->flags & MLX5_IB_QP_UNDERLAY)) { + (qp->flags & IB_QP_CREATE_SOURCE_QPN)) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 69cb7e6e8955..08fd6a650868 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -142,7 +142,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( return -EINVAL; mqp = to_mqp(qp); - if (mqp->flags & MLX5_IB_QP_RSS) + if (mqp->is_rss) dest_id = mqp->rss_qp.tirn; else dest_id = mqp->raw_packet_qp.rq.tirn; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 65e0e24d463b..80ae8f04bfd5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3967,7 +3967,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; } else { dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->flags & MLX5_IB_QP_RSS) + if (mqp->is_rss) dst->tir_num = mqp->rss_qp.tirn; else dst->tir_num = mqp->raw_packet_qp.rq.tirn; @@ -3978,7 +3978,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, handler = create_dont_trap_rule(dev, ft_prio, flow_attr, dst); } else { - underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? + underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? mqp->underlay_qpn : 0; handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, underlay_qpn, ucmd); @@ -4447,7 +4447,7 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) uid = ibqp->pd ? to_mpd(ibqp->pd)->uid : 0; - if (mqp->flags & MLX5_IB_QP_UNDERLAY) { + if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) { mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); return -EOPNOTSUPP; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index aaabb8a98eed..b144660a47e1 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -450,7 +450,8 @@ struct mlx5_ib_qp { int scat_cqe; int max_inline_data; struct mlx5_bf bf; - int has_rq; + u8 has_rq:1; + u8 is_rss:1; /* only for user space QPs. For kernel * we have it from the bf object @@ -481,24 +482,6 @@ struct mlx5_ib_cq_buf { int nent; }; -enum mlx5_ib_qp_flags { - MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, - MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, - MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, - MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, - MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, - MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, - /* QP uses 1 as its source QP number */ - MLX5_IB_QP_SQPN_QP1 = 1 << 6, - MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, - MLX5_IB_QP_RSS = 1 << 8, - MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, - MLX5_IB_QP_UNDERLAY = 1 << 10, - MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11, - MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, - MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13, -}; - struct mlx5_umr_wr { struct ib_send_wr wr; u64 virt_addr; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a514b4eca06e..cdbb837138c9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -596,7 +596,7 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, } if (attr->qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6; } else { @@ -951,7 +951,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, bfregn = MLX5_IB_INVALID_BFREG; break; case 0: - if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) return -EINVAL; bfregn = alloc_bfreg(dev, &context->bfregi); if (bfregn < 0) @@ -1169,7 +1169,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { MLX5_SET(qpc, qpc, deth_sqpn, 1); - qp->flags |= MLX5_IB_QP_SQPN_QP1; + qp->flags |= MLX5_IB_QP_CREATE_SQPN_QP1; } mlx5_fill_page_frag_array(&qp->buf, @@ -1251,7 +1251,7 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); - if (qp->flags & MLX5_IB_QP_UNDERLAY) + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); return mlx5_core_create_tis(dev->mdev, in, &sq->tisn); @@ -1400,7 +1400,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); - if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS) + if (mqp->flags & IB_QP_CREATE_SCATTER_FCS) MLX5_SET(rqc, rqc, scatter_fcs, 1); wq = MLX5_ADDR_OF(rqc, rqc, wq); @@ -1538,9 +1538,9 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->rq.wqe_cnt) { rq->base.container_mibqp = qp; - if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING) + if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING) rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; - if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING) + if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); if (err) @@ -1878,7 +1878,7 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; - qp->flags |= MLX5_IB_QP_RSS; + qp->is_rss = true; return 0; err_copy: @@ -2001,7 +2001,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { - qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + qp->flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; } } @@ -2014,11 +2014,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL) - qp->flags |= MLX5_IB_QP_CROSS_CHANNEL; + qp->flags |= IB_QP_CREATE_CROSS_CHANNEL; if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND) - qp->flags |= MLX5_IB_QP_MANAGED_SEND; + qp->flags |= IB_QP_CREATE_MANAGED_SEND; if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) - qp->flags |= MLX5_IB_QP_MANAGED_RECV; + qp->flags |= IB_QP_CREATE_MANAGED_RECV; } if (init_attr->qp_type == IB_QPT_UD && @@ -2038,7 +2038,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n"); return -EOPNOTSUPP; } - qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS; + qp->flags |= IB_QP_CREATE_SCATTER_FCS; } if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -2049,7 +2049,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_CAP_ETH(dev->mdev, vlan_cap)) || (init_attr->qp_type != IB_QPT_RAW_PACKET)) return -EOPNOTSUPP; - qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING; + qp->flags |= IB_QP_CREATE_CVLAN_STRIPPING; } if (udata) { @@ -2106,7 +2106,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); return -EOPNOTSUPP; } - qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT; + qp->flags_en |= MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE; } if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { @@ -2118,7 +2118,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EOPNOTSUPP; } - qp->flags |= MLX5_IB_QP_UNDERLAY; + qp->flags |= IB_QP_CREATE_SOURCE_QPN; qp->underlay_qpn = init_attr->source_qpn; } } else { @@ -2126,7 +2126,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } base = (init_attr->qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) ? + qp->flags & IB_QP_CREATE_SOURCE_QPN) ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; @@ -2196,16 +2196,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->wq_sig) MLX5_SET(qpc, qpc, wq_signature, 1); - if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) MLX5_SET(qpc, qpc, block_lb_mc, 1); - if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) MLX5_SET(qpc, qpc, cd_master, 1); - if (qp->flags & MLX5_IB_QP_MANAGED_SEND) + if (qp->flags & IB_QP_CREATE_MANAGED_SEND) MLX5_SET(qpc, qpc, cd_slave_send, 1); - if (qp->flags & MLX5_IB_QP_MANAGED_RECV) + if (qp->flags & IB_QP_CREATE_MANAGED_RECV) MLX5_SET(qpc, qpc, cd_slave_receive, 1); - if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT) + if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); if (qp->scat_cqe && (init_attr->qp_type == IB_QPT_RC || init_attr->qp_type == IB_QPT_UC)) { @@ -2276,7 +2276,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (init_attr->qp_type == IB_QPT_UD && (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); - qp->flags |= MLX5_IB_QP_LSO; + qp->flags |= IB_QP_CREATE_IPOIB_UD_LSO; } if (init_attr->create_flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) { @@ -2288,7 +2288,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); } else { - qp->flags |= MLX5_IB_QP_PCI_WRITE_END_PADDING; + qp->flags |= IB_QP_CREATE_PCI_WRITE_END_PADDING; } } @@ -2298,7 +2298,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (init_attr->qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, @@ -2463,13 +2463,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) ? + qp->flags & IB_QP_CREATE_SOURCE_QPN) ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; if (qp->state != IB_QPS_RESET) { if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET && - !(qp->flags & MLX5_IB_QP_UNDERLAY)) { + !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) { err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); } else { @@ -2508,7 +2508,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { destroy_raw_packet_qp(dev, qp); } else { err = mlx5_core_destroy_qp(dev, &base->mqp); @@ -3550,7 +3550,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { if ((ibqp->qp_type == IB_QPT_RC) || (ibqp->qp_type == IB_QPT_UD && - !(qp->flags & MLX5_IB_QP_SQPN_QP1)) || + !(qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) || (ibqp->qp_type == IB_QPT_UC) || (ibqp->qp_type == IB_QPT_RAW_PACKET) || (ibqp->qp_type == IB_QPT_XRC_INI) || @@ -3567,7 +3567,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if ((ibqp->qp_type == IB_QPT_UD && - !(qp->flags & MLX5_IB_QP_UNDERLAY)) || + !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { @@ -3672,7 +3672,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->port) - 1; /* Underlay port should be used - index 0 function per port */ - if (qp->flags & MLX5_IB_QP_UNDERLAY) + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) port_num = 0; if (ibqp->counter) @@ -3686,7 +3686,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); - if (qp->flags & MLX5_IB_QP_SQPN_QP1) + if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) context->deth_sqpn = cpu_to_be32(1); mlx5_cur = to_mlx5_state(cur_state); @@ -3703,7 +3703,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { struct mlx5_modify_raw_qp_param raw_qp_param = {}; raw_qp_param.operation = op; @@ -3999,7 +3999,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; } - if (qp->flags & MLX5_IB_QP_UNDERLAY) { + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) { if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", attr_mask); @@ -5831,7 +5831,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, mutex_lock(&qp->mutex); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); if (err) goto out; @@ -5866,16 +5866,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->cap = qp_attr->cap; qp_init_attr->create_flags = 0; - if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; - if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL; - if (qp->flags & MLX5_IB_QP_MANAGED_SEND) + if (qp->flags & IB_QP_CREATE_MANAGED_SEND) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; - if (qp->flags & MLX5_IB_QP_MANAGED_RECV) + if (qp->flags & IB_QP_CREATE_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; - if (qp->flags & MLX5_IB_QP_SQPN_QP1) + if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? From 2978975ce7f16131ddf70468f0b189231e33086b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:14 +0300 Subject: [PATCH 164/562] RDMA/mlx5: Process create QP flags in one place create_flags is checked in too many places and scattered across all the code, consolidate all the checks inside one function, so we will be easily see the flow. As part of such change, delete unreachable code, because IB/core is responsible sanitize the input. Link: https://lore.kernel.org/r/20200427154636.381474-15-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 200 ++++++++++++++++---------------- 1 file changed, 101 insertions(+), 99 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index cdbb837138c9..02eb03484b91 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1097,17 +1097,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, void *qpc; int err; - if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN | - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | - IB_QP_CREATE_IPOIB_UD_LSO | - IB_QP_CREATE_NETIF_QP | - MLX5_IB_QP_CREATE_SQPN_QP1 | - MLX5_IB_QP_CREATE_WC_TEST)) - return -EINVAL; - if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; - else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST) + else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -1167,10 +1159,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { + if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) MLX5_SET(qpc, qpc, deth_sqpn, 1); - qp->flags |= MLX5_IB_QP_CREATE_SQPN_QP1; - } mlx5_fill_page_frag_array(&qp->buf, (__be64 *)MLX5_ADDR_OF(create_qp_in, @@ -1657,7 +1647,7 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, size_t required_cmd_sz; u8 lb_flag = 0; - if (init_attr->create_flags || init_attr->send_cq) + if (init_attr->send_cq) return -EINVAL; min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); @@ -1996,62 +1986,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (mlx5_st < 0) return -EINVAL; - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { - mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); - return -EINVAL; - } else { - qp->flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; - } - } - - if (init_attr->create_flags & - (IB_QP_CREATE_CROSS_CHANNEL | - IB_QP_CREATE_MANAGED_SEND | - IB_QP_CREATE_MANAGED_RECV)) { - if (!MLX5_CAP_GEN(mdev, cd)) { - mlx5_ib_dbg(dev, "cross-channel isn't supported\n"); - return -EINVAL; - } - if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL) - qp->flags |= IB_QP_CREATE_CROSS_CHANNEL; - if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND) - qp->flags |= IB_QP_CREATE_MANAGED_SEND; - if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) - qp->flags |= IB_QP_CREATE_MANAGED_RECV; - } - - if (init_attr->qp_type == IB_QPT_UD && - (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) - if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { - mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); - return -EOPNOTSUPP; - } - - if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs"); - return -EOPNOTSUPP; - } - if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) || - !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { - mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags |= IB_QP_CREATE_SCATTER_FCS; - } - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; - if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) { - if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && - MLX5_CAP_ETH(dev->mdev, vlan_cap)) || - (init_attr->qp_type != IB_QPT_RAW_PACKET)) - return -EOPNOTSUPP; - qp->flags |= IB_QP_CREATE_CVLAN_STRIPPING; - } - if (udata) { if (!check_flags_mask(ucmd->flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE | @@ -2108,23 +2045,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->flags_en |= MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE; } - - if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { - if (init_attr->qp_type != IB_QPT_UD || - (MLX5_CAP_GEN(dev->mdev, port_type) != - MLX5_CAP_PORT_TYPE_IB) || - !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) { - mlx5_ib_dbg(dev, "Source QP option isn't supported\n"); - return -EOPNOTSUPP; - } - - qp->flags |= IB_QP_CREATE_SOURCE_QPN; - qp->underlay_qpn = init_attr->source_qpn; - } } else { qp->wq_sig = !!wq_signature; } + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) + qp->underlay_qpn = init_attr->source_qpn; + base = (init_attr->qp_type == IB_QPT_RAW_PACKET || qp->flags & IB_QP_CREATE_SOURCE_QPN) ? &qp->raw_packet_qp.rq.base : @@ -2153,11 +2080,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ucmd->sq_wqe_count, max_wqes); return -EINVAL; } - if (init_attr->create_flags & - MLX5_IB_QP_CREATE_SQPN_QP1) { - mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); - return -EINVAL; - } err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, base); if (err) @@ -2273,23 +2195,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, user_index, uidx); /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ - if (init_attr->qp_type == IB_QPT_UD && - (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); - qp->flags |= IB_QP_CREATE_IPOIB_UD_LSO; - } - if (init_attr->create_flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) { - if (!MLX5_CAP_GEN(dev->mdev, end_pad)) { - mlx5_ib_dbg(dev, "scatter end padding is not supported\n"); - err = -EOPNOTSUPP; - goto err; - } else if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - MLX5_SET(qpc, qpc, end_padding_mode, - MLX5_WQ_END_PAD_MODE_ALIGN); - } else { - qp->flags |= IB_QP_CREATE_PCI_WRITE_END_PADDING; - } + if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING && + init_attr->qp_type != IB_QPT_RAW_PACKET) { + MLX5_SET(qpc, qpc, end_padding_mode, + MLX5_WQ_END_PAD_MODE_ALIGN); + /* Special case to clean flag */ + qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING; } if (inlen < 0) { @@ -2670,6 +2584,91 @@ static int process_vendor_flags(struct mlx5_ib_qp *qp, return 0; } +static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag, + bool cond, struct mlx5_ib_qp *qp) +{ + if (!(*flags & flag)) + return; + + if (cond) { + qp->flags |= flag; + *flags &= ~flag; + return; + } + + if (flag == MLX5_IB_QP_CREATE_WC_TEST) { + /* + * Special case, if condition didn't meet, it won't be error, + * just different in-kernel flow. + */ + *flags &= ~MLX5_IB_QP_CREATE_WC_TEST; + return; + } + mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag); +} + +static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr) +{ + enum ib_qp_type qp_type = attr->qp_type; + struct mlx5_core_dev *mdev = dev->mdev; + int create_flags = attr->create_flags; + bool cond; + + if (qp->qp_sub_type == MLX5_IB_QPT_DCT) + return (create_flags) ? -EINVAL : 0; + + if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) + return (create_flags) ? -EINVAL : 0; + + process_create_flag(dev, &create_flags, + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX5_CAP_GEN(mdev, block_lb_mc), qp); + process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL, + MLX5_CAP_GEN(mdev, cd), qp); + process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND, + MLX5_CAP_GEN(mdev, cd), qp); + process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV, + MLX5_CAP_GEN(mdev, cd), qp); + + if (qp_type == IB_QPT_UD) { + process_create_flag(dev, &create_flags, + IB_QP_CREATE_IPOIB_UD_LSO, + MLX5_CAP_GEN(mdev, ipoib_basic_offloads), + qp); + cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB; + process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN, + cond, qp); + } + + if (qp_type == IB_QPT_RAW_PACKET) { + cond = MLX5_CAP_GEN(mdev, eth_net_offloads) && + MLX5_CAP_ETH(mdev, scatter_fcs); + process_create_flag(dev, &create_flags, + IB_QP_CREATE_SCATTER_FCS, cond, qp); + + cond = MLX5_CAP_GEN(mdev, eth_net_offloads) && + MLX5_CAP_ETH(mdev, vlan_cap); + process_create_flag(dev, &create_flags, + IB_QP_CREATE_CVLAN_STRIPPING, cond, qp); + } + + process_create_flag(dev, &create_flags, + IB_QP_CREATE_PCI_WRITE_END_PADDING, + MLX5_CAP_GEN(mdev, end_pad), qp); + + process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST, + qp_type != MLX5_IB_QPT_REG_UMR, qp); + process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, + true, qp); + + if (create_flags) + mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", + create_flags); + + return (create_flags) ? -EINVAL : 0; +} + static int create_driver_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr, struct mlx5_ib_create_qp *ucmd, @@ -2769,6 +2768,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (err) goto free_qp; } + err = process_create_flags(dev, qp, init_attr); + if (err) + goto free_qp; if (init_attr->qp_type == IB_QPT_XRC_TGT) xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; From c95e6d53970254fa04a09c0fd79ae2cfa54cd1f5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:15 +0300 Subject: [PATCH 165/562] RDMA/mlx5: Use flags_en mechanism to mark QP created with WQE signature MLX5_QP_FLAG_SIGNATURE is exposed to the users but in the kernel the create_qp flow treated it differently from other MLX5_QP_FLAG_*s. Fix it by ditching wq_sig boolean variable and use general flag_en mechanism. Link: https://lore.kernel.org/r/20200427154636.381474-16-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/infiniband/hw/mlx5/qp.c | 36 +++++++++++++++++----------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b144660a47e1..61a96c1dd125 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -446,7 +446,6 @@ struct mlx5_ib_qp { u32 flags; u8 port; u8 state; - int wq_sig; int scat_cqe; int max_inline_data; struct mlx5_bf bf; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 16af1105cfcf..e4759310c0e2 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1190,7 +1190,7 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_wq *wq = &qp->rq; int wqe_size = 1 << wq->wqe_shift; - if (qp->wq_sig) { + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) { mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n"); return -EFAULT; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 02eb03484b91..9d29b84242f9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -41,9 +41,6 @@ #include "cmd.h" #include "qp.h" -/* not supported currently */ -static int wq_signature; - enum { MLX5_IB_ACK_REQ_FREQ = 8, }; @@ -392,17 +389,26 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, cap->max_recv_wr = 0; cap->max_recv_sge = 0; } else { + int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE); + if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift)) return -EINVAL; qp->rq.wqe_shift = ucmd->rq_wqe_shift; - if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig) + if ((1 << qp->rq.wqe_shift) / + sizeof(struct mlx5_wqe_data_seg) < + wq_sig) return -EINVAL; - qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_gs = + (1 << qp->rq.wqe_shift) / + sizeof(struct mlx5_wqe_data_seg) - + wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } else { - wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0; + wqe_size = + wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : + 0; wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg); wqe_size = roundup_pow_of_two(wqe_size); wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; @@ -416,7 +422,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); - qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_gs = + (1 << qp->rq.wqe_shift) / + sizeof(struct mlx5_wqe_data_seg) - + wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } } @@ -2008,7 +2017,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (err) return err; - qp->wq_sig = !!(ucmd->flags & MLX5_QP_FLAG_SIGNATURE); + if (ucmd->flags & MLX5_QP_FLAG_SIGNATURE) + qp->flags_en |= MLX5_QP_FLAG_SIGNATURE; if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) qp->scat_cqe = !!(ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE); @@ -2045,8 +2055,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->flags_en |= MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE; } - } else { - qp->wq_sig = !!wq_signature; } if (qp->flags & IB_QP_CREATE_SOURCE_QPN) @@ -2115,7 +2123,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, latency_sensitive, 1); - if (qp->wq_sig) + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) MLX5_SET(qpc, qpc, wq_signature, 1); if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) @@ -4997,7 +5005,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp, mlx5_opcode | ((u32)opmod << 24)); ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; - if (unlikely(qp->wq_sig)) + if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE)) ctrl->signature = wq_sig(ctrl); qp->sq.wrid[idx] = wr_id; @@ -5449,7 +5457,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, } scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); - if (qp->wq_sig) + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) scat++; for (i = 0; i < wr->num_sge; i++) @@ -5461,7 +5469,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, scat[i].addr = 0; } - if (qp->wq_sig) { + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) { sig = (struct mlx5_rwqe_sig *)scat; set_sig_seg(sig, (qp->rq.max_gs + 1) << 2); } From 90ecb37a751b6923bee846c4e19f73b943c6ffa1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:16 +0300 Subject: [PATCH 166/562] RDMA/mlx5: Change scatter CQE flag to be set like other vendor flags In similar way to wqe_sig, the scat_cqe was treated differently from other create QP vendor flags. Change it to be similar to other flags and use flags_en mechanism. Link: https://lore.kernel.org/r/20200427154636.381474-17-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/qp.c | 17 ++++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 61a96c1dd125..b6467cadc384 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -446,7 +446,6 @@ struct mlx5_ib_qp { u32 flags; u8 port; u8 state; - int scat_cqe; int max_inline_data; struct mlx5_bf bf; u8 has_rq:1; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9d29b84242f9..6a4b20c71b40 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2019,9 +2019,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->flags & MLX5_QP_FLAG_SIGNATURE) qp->flags_en |= MLX5_QP_FLAG_SIGNATURE; - if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) - qp->scat_cqe = - !!(ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE); + if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE && + MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) + qp->flags_en |= MLX5_QP_FLAG_SCATTER_CQE; + if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { if (init_attr->qp_type != IB_QPT_RAW_PACKET || !tunnel_offload_supported(mdev)) { @@ -2137,8 +2138,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, cd_slave_receive, 1); if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); - if (qp->scat_cqe && (init_attr->qp_type == IB_QPT_RC || - init_attr->qp_type == IB_QPT_UC)) { + if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) && + (init_attr->qp_type == IB_QPT_RC || + init_attr->qp_type == IB_QPT_UC)) { int rcqe_sz = rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); @@ -2146,8 +2148,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : MLX5_RES_SCAT_DATA32_CQE); } - if (qp->scat_cqe && (qp->qp_sub_type == MLX5_IB_QPT_DCI || - init_attr->qp_type == IB_QPT_RC)) + if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) && + (qp->qp_sub_type == MLX5_IB_QPT_DCI || + init_attr->qp_type == IB_QPT_RC)) configure_requester_scat_cqe(dev, init_attr, ucmd, qpc); if (qp->rq.wqe_cnt) { From a8f3ea61e1c826a1f882b3fffbb052390ddee310 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:17 +0300 Subject: [PATCH 167/562] RDMA/mlx5: Return all configured create flags through query QP The "flags" field in struct mlx5_ib_qp contains all UAPI flags configured at the create QP stage. Return all the data as is without masking. Link: https://lore.kernel.org/r/20200427154636.381474-18-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 13 +------------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b6467cadc384..9b2baf119823 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -443,6 +443,7 @@ struct mlx5_ib_qp { /* serialize qp state modifications */ struct mutex mutex; + /* cached variant of create_flags from struct ib_qp_init_attr */ u32 flags; u8 port; u8 state; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6a4b20c71b40..f0385965a694 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5878,18 +5878,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->cap = qp_attr->cap; - qp_init_attr->create_flags = 0; - if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) - qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; - - if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) - qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL; - if (qp->flags & IB_QP_CREATE_MANAGED_SEND) - qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; - if (qp->flags & IB_QP_CREATE_MANAGED_RECV) - qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; - if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) - qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; + qp_init_attr->create_flags = qp->flags; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; From 37518fa49f764516ba68fcc6ec933066bb545276 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:18 +0300 Subject: [PATCH 168/562] RDMA/mlx5: Process all vendor flags in one place Check that vendor flags provided through ucmd are valid. Link: https://lore.kernel.org/r/20200427154636.381474-19-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 158 +++++++++++++++----------------- 1 file changed, 72 insertions(+), 86 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index f0385965a694..2673678f1899 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1430,13 +1430,6 @@ static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev, mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp); } -static bool tunnel_offload_supported(struct mlx5_core_dev *dev) -{ - return (MLX5_CAP_ETH(dev, tunnel_stateless_vxlan) || - MLX5_CAP_ETH(dev, tunnel_stateless_gre) || - MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); -} - static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u32 qp_flags_en, @@ -1693,27 +1686,20 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS && - !tunnel_offload_supported(dev->mdev)) { - mlx5_ib_dbg(dev, "tunnel offloads isn't supported\n"); - return -EOPNOTSUPP; - } - if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER && !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n"); return -EOPNOTSUPP; } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->is_rep) { - lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + if (dev->is_rep) qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; - } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { + if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + + if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; - qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; - } err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (err) { @@ -1959,11 +1945,6 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, return atomic_mode; } -static inline bool check_flags_mask(uint64_t input, uint64_t supported) -{ - return (input & ~supported) == 0; -} - static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct mlx5_ib_create_qp *ucmd, @@ -1999,63 +1980,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; if (udata) { - if (!check_flags_mask(ucmd->flags, - MLX5_QP_FLAG_ALLOW_SCATTER_CQE | - MLX5_QP_FLAG_BFREG_INDEX | - MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE | - MLX5_QP_FLAG_SCATTER_CQE | - MLX5_QP_FLAG_SIGNATURE | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | - MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_UAR_PAGE_INDEX | - MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_TYPE_DCT)) - return -EINVAL; - err = get_qp_user_index(ucontext, ucmd, udata->inlen, &uidx); if (err) return err; - - if (ucmd->flags & MLX5_QP_FLAG_SIGNATURE) - qp->flags_en |= MLX5_QP_FLAG_SIGNATURE; - if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE && - MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) - qp->flags_en |= MLX5_QP_FLAG_SCATTER_CQE; - - if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET || - !tunnel_offload_supported(mdev)) { - mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS; - } - - if (ucmd->flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; - } - - if (ucmd->flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; - } - - if (ucmd->flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { - if (init_attr->qp_type != IB_QPT_RC || - !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) { - mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags_en |= MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE; - } } if (qp->flags & IB_QP_CREATE_SOURCE_QPN) @@ -2474,7 +2401,7 @@ static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); MLX5_SET(dctc, dctc, user_index, uidx); - if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE) { + if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) { int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq); if (rcqe_sz == 128) @@ -2577,22 +2504,81 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; } -static int process_vendor_flags(struct mlx5_ib_qp *qp, +static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag, + bool cond, struct mlx5_ib_qp *qp) +{ + if (!(*flags & flag)) + return; + + if (cond) { + qp->flags_en |= flag; + *flags &= ~flag; + return; + } + + if (flag == MLX5_QP_FLAG_SCATTER_CQE) { + /* + * We don't return error if this flag was provided, + * and mlx5 doesn't have right capability. + */ + *flags &= ~MLX5_QP_FLAG_SCATTER_CQE; + return; + } + mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag); +} + +static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr, struct mlx5_ib_create_qp *ucmd) { - switch (ucmd->flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) { + struct mlx5_core_dev *mdev = dev->mdev; + int flags = ucmd->flags; + bool cond; + + switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) { case MLX5_QP_FLAG_TYPE_DCI: qp->qp_sub_type = MLX5_IB_QPT_DCI; break; case MLX5_QP_FLAG_TYPE_DCT: qp->qp_sub_type = MLX5_IB_QPT_DCT; - break; + fallthrough; default: - return -EINVAL; + break; } - return 0; + if (attr->qp_type == IB_QPT_DRIVER && !qp->qp_sub_type) + return -EINVAL; + + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp); + + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE, + MLX5_CAP_GEN(mdev, sctr_data_cqe), qp); + + if (attr->qp_type == IB_QPT_RAW_PACKET) { + cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || + MLX5_CAP_ETH(mdev, tunnel_stateless_gre) || + MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS, + cond, qp); + process_vendor_flag(dev, &flags, + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true, + qp); + process_vendor_flag(dev, &flags, + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true, + qp); + } + + if (attr->qp_type == IB_QPT_RC) + process_vendor_flag(dev, &flags, + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE, + MLX5_CAP_GEN(mdev, qp_packet_based), qp); + + if (flags) + mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags); + + return (flags) ? -EINVAL : 0; } static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag, @@ -2774,8 +2760,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); - if (init_attr->qp_type == IB_QPT_DRIVER) { - err = process_vendor_flags(qp, init_attr, &ucmd); + if (udata) { + err = process_vendor_flags(dev, qp, init_attr, &ucmd); if (err) goto free_qp; } From 6f793485fc03fe832e1abcf0682efd7b4419ae2d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 20 Apr 2020 20:27:50 +0300 Subject: [PATCH 169/562] gpio: pca953x: Rewrite ->get_multiple() function The commit 96d7c7b3e654 ("gpio: gpio-pca953x, Add get_multiple function") basically did everything wrong from style and code reuse perspective, i.e. - it didn't utilize existing PCA953x internal helpers - it didn't utilize bitmap API - it misses the point that ilog2(), besides that BANK_SFT is useless, can be used in macros - it has indentation issues. Rewrite the function completely. Cc: Paul Thomas Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 41 ++++++++++--------------------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 60ae18e4b5f5..41be681ae77c 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -115,7 +115,6 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids); #define MAX_BANK 5 #define BANK_SZ 8 -#define BANK_SFT 3 /* ilog2(BANK_SZ) */ #define MAX_LINE (MAX_BANK * BANK_SZ) #define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ) @@ -469,38 +468,20 @@ static int pca953x_gpio_get_direction(struct gpio_chip *gc, unsigned off) } static int pca953x_gpio_get_multiple(struct gpio_chip *gc, - unsigned long *mask, unsigned long *bits) + unsigned long *mask, unsigned long *bits) { struct pca953x_chip *chip = gpiochip_get_data(gc); - unsigned int reg_val; - int offset, value, i, ret = 0; - u8 inreg; + DECLARE_BITMAP(reg_val, MAX_LINE); + int ret; - /* Force offset outside the range of i so that - * at least the first relevant register is read - */ - offset = gc->ngpio; - for_each_set_bit(i, mask, gc->ngpio) { - /* whenever i goes into a new bank update inreg - * and read the register - */ - if ((offset >> BANK_SFT) != (i >> BANK_SFT)) { - offset = i; - inreg = pca953x_recalc_addr(chip, chip->regs->input, - offset, true, false); - mutex_lock(&chip->i2c_lock); - ret = regmap_read(chip->regmap, inreg, ®_val); - mutex_unlock(&chip->i2c_lock); - if (ret < 0) - return ret; - } - /* reg_val is relative to the last read byte, - * so only shift the relative bits - */ - value = (reg_val >> (i % 8)) & 0x01; - __assign_bit(i, bits, value); - } - return ret; + mutex_lock(&chip->i2c_lock); + ret = pca953x_read_regs(chip, chip->regs->input, reg_val); + mutex_unlock(&chip->i2c_lock); + if (ret) + return ret; + + bitmap_replace(bits, bits, reg_val, mask, gc->ngpio); + return 0; } static void pca953x_gpio_set_multiple(struct gpio_chip *gc, From bcf41dc480b179bfb669a232080a2e26dc7294b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 20 Apr 2020 20:27:51 +0300 Subject: [PATCH 170/562] gpio: pca953x: fix handling of automatic address incrementing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the chips supported by the pca953x driver need the most significant bit in the address word set to automatically increment the address pointer on subsequent reads and writes (example: PCA9505). With this bit unset the same register is read multiple times on a multi-byte read sequence. Other chips must not have this bit set and autoincrement always (example: PCA9555). Up to now this AI bit was interpreted to be part of the address, which resulted in inconsistent regmap caching when a register was written with AI set and then read without it. This happened for the PCA9505 in pca953x_gpio_set_multiple() where pca953x_read_regs() bulk read from the cache for registers 0x8-0xc and then wrote to registers 0x88-0x8c. (Side note: reading 5 values from offset 0x8 yiels OP0 5 times because AI must be set to get OP0-OP4, which is another bug that is resolved here as a by-product.) The same problem happens when calls to gpio_set_value() and gpio_set_array_value() were mixed. With this patch the AI bit is always set for chips that support it. This works as there are no code locations that make use of the behaviour with AI unset (for the chips that support it). Note that the call to pca953x_setup_gpio() had to be done a bit earlier to make the NBANK macro work. The history of this bug is a bit complicated. Commit b32cecb46bdc ("gpio: pca953x: Extract the register address mangling to single function") changed which chips and functions are affected. Commit 3b00691cc46a ("gpio: pca953x: hack to fix 24 bit gpio expanders") used some duct tape to make the driver at least appear to work. Commit 49427232764d ("gpio: pca953x: Perform basic regmap conversion") introduced the caching. Commit b4818afeacbd ("gpio: pca953x: Add set_multiple to allow multiple bits to be set in one write.") introduced the .set_multiple() callback which didn't work for chips that need the AI bit which was fixed later for some chips in 8958262af3fb ("gpio: pca953x: Repair multi-byte IO address increment on PCA9575"). So I'm sorry, I don't know which commit I should pick for a Fixes: line. Tested-by: Marcel Gudert Signed-off-by: Uwe Kleine-König Tested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 44 +++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 41be681ae77c..590b07236637 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -308,8 +308,22 @@ static const struct regmap_config pca953x_i2c_regmap = { .disable_locking = true, .cache_type = REGCACHE_RBTREE, - /* REVISIT: should be 0x7f but some 24 bit chips use REG_ADDR_AI */ - .max_register = 0xff, + .max_register = 0x7f, +}; + +static const struct regmap_config pca953x_ai_i2c_regmap = { + .reg_bits = 8, + .val_bits = 8, + + .read_flag_mask = REG_ADDR_AI, + .write_flag_mask = REG_ADDR_AI, + + .readable_reg = pca953x_readable_register, + .writeable_reg = pca953x_writeable_register, + .volatile_reg = pca953x_volatile_register, + + .cache_type = REGCACHE_RBTREE, + .max_register = 0x7f, }; static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off, @@ -320,18 +334,6 @@ static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off, int pinctrl = (reg & PCAL_PINCTRL_MASK) << 1; u8 regaddr = pinctrl | addr | (off / BANK_SZ); - /* Single byte read doesn't need AI bit set. */ - if (!addrinc) - return regaddr; - - /* Chips with 24 and more GPIOs always support Auto Increment */ - if (write && NBANK(chip) > 2) - regaddr |= REG_ADDR_AI; - - /* PCA9575 needs address-increment on multi-byte writes */ - if (PCA_CHIP_TYPE(chip->driver_data) == PCA957X_TYPE) - regaddr |= REG_ADDR_AI; - return regaddr; } @@ -882,6 +884,7 @@ static int pca953x_probe(struct i2c_client *client, int ret; u32 invert = 0; struct regulator *reg; + const struct regmap_config *regmap_config; chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (chip == NULL) @@ -944,7 +947,17 @@ static int pca953x_probe(struct i2c_client *client, i2c_set_clientdata(client, chip); - chip->regmap = devm_regmap_init_i2c(client, &pca953x_i2c_regmap); + pca953x_setup_gpio(chip, chip->driver_data & PCA_GPIO_MASK); + + if (NBANK(chip) > 2 || PCA_CHIP_TYPE(chip->driver_data) == PCA957X_TYPE) { + dev_info(&client->dev, "using AI\n"); + regmap_config = &pca953x_ai_i2c_regmap; + } else { + dev_info(&client->dev, "using no AI\n"); + regmap_config = &pca953x_i2c_regmap; + } + + chip->regmap = devm_regmap_init_i2c(client, regmap_config); if (IS_ERR(chip->regmap)) { ret = PTR_ERR(chip->regmap); goto err_exit; @@ -975,7 +988,6 @@ static int pca953x_probe(struct i2c_client *client, /* initialize cached registers from their original values. * we can't share this chip with another i2c master. */ - pca953x_setup_gpio(chip, chip->driver_data & PCA_GPIO_MASK); if (PCA_CHIP_TYPE(chip->driver_data) == PCA953X_TYPE) { chip->regs = &pca953x_regs; From 6fdeb6cbe1ef3bccd2c5e2b3427a06e9cda1efc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 20 Apr 2020 20:27:52 +0300 Subject: [PATCH 171/562] gpio: pca953x: drop unused parameters of pca953x_recalc_addr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the previous patch the two last parameters of pca953x_recalc_addr() are unused and so can be dropped. Tested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Uwe Kleine-König Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 590b07236637..06d6af60e6b7 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -326,8 +326,7 @@ static const struct regmap_config pca953x_ai_i2c_regmap = { .max_register = 0x7f, }; -static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off, - bool write, bool addrinc) +static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off) { int bank_shift = pca953x_bank_shift(chip); int addr = (reg & PCAL_GPIO_MASK) << bank_shift; @@ -339,7 +338,7 @@ static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off, static int pca953x_write_regs(struct pca953x_chip *chip, int reg, unsigned long *val) { - u8 regaddr = pca953x_recalc_addr(chip, reg, 0, true, true); + u8 regaddr = pca953x_recalc_addr(chip, reg, 0); u8 value[MAX_BANK]; int i, ret; @@ -357,7 +356,7 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, unsigned long static int pca953x_read_regs(struct pca953x_chip *chip, int reg, unsigned long *val) { - u8 regaddr = pca953x_recalc_addr(chip, reg, 0, false, true); + u8 regaddr = pca953x_recalc_addr(chip, reg, 0); u8 value[MAX_BANK]; int i, ret; @@ -376,8 +375,7 @@ static int pca953x_read_regs(struct pca953x_chip *chip, int reg, unsigned long * static int pca953x_gpio_direction_input(struct gpio_chip *gc, unsigned off) { struct pca953x_chip *chip = gpiochip_get_data(gc); - u8 dirreg = pca953x_recalc_addr(chip, chip->regs->direction, off, - true, false); + u8 dirreg = pca953x_recalc_addr(chip, chip->regs->direction, off); u8 bit = BIT(off % BANK_SZ); int ret; @@ -391,10 +389,8 @@ static int pca953x_gpio_direction_output(struct gpio_chip *gc, unsigned off, int val) { struct pca953x_chip *chip = gpiochip_get_data(gc); - u8 dirreg = pca953x_recalc_addr(chip, chip->regs->direction, off, - true, false); - u8 outreg = pca953x_recalc_addr(chip, chip->regs->output, off, - true, false); + u8 dirreg = pca953x_recalc_addr(chip, chip->regs->direction, off); + u8 outreg = pca953x_recalc_addr(chip, chip->regs->output, off); u8 bit = BIT(off % BANK_SZ); int ret; @@ -414,8 +410,7 @@ static int pca953x_gpio_direction_output(struct gpio_chip *gc, static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off) { struct pca953x_chip *chip = gpiochip_get_data(gc); - u8 inreg = pca953x_recalc_addr(chip, chip->regs->input, off, - true, false); + u8 inreg = pca953x_recalc_addr(chip, chip->regs->input, off); u8 bit = BIT(off % BANK_SZ); u32 reg_val; int ret; @@ -439,8 +434,7 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off) static void pca953x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val) { struct pca953x_chip *chip = gpiochip_get_data(gc); - u8 outreg = pca953x_recalc_addr(chip, chip->regs->output, off, - true, false); + u8 outreg = pca953x_recalc_addr(chip, chip->regs->output, off); u8 bit = BIT(off % BANK_SZ); mutex_lock(&chip->i2c_lock); @@ -451,8 +445,7 @@ static void pca953x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val) static int pca953x_gpio_get_direction(struct gpio_chip *gc, unsigned off) { struct pca953x_chip *chip = gpiochip_get_data(gc); - u8 dirreg = pca953x_recalc_addr(chip, chip->regs->direction, off, - true, false); + u8 dirreg = pca953x_recalc_addr(chip, chip->regs->direction, off); u8 bit = BIT(off % BANK_SZ); u32 reg_val; int ret; @@ -509,10 +502,8 @@ static int pca953x_gpio_set_pull_up_down(struct pca953x_chip *chip, unsigned int offset, unsigned long config) { - u8 pull_en_reg = pca953x_recalc_addr(chip, PCAL953X_PULL_EN, offset, - true, false); - u8 pull_sel_reg = pca953x_recalc_addr(chip, PCAL953X_PULL_SEL, offset, - true, false); + u8 pull_en_reg = pca953x_recalc_addr(chip, PCAL953X_PULL_EN, offset); + u8 pull_sel_reg = pca953x_recalc_addr(chip, PCAL953X_PULL_SEL, offset); u8 bit = BIT(offset % BANK_SZ); int ret; From 36b5215436ad115551e12478ceb3be4a2279e415 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 28 Apr 2020 17:23:27 -0700 Subject: [PATCH 172/562] gpio: Document proper return value for gpio drivers The legacy defines GPIOF_DIR_XXX are only for consumers. Document the proper ones. Also: don't use "_XXX" since that's harder to find with "git grep". Just list both of the values. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20200428172322.1.I396f351e364f3c09df7c7606e79abefb8682c092@changeid Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b8fc92c177eb..7b5f5681b7e4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -267,9 +267,9 @@ struct gpio_irq_chip { * @free: optional hook for chip-specific deactivation, such as * disabling module power and clock; may sleep * @get_direction: returns direction for signal "offset", 0=out, 1=in, - * (same as GPIOF_DIR_XXX), or negative error. - * It is recommended to always implement this function, even on - * input-only or output-only gpio chips. + * (same as GPIO_LINE_DIRECTION_OUT / GPIO_LINE_DIRECTION_IN), + * or negative error. It is recommended to always implement this + * function, even on input-only or output-only gpio chips. * @direction_input: configures signal "offset" as input, or returns error * This can be omitted on input-only or output-only gpio chips. * @direction_output: configures signal "offset" as output, or returns error From 8d0910121b070b4edf4239d91cf9a4523b33ca0c Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 28 Apr 2020 17:23:28 -0700 Subject: [PATCH 173/562] gpio: Make "offset" and "unsigned int", not just "unsigned" When I copied the function prototypes from the GPIO header file into my own driver, checkpatch yelled at me saying that I shouldn't use use "unsigned" but instead should say "unsigned int". Let's make the header file use "unsigned int" so others who copy like I did won't get yelled at. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20200428172322.2.Iacb3c8152c3cf9015a91308678155a578b0cc050@changeid Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 7b5f5681b7e4..8c41ae41b6bb 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -349,30 +349,30 @@ struct gpio_chip { struct module *owner; int (*request)(struct gpio_chip *gc, - unsigned offset); + unsigned int offset); void (*free)(struct gpio_chip *gc, - unsigned offset); + unsigned int offset); int (*get_direction)(struct gpio_chip *gc, - unsigned offset); + unsigned int offset); int (*direction_input)(struct gpio_chip *gc, - unsigned offset); + unsigned int offset); int (*direction_output)(struct gpio_chip *gc, - unsigned offset, int value); + unsigned int offset, int value); int (*get)(struct gpio_chip *gc, - unsigned offset); + unsigned int offset); int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); void (*set)(struct gpio_chip *gc, - unsigned offset, int value); + unsigned int offset, int value); void (*set_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); int (*set_config)(struct gpio_chip *gc, - unsigned offset, + unsigned int offset, unsigned long config); int (*to_irq)(struct gpio_chip *gc, - unsigned offset); + unsigned int offset); void (*dbg_show)(struct seq_file *s, struct gpio_chip *gc); @@ -459,7 +459,7 @@ struct gpio_chip { }; extern const char *gpiochip_is_requested(struct gpio_chip *gc, - unsigned offset); + unsigned int offset); /* add/remove chips */ extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, @@ -657,9 +657,9 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc, } #endif /* CONFIG_LOCKDEP */ -int gpiochip_generic_request(struct gpio_chip *gc, unsigned offset); -void gpiochip_generic_free(struct gpio_chip *gc, unsigned offset); -int gpiochip_generic_config(struct gpio_chip *gc, unsigned offset, +int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset); +void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset); +int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config); /** From 20a66f2bf280277ab5bb22e27445153b4eb0ac88 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 28 Apr 2020 19:45:55 +0900 Subject: [PATCH 174/562] scsi: core: free sgtables in case command setup fails In case scsi_setup_fs_cmnd() fails we're not freeing the sgtables allocated by scsi_init_io(), thus we leak the allocated memory. Free the sgtables allocated by scsi_init_io() in case scsi_setup_fs_cmnd() fails. Technically scsi_setup_scsi_cmnd() does not suffer from this problem as it can only fail if scsi_init_io() fails, so it does not have sgtables allocated. But to maintain symmetry and as a measure of defensive programming, free the sgtables on scsi_setup_scsi_cmnd() failure as well. scsi_mq_free_sgtables() has safeguards against double-freeing of memory so this is safe to do. While we're at it, rename scsi_mq_free_sgtables() to scsi_free_sgtables(). Link: https://bugzilla.kernel.org/show_bug.cgi?id=205595 Link: https://lore.kernel.org/r/20200428104605.8143-2-johannes.thumshirn@wdc.com Reviewed-by: Christoph Hellwig Reviewed-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 53b9ff12e030..01d229ea4e1c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -531,7 +531,7 @@ static void scsi_uninit_cmd(struct scsi_cmnd *cmd) } } -static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd) +static void scsi_free_sgtables(struct scsi_cmnd *cmd) { if (cmd->sdb.table.nents) sg_free_table_chained(&cmd->sdb.table, @@ -543,7 +543,7 @@ static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd) static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd) { - scsi_mq_free_sgtables(cmd); + scsi_free_sgtables(cmd); scsi_uninit_cmd(cmd); } @@ -1032,7 +1032,7 @@ blk_status_t scsi_init_io(struct scsi_cmnd *cmd) return BLK_STS_OK; out_free_sgtables: - scsi_mq_free_sgtables(cmd); + scsi_free_sgtables(cmd); return ret; } EXPORT_SYMBOL(scsi_init_io); @@ -1163,6 +1163,7 @@ static blk_status_t scsi_setup_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); + blk_status_t ret; if (!blk_rq_bytes(req)) cmd->sc_data_direction = DMA_NONE; @@ -1172,9 +1173,14 @@ static blk_status_t scsi_setup_cmnd(struct scsi_device *sdev, cmd->sc_data_direction = DMA_FROM_DEVICE; if (blk_rq_is_scsi(req)) - return scsi_setup_scsi_cmnd(sdev, req); + ret = scsi_setup_scsi_cmnd(sdev, req); else - return scsi_setup_fs_cmnd(sdev, req); + ret = scsi_setup_fs_cmnd(sdev, req); + + if (ret != BLK_STS_OK) + scsi_free_sgtables(cmd); + + return ret; } static blk_status_t From 78b874b7cbf09fbfadfa5f18a347ebef7bbb49fe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Apr 2020 11:20:13 +0100 Subject: [PATCH 175/562] scsi: qla2xxx: make 1-bit bit-fields unsigned int The bitfields mpi_fw_dump_reading and mpi_fw_dumped are currently signed which is not recommended as the representation is an implementation defined behaviour. Fix this by making the bit-fields unsigned ints. Link: https://lore.kernel.org/r/20200428102013.1040598-1-colin.king@canonical.com Fixes: cbb01c2f2f63 ("scsi: qla2xxx: Fix MPI failure AEN (8200) handling") Reviewed-by: Bart Van Assche Reviewed-by: Himanshu Madhani Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index daa9e936887b..172ea4e5887d 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4248,8 +4248,8 @@ struct qla_hw_data { int fw_dump_reading; void *mpi_fw_dump; u32 mpi_fw_dump_len; - int mpi_fw_dump_reading:1; - int mpi_fw_dumped:1; + unsigned int mpi_fw_dump_reading:1; + unsigned int mpi_fw_dumped:1; int prev_minidump_failed; dma_addr_t eft_dma; void *eft; From 4a4c0cfb4be74e216dd4446b254594707455bfc6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 28 Apr 2020 16:19:39 +0300 Subject: [PATCH 176/562] scsi: qedi: Check for buffer overflow in qedi_set_path() Smatch complains that the "path_data->handle" variable is user controlled. It comes from iscsi_set_path() so that seems possible. It's harmless to add a limit check. The qedi->ep_tbl[] array has qedi->max_active_conns elements (which is always ISCSI_MAX_SESS_PER_HBA (4096) elements). The array is allocated in the qedi_cm_alloc_mem() function. Link: https://lore.kernel.org/r/20200428131939.GA696531@mwanda Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") Acked-by: Manish Rangankar Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_iscsi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index b867a143d263..425e665ec08b 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -1221,6 +1221,10 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) } iscsi_cid = (u32)path_data->handle; + if (iscsi_cid >= qedi->max_active_conns) { + ret = -EINVAL; + goto set_path_exit; + } qedi_ep = qedi->ep_tbl[iscsi_cid]; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "iscsi_cid=0x%x, qedi_ep=%p\n", iscsi_cid, qedi_ep); From 6f41f08c88c59ae6e32d32563c9af73ae2d71ece Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Wed, 29 Apr 2020 20:10:18 +0800 Subject: [PATCH 177/562] scsi: dpt_i2o: Remove always false 'chan < 0' statement The channel index is represented by an unsigned variable 'u32 chan'. We don't need to check whether it is less than zero, the 'chan < 0' statement is always false. Remove it. Link: https://lore.kernel.org/r/1588162218-61757-1-git-send-email-wangxiongfeng2@huawei.com Reported-by: Hulk Robot Signed-off-by: Xiongfeng Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/dpt_i2o.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index 02dff3a684e0..2cf889512ece 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -1120,7 +1120,7 @@ static struct adpt_device* adpt_find_device(adpt_hba* pHba, u32 chan, u32 id, u6 { struct adpt_device* d; - if(chan < 0 || chan >= MAX_CHANNEL) + if (chan >= MAX_CHANNEL) return NULL; d = pHba->channel[chan].device[id]; From 3ae7e66a019e18896c46fcbb7ae28bfc343331c4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:19 +0300 Subject: [PATCH 178/562] RDMA/mlx5: Delete unsupported QP types There is no need to explicitly check unsupported QP types, rely on "default" keyword in switch-case to catch them. Link: https://lore.kernel.org/r/20200427154636.381474-20-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2673678f1899..5e156b02816a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -760,10 +760,7 @@ static int to_mlx5_st(enum ib_qp_type type) case IB_QPT_SMI: return MLX5_QP_ST_QP0; case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; case MLX5_IB_QPT_DCI: return MLX5_QP_ST_DCI; - case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; - case IB_QPT_RAW_PACKET: - case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; - case IB_QPT_MAX: + case IB_QPT_RAW_PACKET: return MLX5_QP_ST_RAW_ETHERTYPE; default: return -EINVAL; } } @@ -2282,14 +2279,10 @@ static void get_cqs(enum ib_qp_type qp_type, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: - case IB_QPT_RAW_IPV6: - case IB_QPT_RAW_ETHERTYPE: case IB_QPT_RAW_PACKET: *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL; break; - - case IB_QPT_MAX: default: *send_cq = NULL; *recv_cq = NULL; @@ -2434,9 +2427,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr) case IB_QPT_DRIVER: case IB_QPT_GSI: return 0; - case IB_QPT_RAW_IPV6: - case IB_QPT_RAW_ETHERTYPE: - case IB_QPT_MAX: default: goto out; } From 7aede1a25f4b84318e8a266d7b830a5ed554e370 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:20 +0300 Subject: [PATCH 179/562] RDMA/mlx5: Store QP type in the vendor QP structure QP type is stored in the IB/core QP struct, but it doesn't have all the needed information, like internal QP type used in the driver itself. Update mlx5_ib to have cached QP type which includes both IBTA and Mellanox specific one. Such change allows us to make even further cleanup of QP creation flow. Link: https://lore.kernel.org/r/20200427154636.381474-21-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8 +- drivers/infiniband/hw/mlx5/odp.c | 3 +- drivers/infiniband/hw/mlx5/qp.c | 136 +++++++++++++-------------- 3 files changed, 74 insertions(+), 73 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 9b2baf119823..82ea01a211dd 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -465,8 +465,12 @@ struct mlx5_ib_qp { struct mlx5_rate_limit rl; u32 underlay_qpn; u32 flags_en; - /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ - enum ib_qp_type qp_sub_type; + /* + * IB/core doesn't store low-level QP types, so + * store both MLX and IBTA types in the field below. + * IB_QPT_DRIVER will be break to DCI/DCT subtypes. + */ + enum ib_qp_type type; /* A flag to indicate if there's a new counter is configured * but not take effective */ diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e4759310c0e2..70577d546567 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1136,8 +1136,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type == IB_QPT_XRC_INI) *wqe += sizeof(struct mlx5_wqe_xrc_seg); - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->qp_sub_type == MLX5_IB_QPT_DCI) { + if (qp->type == IB_QPT_UD || qp->type == MLX5_IB_QPT_DCI) { av = *wqe; if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5e156b02816a..0d3f4bafe448 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1227,14 +1227,13 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { - if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || - (qp->qp_sub_type == MLX5_IB_QPT_DCI) || - (attr->qp_type == IB_QPT_XRC_INI)) + if (attr->srq || (qp->type == IB_QPT_XRC_TGT) || + (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI)) return MLX5_SRQ_RQ; else if (!qp->has_rq) return MLX5_ZERO_LEN_RQ; - else - return MLX5_NON_ZERO_RQ; + + return MLX5_NON_ZERO_RQ; } static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, @@ -1967,9 +1966,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); - mlx5_st = to_mlx5_st((init_attr->qp_type != IB_QPT_DRIVER) ? - init_attr->qp_type : - qp->qp_sub_type); + mlx5_st = to_mlx5_st(qp->type); if (mlx5_st < 0) return -EINVAL; @@ -2073,8 +2070,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_RES_SCAT_DATA32_CQE); } if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) && - (qp->qp_sub_type == MLX5_IB_QPT_DCI || - init_attr->qp_type == IB_QPT_RC)) + (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC)) configure_requester_scat_cqe(dev, init_attr, ucmd, qpc); if (qp->rq.wqe_cnt) { @@ -2166,7 +2162,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; - get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq, + get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx5_ib_lock_cqs(send_cq, recv_cq); @@ -2406,7 +2402,8 @@ static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, return 0; } -static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr) +static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, + enum ib_qp_type *type) { if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct)) goto out; @@ -2426,11 +2423,12 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr) case MLX5_IB_QPT_REG_UMR: case IB_QPT_DRIVER: case IB_QPT_GSI: - return 0; + break; default: goto out; } + *type = attr->qp_type; return 0; out: @@ -2518,7 +2516,6 @@ static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag, } static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct ib_qp_init_attr *attr, struct mlx5_ib_create_qp *ucmd) { struct mlx5_core_dev *mdev = dev->mdev; @@ -2527,17 +2524,20 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) { case MLX5_QP_FLAG_TYPE_DCI: - qp->qp_sub_type = MLX5_IB_QPT_DCI; + qp->type = MLX5_IB_QPT_DCI; break; case MLX5_QP_FLAG_TYPE_DCT: - qp->qp_sub_type = MLX5_IB_QPT_DCT; - fallthrough; - default: + qp->type = MLX5_IB_QPT_DCT; break; - } - - if (attr->qp_type == IB_QPT_DRIVER && !qp->qp_sub_type) + default: + if (qp->type != IB_QPT_DRIVER) + break; + /* + * It is IB_QPT_DRIVER and or no subtype or + * wrong subtype were provided. + */ return -EINVAL; + } process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp); process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp); @@ -2546,7 +2546,7 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE, MLX5_CAP_GEN(mdev, sctr_data_cqe), qp); - if (attr->qp_type == IB_QPT_RAW_PACKET) { + if (qp->type == IB_QPT_RAW_PACKET) { cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre) || MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx); @@ -2560,7 +2560,7 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, qp); } - if (attr->qp_type == IB_QPT_RC) + if (qp->type == IB_QPT_RC) process_vendor_flag(dev, &flags, MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE, MLX5_CAP_GEN(mdev, qp_packet_based), qp); @@ -2597,12 +2597,12 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag, static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { - enum ib_qp_type qp_type = attr->qp_type; + enum ib_qp_type qp_type = qp->type; struct mlx5_core_dev *mdev = dev->mdev; int create_flags = attr->create_flags; bool cond; - if (qp->qp_sub_type == MLX5_IB_QPT_DCT) + if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ? -EINVAL : 0; if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) @@ -2656,34 +2656,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return (create_flags) ? -EINVAL : 0; } -static int create_driver_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, - struct ib_qp_init_attr *attr, - struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata) -{ - struct mlx5_ib_dev *mdev = to_mdev(pd->device); - int ret = -EINVAL; - - switch (qp->qp_sub_type) { - case MLX5_IB_QPT_DCT: - if (!attr->srq || !attr->recv_cq) - goto out; - - ret = create_dct(pd, qp, attr, ucmd, udata); - break; - case MLX5_IB_QPT_DCI: - if (attr->cap.max_recv_wr || attr->cap.max_recv_sge) - goto out; - - ret = create_qp_common(mdev, pd, attr, ucmd, udata, qp); - break; - default: - return -EINVAL; - } - -out: return ret; -} - static size_t process_udata_size(struct ib_qp_init_attr *attr, struct ib_udata *udata) { @@ -2707,6 +2679,30 @@ static int create_raw_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, return create_qp_common(dev, pd, attr, ucmd, udata, qp); } +static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr) +{ + int ret = 0; + + switch (qp->type) { + case MLX5_IB_QPT_DCT: + ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0; + break; + case MLX5_IB_QPT_DCI: + ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ? + -EINVAL : + 0; + break; + default: + break; + } + + if (ret) + mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type); + + return ret; +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -2714,13 +2710,14 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct mlx5_ib_create_qp ucmd = {}; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; + enum ib_qp_type type; u16 xrcdn = 0; int err; dev = pd ? to_mdev(pd->device) : to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); - err = check_qp_type(dev, init_attr); + err = check_qp_type(dev, init_attr, &type); if (err) { mlx5_ib_dbg(dev, "Unsupported QP type %d\n", init_attr->qp_type); @@ -2750,8 +2747,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); + qp->type = type; if (udata) { - err = process_vendor_flags(dev, qp, init_attr, &ucmd); + err = process_vendor_flags(dev, qp, &ucmd); if (err) goto free_qp; } @@ -2759,16 +2757,20 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (err) goto free_qp; - if (init_attr->qp_type == IB_QPT_XRC_TGT) + if (qp->type == IB_QPT_XRC_TGT) xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; - switch (init_attr->qp_type) { - case IB_QPT_DRIVER: - err = create_driver_qp(pd, qp, init_attr, &ucmd, udata); - break; + err = check_qp_attr(dev, qp, init_attr); + if (err) + goto free_qp; + + switch (qp->type) { case IB_QPT_RAW_PACKET: err = create_raw_qp(pd, qp, init_attr, &ucmd, udata); break; + case MLX5_IB_QPT_DCT: + err = create_dct(pd, qp, init_attr, &ucmd, udata); + break; default: err = create_qp_common(dev, pd, init_attr, (udata) ? &ucmd : NULL, udata, qp); @@ -2821,7 +2823,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) if (unlikely(qp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_destroy_qp(qp); - if (mqp->qp_sub_type == MLX5_IB_QPT_DCT) + if (mqp->type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); destroy_qp_common(dev, mqp, udata); @@ -3508,8 +3510,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, u16 op; u8 tx_affinity = 0; - mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); + mlx5_st = to_mlx5_st(qp->type); if (mlx5_st < 0) return -EINVAL; @@ -3970,11 +3971,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); - if (ibqp->qp_type == IB_QPT_DRIVER) - qp_type = qp->qp_sub_type; - else - qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? - IB_QPT_GSI : ibqp->qp_type; + qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI : + qp->type; if (qp_type == MLX5_IB_QPT_DCT) return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata); @@ -5813,7 +5811,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, memset(qp_init_attr, 0, sizeof(*qp_init_attr)); memset(qp_attr, 0, sizeof(*qp_attr)); - if (unlikely(qp->qp_sub_type == MLX5_IB_QPT_DCT)) + if (unlikely(qp->type == MLX5_IB_QPT_DCT)) return mlx5_ib_dct_query_qp(dev, qp, qp_attr, qp_attr_mask, qp_init_attr); From 266424eba6c90ab8b12cf73aae00f1b08c0619cf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:21 +0300 Subject: [PATCH 180/562] RDMA/mlx5: Promote RSS RAW QP attribute check in higher level Perform check of attributes of RAW PACKET QP in separate function. Link: https://lore.kernel.org/r/20200427154636.381474-22-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0d3f4bafe448..454433a18b97 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1645,9 +1645,6 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, size_t required_cmd_sz; u8 lb_flag = 0; - if (init_attr->send_cq) - return -EINVAL; - min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); if (udata->outlen < min_resp_len) return -EINVAL; @@ -2693,6 +2690,9 @@ static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, -EINVAL : 0; break; + case IB_QPT_RAW_PACKET: + ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0; + break; default: break; } From 5ce0592b0ee56e41f1a4a164ac2f54dbfbbf5e49 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:22 +0300 Subject: [PATCH 181/562] RDMA/mlx5: Combine copy of create QP command in RSS RAW QP Change the create QP flow to handle all copy_from_user() operations in one place. Link: https://lore.kernel.org/r/20200427154636.381474-23-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 156 +++++++++++++++++--------------- 1 file changed, 82 insertions(+), 74 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 454433a18b97..4f69105f082b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1624,6 +1624,7 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *q static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *init_attr, + struct mlx5_ib_create_qp_rss *ucmd, struct ib_udata *udata) { struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context( @@ -1641,46 +1642,26 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, u32 outer_l4; size_t min_resp_len; u32 tdn = mucontext->tdn; - struct mlx5_ib_create_qp_rss ucmd = {}; - size_t required_cmd_sz; u8 lb_flag = 0; min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); if (udata->outlen < min_resp_len) return -EINVAL; - required_cmd_sz = offsetof(typeof(ucmd), flags) + sizeof(ucmd.flags); - if (udata->inlen < required_cmd_sz) { - mlx5_ib_dbg(dev, "invalid inlen\n"); - return -EINVAL; - } - - if (udata->inlen > sizeof(ucmd) && - !ib_is_udata_cleared(udata, sizeof(ucmd), - udata->inlen - sizeof(ucmd))) { - mlx5_ib_dbg(dev, "inlen is not supported\n"); - return -EOPNOTSUPP; - } - - if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { - mlx5_ib_dbg(dev, "copy failed\n"); - return -EFAULT; - } - - if (ucmd.comp_mask) { + if (ucmd->comp_mask) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; } - if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | + if (ucmd->flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) { mlx5_ib_dbg(dev, "invalid flags\n"); return -EOPNOTSUPP; } - if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER && - !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { + if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER && + !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n"); return -EOPNOTSUPP; } @@ -1717,29 +1698,29 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) + if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); MLX5_SET(tirc, tirc, self_lb_block, lb_flag); - if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) + if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER) hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); else hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - switch (ucmd.rx_hash_function) { + switch (ucmd->rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); - if (len != ucmd.rx_key_len) { + if (len != ucmd->rx_key_len) { err = -EINVAL; goto err; } MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); - memcpy(rss_key, ucmd.rx_hash_key, len); + memcpy(rss_key, ucmd->rx_hash_key, len); break; } default: @@ -1747,7 +1728,7 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, goto err; } - if (!ucmd.rx_hash_fields_mask) { + if (!ucmd->rx_hash_fields_mask) { /* special case when this TIR serves as steering entry without hashing */ if (!init_attr->rwq_ind_tbl->log_ind_tbl_size) goto create_tir; @@ -1755,29 +1736,31 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, goto err; } - if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && - ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { + if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && + ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { err = -EINVAL; goto err; } /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */ - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); - else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); - outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 | - ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 | - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; + outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) + << 0 | + ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + << 1 | + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; /* Check that only one l4 protocol is set */ if (outer_l4 & (outer_l4 - 1)) { @@ -1786,32 +1769,32 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, } /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */ - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); - else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP; - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP; - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT; - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; - if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) + if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI; MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); @@ -2513,11 +2496,16 @@ static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag, } static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct mlx5_ib_create_qp *ucmd) + void *ucmd, struct ib_qp_init_attr *attr) { struct mlx5_core_dev *mdev = dev->mdev; - int flags = ucmd->flags; bool cond; + int flags; + + if (attr->rwq_ind_tbl) + flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags; + else + flags = ((struct mlx5_ib_create_qp *)ucmd)->flags; switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) { case MLX5_QP_FLAG_TYPE_DCI: @@ -2657,21 +2645,32 @@ static size_t process_udata_size(struct ib_qp_init_attr *attr, struct ib_udata *udata) { size_t ucmd = sizeof(struct mlx5_ib_create_qp); + size_t inlen = udata->inlen; if (attr->qp_type == IB_QPT_DRIVER) - return (udata->inlen < ucmd) ? 0 : ucmd; + return (inlen < ucmd) ? 0 : ucmd; - return ucmd; + if (!attr->rwq_ind_tbl) + return ucmd; + + if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags)) + return 0; + + ucmd = sizeof(struct mlx5_ib_create_qp_rss); + if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd)) + return 0; + + return min(ucmd, inlen); } static int create_raw_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, - struct ib_qp_init_attr *attr, - struct mlx5_ib_create_qp *ucmd, struct ib_udata *udata) + struct ib_qp_init_attr *attr, void *ucmd, + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); if (attr->rwq_ind_tbl) - return create_rss_raw_qp_tir(pd, qp, attr, udata); + return create_rss_raw_qp_tir(pd, qp, attr, ucmd, udata); return create_qp_common(dev, pd, attr, ucmd, udata, qp); } @@ -2707,10 +2706,10 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx5_ib_create_qp ucmd = {}; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; enum ib_qp_type type; + void *ucmd = NULL; u16 xrcdn = 0; int err; @@ -2731,25 +2730,31 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (init_attr->qp_type == IB_QPT_GSI) return mlx5_ib_gsi_create_qp(pd, init_attr); - if (udata && !init_attr->rwq_ind_tbl) { + if (udata) { size_t inlen = process_udata_size(init_attr, udata); if (!inlen) return ERR_PTR(-EINVAL); - err = ib_copy_from_udata(&ucmd, udata, inlen); + ucmd = kzalloc(inlen, GFP_KERNEL); + if (!ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(ucmd, udata, inlen); if (err) - return ERR_PTR(err); + goto free_ucmd; } qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + if (!qp) { + err = -ENOMEM; + goto free_ucmd; + } qp->type = type; if (udata) { - err = process_vendor_flags(dev, qp, &ucmd); + err = process_vendor_flags(dev, qp, ucmd, init_attr); if (err) goto free_qp; } @@ -2766,20 +2771,21 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, switch (qp->type) { case IB_QPT_RAW_PACKET: - err = create_raw_qp(pd, qp, init_attr, &ucmd, udata); + err = create_raw_qp(pd, qp, init_attr, ucmd, udata); break; case MLX5_IB_QPT_DCT: - err = create_dct(pd, qp, init_attr, &ucmd, udata); + err = create_dct(pd, qp, init_attr, ucmd, udata); break; default: - err = create_qp_common(dev, pd, init_attr, - (udata) ? &ucmd : NULL, udata, qp); + err = create_qp_common(dev, pd, init_attr, ucmd, udata, qp); } if (err) { mlx5_ib_dbg(dev, "create_qp_common failed\n"); goto free_qp; } + kfree(ucmd); + if (is_qp0(init_attr->qp_type)) qp->ibqp.qp_num = 0; else if (is_qp1(init_attr->qp_type)) @@ -2793,6 +2799,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, free_qp: kfree(qp); +free_ucmd: + kfree(ucmd); return ERR_PTR(err); } From 76883a6cc1459b2aa1e96e1eaa19aa37221c2406 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:23 +0300 Subject: [PATCH 182/562] RDMA/mlx5: Remove second user copy in create_user_qp Combine copy_from_user() from create_user_qp() and general code. Link: https://lore.kernel.org/r/20200427154636.381474-24-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 34 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 4f69105f082b..495f03905fbf 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -914,13 +914,12 @@ static int adjust_bfregn(struct mlx5_ib_dev *dev, static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, - struct ib_qp_init_attr *attr, - u32 **in, + struct ib_qp_init_attr *attr, u32 **in, struct mlx5_ib_create_qp_resp *resp, int *inlen, - struct mlx5_ib_qp_base *base) + struct mlx5_ib_qp_base *base, + struct mlx5_ib_create_qp *ucmd) { struct mlx5_ib_ucontext *context; - struct mlx5_ib_create_qp ucmd; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; int page_shift = 0; int uar_index = 0; @@ -934,24 +933,18 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, u16 uid; u32 uar_flags; - err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); - if (err) { - mlx5_ib_dbg(dev, "copy failed\n"); - return err; - } - context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext, ibucontext); - uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX | - MLX5_QP_FLAG_BFREG_INDEX); + uar_flags = qp->flags_en & + (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX); switch (uar_flags) { case MLX5_QP_FLAG_UAR_PAGE_INDEX: - uar_index = ucmd.bfreg_index; + uar_index = ucmd->bfreg_index; bfregn = MLX5_IB_INVALID_BFREG; break; case MLX5_QP_FLAG_BFREG_INDEX: uar_index = bfregn_to_uar_index(dev, &context->bfregi, - ucmd.bfreg_index, true); + ucmd->bfreg_index, true); if (uar_index < 0) return uar_index; bfregn = MLX5_IB_INVALID_BFREG; @@ -976,12 +969,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; - err = set_user_buf_size(dev, qp, &ucmd, base, attr); + err = set_user_buf_size(dev, qp, ucmd, base, attr); if (err) goto err_bfreg; - if (ucmd.buf_addr && ubuffer->buf_size) { - ubuffer->buf_addr = ucmd.buf_addr; + if (ucmd->buf_addr && ubuffer->buf_size) { + ubuffer->buf_addr = ucmd->buf_addr; err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, &ubuffer->umem, &npages, &page_shift, &ncont, &offset); @@ -1018,7 +1011,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, resp->bfreg_index = MLX5_IB_INVALID_BFREG; qp->bfregn = bfregn; - err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db); + err = mlx5_ib_db_map_user(context, udata, ucmd->db_addr, &qp->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_free; @@ -1991,7 +1984,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, init_attr, &in, - &resp, &inlen, base); + &resp, &inlen, base, ucmd); if (err) mlx5_ib_dbg(dev, "err %d\n", err); } else { @@ -2550,6 +2543,9 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE, MLX5_CAP_GEN(mdev, qp_packet_based), qp); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp); + if (flags) mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags); From 03c4077b284056fdb144f84aaa0ac0c80023f597 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:24 +0300 Subject: [PATCH 183/562] RDMA/mlx5: Rely on existence of udata to separate kernel/user flows Instead of keeping special field to separate kernel/user create/destroy flows, rely on existence of udata pointer. All allocation flows are using kzalloc() and leave uninitialized pointers as NULL which makes MLX5_QP_EMPTY and MLX5_QP_KERNEL flows to be the same. Link: https://lore.kernel.org/r/20200427154636.381474-25-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 -------------- drivers/infiniband/hw/mlx5/qp.c | 23 ++++++++++------------- 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 82ea01a211dd..df375cb4efbb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -337,7 +337,6 @@ struct mlx5_ib_rwq { struct ib_umem *umem; size_t buf_size; unsigned int page_shift; - int create_type; struct mlx5_db db; u32 user_index; u32 wqe_count; @@ -346,17 +345,6 @@ struct mlx5_ib_rwq { u32 create_flags; /* Use enum mlx5_ib_wq_flags */ }; -enum { - MLX5_QP_USER, - MLX5_QP_KERNEL, - MLX5_QP_EMPTY -}; - -enum { - MLX5_WQ_USER, - MLX5_WQ_KERNEL -}; - struct mlx5_ib_rwq_ind_table { struct ib_rwq_ind_table ib_rwq_ind_tbl; u32 rqtn; @@ -457,8 +445,6 @@ struct mlx5_ib_qp { */ int bfregn; - int create_type; - struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 495f03905fbf..74f09cdb4a33 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -897,7 +897,6 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - rwq->create_type = MLX5_WQ_USER; return 0; err_umem: @@ -1022,7 +1021,6 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_dbg(dev, "copy failed\n"); goto err_unmap; } - qp->create_type = MLX5_QP_USER; return 0; @@ -1187,7 +1185,6 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, err = -ENOMEM; goto err_wrid; } - qp->create_type = MLX5_QP_KERNEL; return 0; @@ -1214,8 +1211,10 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) kvfree(qp->sq.wrid); kvfree(qp->sq.wr_data); kvfree(qp->rq.wrid); - mlx5_db_free(dev->mdev, &qp->db); - mlx5_frag_buf_free(dev->mdev, &qp->buf); + if (qp->db.db) + mlx5_db_free(dev->mdev, &qp->db); + if (qp->buf.frags) + mlx5_frag_buf_free(dev->mdev, &qp->buf); } static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) @@ -2000,8 +1999,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; - - qp->create_type = MLX5_QP_EMPTY; } if (is_sqp(init_attr->qp_type)) @@ -2155,9 +2152,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; err_create: - if (qp->create_type == MLX5_QP_USER) + if (udata) destroy_qp_user(dev, pd, qp, base, udata); - else if (qp->create_type == MLX5_QP_KERNEL) + else destroy_qp_kernel(dev, qp); err: @@ -2311,7 +2308,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (recv_cq) list_del(&qp->cq_recv_list); - if (qp->create_type == MLX5_QP_KERNEL) { + if (!udata) { __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (send_cq != recv_cq) @@ -2331,10 +2328,10 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, base->mqp.qpn); } - if (qp->create_type == MLX5_QP_KERNEL) - destroy_qp_kernel(dev, qp); - else if (qp->create_type == MLX5_QP_USER) + if (udata) destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); + else + destroy_qp_kernel(dev, qp); } static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, From 0ce300b15aad7d3940d0625badcfad353041f5a7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:25 +0300 Subject: [PATCH 184/562] RDMA/mlx5: Delete impossible inlen check The inlen is set to be above zero in all flows before and can't be negative at this stage. Link: https://lore.kernel.org/r/20200427154636.381474-26-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 74f09cdb4a33..5a43128d651b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2107,11 +2107,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING; } - if (inlen < 0) { - err = -EINVAL; - goto err; - } - if (init_attr->qp_type == IB_QPT_RAW_PACKET || qp->flags & IB_QP_CREATE_SOURCE_QPN) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; @@ -2156,8 +2151,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, destroy_qp_user(dev, pd, qp, base, udata); else destroy_qp_kernel(dev, qp); - -err: kvfree(in); return err; } From 21aad80b17e6d17adf99bf17482a5314bcb0aebb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:26 +0300 Subject: [PATCH 185/562] RDMA/mlx5: Globally parse DEVX UID Remove duplication in parsing of DEVX UID. Link: https://lore.kernel.org/r/20200427154636.381474-27-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 51 +++++++++++++++++---------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5a43128d651b..b2174e0817f5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1916,18 +1916,16 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata, struct mlx5_ib_qp *qp) + struct ib_udata *udata, struct mlx5_ib_qp *qp, + u32 uidx) { struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_create_qp_resp resp = {}; - struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; - u32 uidx = MLX5_IB_DEFAULT_UIDX; struct mlx5_ib_qp_base *base; int mlx5_st; void *qpc; @@ -1945,12 +1943,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; - if (udata) { - err = get_qp_user_index(ucontext, ucmd, udata->inlen, &uidx); - if (err) - return err; - } - if (qp->flags & IB_QP_CREATE_SOURCE_QPN) qp->underlay_qpn = init_attr->source_qpn; @@ -2329,18 +2321,10 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr, - struct mlx5_ib_create_qp *ucmd, struct ib_udata *udata) + struct mlx5_ib_create_qp *ucmd, u32 uidx) { - struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct mlx5_ib_ucontext, ibucontext); - int err = 0; - u32 uidx = MLX5_IB_DEFAULT_UIDX; void *dctc; - err = get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &uidx); - if (err) - return err; - qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); if (!qp->dct.in) return -ENOMEM; @@ -2651,14 +2635,14 @@ static size_t process_udata_size(struct ib_qp_init_attr *attr, static int create_raw_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr, void *ucmd, - struct ib_udata *udata) + struct ib_udata *udata, u32 uidx) { struct mlx5_ib_dev *dev = to_mdev(pd->device); if (attr->rwq_ind_tbl) return create_rss_raw_qp_tir(pd, qp, attr, ucmd, udata); - return create_qp_common(dev, pd, attr, ucmd, udata, qp); + return create_qp_common(dev, pd, attr, ucmd, udata, qp, uidx); } static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, @@ -2688,10 +2672,24 @@ static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return ret; } +static int get_qp_uidx(struct mlx5_ib_qp *qp, struct ib_udata *udata, + struct mlx5_ib_create_qp *ucmd, + struct ib_qp_init_attr *attr, u32 *uidx) +{ + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + if (attr->rwq_ind_tbl) + return 0; + + return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), uidx); +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { + u32 uidx = MLX5_IB_DEFAULT_UIDX; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; enum ib_qp_type type; @@ -2743,6 +2741,10 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, err = process_vendor_flags(dev, qp, ucmd, init_attr); if (err) goto free_qp; + + err = get_qp_uidx(qp, udata, ucmd, init_attr, &uidx); + if (err) + goto free_qp; } err = process_create_flags(dev, qp, init_attr); if (err) @@ -2757,13 +2759,14 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, switch (qp->type) { case IB_QPT_RAW_PACKET: - err = create_raw_qp(pd, qp, init_attr, ucmd, udata); + err = create_raw_qp(pd, qp, init_attr, ucmd, udata, uidx); break; case MLX5_IB_QPT_DCT: - err = create_dct(pd, qp, init_attr, ucmd, udata); + err = create_dct(pd, qp, init_attr, ucmd, uidx); break; default: - err = create_qp_common(dev, pd, init_attr, ucmd, udata, qp); + err = create_qp_common(dev, pd, init_attr, ucmd, udata, qp, + uidx); } if (err) { mlx5_ib_dbg(dev, "create_qp_common failed\n"); From 04bcc1c2d0d7bfa0bffa5853d9a127fb4f4cd943 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:27 +0300 Subject: [PATCH 186/562] RDMA/mlx5: Separate XRC_TGT QP creation from common flow XRC_TGT QP doesn't fail into kernel or user flow separation. It is initiated by the user, but is created through in-kernel verbs flow and doesn't have PD and udata in similar way to kernel QPs. So let's separate creation of that QP type from the common flow. Link: https://lore.kernel.org/r/20200427154636.381474-28-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 160 +++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 53 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b2174e0817f5..8890c172f7e5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -991,8 +991,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - uid = (attr->qp_type != IB_QPT_XRC_TGT && - attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; + uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) @@ -1913,6 +1912,81 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, return atomic_mode; } +static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, + struct ib_qp_init_attr *attr, + struct mlx5_ib_qp *qp, struct ib_udata *udata, + u32 uidx) +{ + struct mlx5_ib_resources *devr = &dev->devr; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_qp_base *base; + unsigned long flags; + void *qpc; + u32 *in; + int err; + + mutex_init(&qp->mutex); + + if (attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn); + + if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + MLX5_SET(qpc, qpc, block_lb_mc, 1); + if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) + MLX5_SET(qpc, qpc, cd_master, 1); + if (qp->flags & IB_QP_CREATE_MANAGED_SEND) + MLX5_SET(qpc, qpc, cd_slave_send, 1); + if (qp->flags & IB_QP_CREATE_MANAGED_RECV) + MLX5_SET(qpc, qpc, cd_slave_receive, 1); + + MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ); + MLX5_SET(qpc, qpc, no_sq, 1); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn); + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); + + /* 0xffffff means we ask to work with cqe version 0 */ + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) + MLX5_SET(qpc, qpc, user_index, uidx); + + if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) { + MLX5_SET(qpc, qpc, end_padding_mode, + MLX5_WQ_END_PAD_MODE_ALIGN); + /* Special case to clean flag */ + qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING; + } + + base = &qp->trans_qp.base; + err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); + kvfree(in); + if (err) { + destroy_qp_user(dev, NULL, qp, base, udata); + return err; + } + + base->container_mibqp = qp; + base->mqp.event = mlx5_ib_qp_event; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + list_add_tail(&qp->qps_list, &dev->qp_list); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + return 0; +} + static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct mlx5_ib_create_qp *ucmd, @@ -1958,40 +2032,30 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - if (pd) { - if (udata) { - __u32 max_wqes = - 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); - mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", - ucmd->sq_wqe_count); - if (ucmd->rq_wqe_shift != qp->rq.wqe_shift || - ucmd->rq_wqe_count != qp->rq.wqe_cnt) { - mlx5_ib_dbg(dev, "invalid rq params\n"); - return -EINVAL; - } - if (ucmd->sq_wqe_count > max_wqes) { - mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd->sq_wqe_count, max_wqes); - return -EINVAL; - } - err = create_user_qp(dev, pd, qp, udata, init_attr, &in, - &resp, &inlen, base, ucmd); - if (err) - mlx5_ib_dbg(dev, "err %d\n", err); - } else { - err = create_kernel_qp(dev, init_attr, qp, &in, &inlen, - base); - if (err) - mlx5_ib_dbg(dev, "err %d\n", err); - } + if (udata) { + __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); - if (err) - return err; - } else { - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - } + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", + ucmd->sq_wqe_count); + if (ucmd->rq_wqe_shift != qp->rq.wqe_shift || + ucmd->rq_wqe_count != qp->rq.wqe_cnt) { + mlx5_ib_dbg(dev, "invalid rq params\n"); + return -EINVAL; + } + if (ucmd->sq_wqe_count > max_wqes) { + mlx5_ib_dbg( + dev, + "requested sq_wqe_count (%d) > max allowed (%d)\n", + ucmd->sq_wqe_count, max_wqes); + return -EINVAL; + } + err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, + &inlen, base, ucmd); + } else + err = create_kernel_qp(dev, init_attr, qp, &in, &inlen, base); + + if (err) + return err; if (is_sqp(init_attr->qp_type)) qp->port = init_attr->port_num; @@ -2054,12 +2118,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, /* Set default resources */ switch (init_attr->qp_type) { - case IB_QPT_XRC_TGT: - MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); - MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); - MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn); - break; case IB_QPT_XRC_INI: MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); @@ -2105,16 +2163,12 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, &resp); - } else { + } else err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); - } - - if (err) { - mlx5_ib_dbg(dev, "create qp failed\n"); - goto err_create; - } kvfree(in); + if (err) + goto err_create; base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; @@ -2143,7 +2197,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, destroy_qp_user(dev, pd, qp, base, udata); else destroy_qp_kernel(dev, qp); - kvfree(in); return err; } @@ -2750,9 +2803,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (err) goto free_qp; - if (qp->type == IB_QPT_XRC_TGT) - xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; - err = check_qp_attr(dev, qp, init_attr); if (err) goto free_qp; @@ -2764,12 +2814,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, case MLX5_IB_QPT_DCT: err = create_dct(pd, qp, init_attr, ucmd, uidx); break; + case IB_QPT_XRC_TGT: + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + err = create_xrc_tgt_qp(dev, init_attr, qp, udata, uidx); + break; default: err = create_qp_common(dev, pd, init_attr, ucmd, udata, qp, uidx); } if (err) { - mlx5_ib_dbg(dev, "create_qp_common failed\n"); + mlx5_ib_dbg(dev, "create_qp failed %d\n", err); goto free_qp; } From 98fc1126c4161450f215254409e5539314b54a04 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:28 +0300 Subject: [PATCH 187/562] RDMA/mlx5: Separate to user/kernel create QP flows The kernel and user create QP flows have very little common code, separate them to simplify the future work of creating per-type create_*_qp() functions. Link: https://lore.kernel.org/r/20200427154636.381474-29-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 205 ++++++++++++++++++++++++-------- 1 file changed, 156 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8890c172f7e5..d5061001217e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -911,12 +911,12 @@ static int adjust_bfregn(struct mlx5_ib_dev *dev, bfregn % MLX5_NON_FP_BFREGS_PER_UAR; } -static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, struct ib_udata *udata, - struct ib_qp_init_attr *attr, u32 **in, - struct mlx5_ib_create_qp_resp *resp, int *inlen, - struct mlx5_ib_qp_base *base, - struct mlx5_ib_create_qp *ucmd) +static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, struct ib_udata *udata, + struct ib_qp_init_attr *attr, u32 **in, + struct mlx5_ib_create_qp_resp *resp, int *inlen, + struct mlx5_ib_qp_base *base, + struct mlx5_ib_create_qp *ucmd) { struct mlx5_ib_ucontext *context; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; @@ -1083,11 +1083,10 @@ static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) return fragment_end + MLX5_SEND_WQE_BB; } -static int create_kernel_qp(struct mlx5_ib_dev *dev, - struct ib_qp_init_attr *init_attr, - struct mlx5_ib_qp *qp, - u32 **in, int *inlen, - struct mlx5_ib_qp_base *base) +static int _create_kernel_qp(struct mlx5_ib_dev *dev, + struct ib_qp_init_attr *init_attr, + struct mlx5_ib_qp *qp, u32 **in, int *inlen, + struct mlx5_ib_qp_base *base) { int uar_index; void *qpc; @@ -1987,11 +1986,11 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, return 0; } -static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata, struct mlx5_ib_qp *qp, - u32 uidx) +static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct mlx5_ib_create_qp *ucmd, + struct ib_udata *udata, struct mlx5_ib_qp *qp, + u32 uidx) { struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); @@ -2032,28 +2031,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - if (udata) { - __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + if (ucmd->rq_wqe_shift != qp->rq.wqe_shift || + ucmd->rq_wqe_count != qp->rq.wqe_cnt) + return -EINVAL; - mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", - ucmd->sq_wqe_count); - if (ucmd->rq_wqe_shift != qp->rq.wqe_shift || - ucmd->rq_wqe_count != qp->rq.wqe_cnt) { - mlx5_ib_dbg(dev, "invalid rq params\n"); - return -EINVAL; - } - if (ucmd->sq_wqe_count > max_wqes) { - mlx5_ib_dbg( - dev, - "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd->sq_wqe_count, max_wqes); - return -EINVAL; - } - err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, - &inlen, base, ucmd); - } else - err = create_kernel_qp(dev, init_attr, qp, &in, &inlen, base); + if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) + return -EINVAL; + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, + base, ucmd); if (err) return err; @@ -2064,12 +2050,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, st, mlx5_st); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); - - if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) - MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); - else - MLX5_SET(qpc, qpc, latency_sensitive, 1); - + MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn); if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) MLX5_SET(qpc, qpc, wq_signature, 1); @@ -2145,10 +2126,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) MLX5_SET(qpc, qpc, user_index, uidx); - /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ - if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) - MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); - if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING && init_attr->qp_type != IB_QPT_RAW_PACKET) { MLX5_SET(qpc, qpc, end_padding_mode, @@ -2200,6 +2177,133 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } +static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp, + u32 uidx) +{ + struct mlx5_ib_resources *devr = &dev->devr; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_cq *send_cq; + struct mlx5_ib_cq *recv_cq; + unsigned long flags; + struct mlx5_ib_qp_base *base; + int mlx5_st; + void *qpc; + u32 *in; + int err; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + + mlx5_st = to_mlx5_st(qp->type); + if (mlx5_st < 0) + return -EINVAL; + + if (attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + + base = &qp->trans_qp.base; + + qp->has_rq = qp_has_rq(attr); + err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base); + if (err) + return err; + + if (is_sqp(attr->qp_type)) + qp->port = attr->port_num; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, mlx5_st); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + + if (attr->qp_type != MLX5_IB_QPT_REG_UMR) + MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); + else + MLX5_SET(qpc, qpc, latency_sensitive, 1); + + + if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + MLX5_SET(qpc, qpc, block_lb_mc, 1); + + if (qp->rq.wqe_cnt) { + MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); + } + + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr)); + + if (qp->sq.wqe_cnt) + MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); + else + MLX5_SET(qpc, qpc, no_sq, 1); + + if (attr->srq) { + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, + to_msrq(attr->srq)->msrq.srqn); + } else { + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, + to_msrq(devr->s1)->msrq.srqn); + } + + if (attr->send_cq) + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn); + + if (attr->recv_cq) + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn); + + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); + + /* 0xffffff means we ask to work with cqe version 0 */ + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) + MLX5_SET(qpc, qpc, user_index, uidx); + + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); + + err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); + kvfree(in); + if (err) + goto err_create; + + base->container_mibqp = qp; + base->mqp.event = mlx5_ib_qp_event; + + get_cqs(qp->type, attr->send_cq, attr->recv_cq, + &send_cq, &recv_cq); + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* Maintain device to QPs access, needed for further handling via reset + * flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling via reset flow + */ + if (send_cq) + list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); + if (recv_cq) + list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + return 0; + +err_create: + destroy_qp_kernel(dev, qp); + return err; +} + static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { @@ -2695,7 +2799,7 @@ static int create_raw_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, if (attr->rwq_ind_tbl) return create_rss_raw_qp_tir(pd, qp, attr, ucmd, udata); - return create_qp_common(dev, pd, attr, ucmd, udata, qp, uidx); + return create_user_qp(dev, pd, attr, ucmd, udata, qp, uidx); } static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, @@ -2819,8 +2923,11 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, err = create_xrc_tgt_qp(dev, init_attr, qp, udata, uidx); break; default: - err = create_qp_common(dev, pd, init_attr, ucmd, udata, qp, - uidx); + if (udata) + err = create_user_qp(dev, pd, init_attr, ucmd, udata, + qp, uidx); + else + err = create_kernel_qp(dev, pd, init_attr, qp, uidx); } if (err) { mlx5_ib_dbg(dev, "create_qp failed %d\n", err); From 747c519cdbe4a3f6a616d50c19bcb97413abe384 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:29 +0300 Subject: [PATCH 188/562] RDMA/mlx5: Reduce amount of duplication in QP destroy Delete both PD argument and checks if udata was provided, in favour of unified destroy QP functions. Link: https://lore.kernel.org/r/20200427154636.381474-30-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 70 +++++++++++++++------------------ 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d5061001217e..6b390b0e43af 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1038,25 +1038,36 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } -static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base, - struct ib_udata *udata) +static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct mlx5_ib_qp_base *base, struct ib_udata *udata) { - struct mlx5_ib_ucontext *context = - rdma_udata_to_drv_context( - udata, - struct mlx5_ib_ucontext, - ibucontext); + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); - mlx5_ib_db_unmap_user(context, &qp->db); - ib_umem_release(base->ubuffer.umem); + if (udata) { + /* User QP */ + mlx5_ib_db_unmap_user(context, &qp->db); + ib_umem_release(base->ubuffer.umem); - /* - * Free only the BFREGs which are handled by the kernel. - * BFREGs of UARs allocated dynamically are handled by user. - */ - if (qp->bfregn != MLX5_IB_INVALID_BFREG) - mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); + /* + * Free only the BFREGs which are handled by the kernel. + * BFREGs of UARs allocated dynamically are handled by user. + */ + if (qp->bfregn != MLX5_IB_INVALID_BFREG) + mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); + return; + } + + /* Kernel QP */ + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); + if (qp->db.db) + mlx5_db_free(dev->mdev, &qp->db); + if (qp->buf.frags) + mlx5_frag_buf_free(dev->mdev, &qp->buf); } /* get_sq_edge - Get the next nearby edge. @@ -1202,19 +1213,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, return err; } -static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) -{ - kvfree(qp->sq.wqe_head); - kvfree(qp->sq.w_list); - kvfree(qp->sq.wrid); - kvfree(qp->sq.wr_data); - kvfree(qp->rq.wrid); - if (qp->db.db) - mlx5_db_free(dev->mdev, &qp->db); - if (qp->buf.frags) - mlx5_frag_buf_free(dev->mdev, &qp->buf); -} - static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { if (attr->srq || (qp->type == IB_QPT_XRC_TGT) || @@ -1972,7 +1970,7 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); kvfree(in); if (err) { - destroy_qp_user(dev, NULL, qp, base, udata); + destroy_qp(dev, qp, base, udata); return err; } @@ -2170,10 +2168,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; err_create: - if (udata) - destroy_qp_user(dev, pd, qp, base, udata); - else - destroy_qp_kernel(dev, qp); + destroy_qp(dev, qp, base, udata); return err; } @@ -2300,7 +2295,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; err_create: - destroy_qp_kernel(dev, qp); + destroy_qp(dev, qp, base, NULL); return err; } @@ -2470,10 +2465,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, base->mqp.qpn); } - if (udata) - destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); - else - destroy_qp_kernel(dev, qp); + destroy_qp(dev, qp, base, udata); } static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, From f78d358cec9088ed77b5129c44f858cdfdb1e8c9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:30 +0300 Subject: [PATCH 189/562] RDMA/mlx5: Group all create QP parameters to simplify in-kernel interfaces The amount of parameters passed in and out between internal mlx5 create QP functions is too large to easily follow the flow. Change it by grouping all create QP parameter into one structure. Link: https://lore.kernel.org/r/20200427154636.381474-31-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 152 +++++++++++++++++--------------- 1 file changed, 83 insertions(+), 69 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6b390b0e43af..3807e1687cb2 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1610,14 +1610,24 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *q to_mpd(qp->ibqp.pd)->uid); } -static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, - struct ib_qp_init_attr *init_attr, - struct mlx5_ib_create_qp_rss *ucmd, - struct ib_udata *udata) +struct mlx5_create_qp_params { + struct ib_udata *udata; + size_t inlen; + void *ucmd; + u8 is_rss_raw : 1; + struct ib_qp_init_attr *attr; + u32 uidx; +}; + +static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { + struct ib_qp_init_attr *init_attr = params->attr; + struct mlx5_ib_create_qp_rss *ucmd = params->ucmd; + struct ib_udata *udata = params->udata; struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_qp_resp resp = {}; int inlen; int outlen; @@ -1632,7 +1642,8 @@ static int create_rss_raw_qp_tir(struct ib_pd *pd, struct mlx5_ib_qp *qp, u32 tdn = mucontext->tdn; u8 lb_flag = 0; - min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); + min_resp_len = + offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); if (udata->outlen < min_resp_len) return -EINVAL; @@ -1909,11 +1920,12 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, return atomic_mode; } -static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, - struct ib_qp_init_attr *attr, - struct mlx5_ib_qp *qp, struct ib_udata *udata, - u32 uidx) +static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { + struct ib_qp_init_attr *attr = params->attr; + struct ib_udata *udata = params->udata; + u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; @@ -1985,11 +1997,13 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, } static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata, struct mlx5_ib_qp *qp, - u32 uidx) + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { + struct ib_qp_init_attr *init_attr = params->attr; + struct mlx5_ib_create_qp *ucmd = params->ucmd; + struct ib_udata *udata = params->udata; + u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; @@ -2173,9 +2187,11 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, } static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp, - u32 uidx) + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { + struct ib_qp_init_attr *attr = params->attr; + u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; @@ -2469,9 +2485,11 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, - struct ib_qp_init_attr *attr, - struct mlx5_ib_create_qp *ucmd, u32 uidx) + struct mlx5_create_qp_params *params) { + struct ib_qp_init_attr *attr = params->attr; + struct mlx5_ib_create_qp *ucmd = params->ucmd; + u32 uidx = params->uidx; void *dctc; qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); @@ -2782,16 +2800,14 @@ static size_t process_udata_size(struct ib_qp_init_attr *attr, return min(ucmd, inlen); } -static int create_raw_qp(struct ib_pd *pd, struct mlx5_ib_qp *qp, - struct ib_qp_init_attr *attr, void *ucmd, - struct ib_udata *udata, u32 uidx) +static int create_raw_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + if (params->is_rss_raw) + return create_rss_raw_qp_tir(dev, pd, qp, params); - if (attr->rwq_ind_tbl) - return create_rss_raw_qp_tir(pd, qp, attr, ucmd, udata); - - return create_user_qp(dev, pd, attr, ucmd, udata, qp, uidx); + return create_user_qp(dev, pd, qp, params); } static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, @@ -2821,60 +2837,59 @@ static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return ret; } -static int get_qp_uidx(struct mlx5_ib_qp *qp, struct ib_udata *udata, - struct mlx5_ib_create_qp *ucmd, - struct ib_qp_init_attr *attr, u32 *uidx) +static int get_qp_uidx(struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { + struct mlx5_ib_create_qp *ucmd = params->ucmd; + struct ib_udata *udata = params->udata; struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - if (attr->rwq_ind_tbl) + if (params->is_rss_raw) return 0; - return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), uidx); + return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), ¶ms->uidx); } -struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata) { - u32 uidx = MLX5_IB_DEFAULT_UIDX; + struct mlx5_create_qp_params params = {}; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; enum ib_qp_type type; - void *ucmd = NULL; u16 xrcdn = 0; int err; dev = pd ? to_mdev(pd->device) : - to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); + to_mdev(to_mxrcd(attr->xrcd)->ibxrcd.device); - err = check_qp_type(dev, init_attr, &type); - if (err) { - mlx5_ib_dbg(dev, "Unsupported QP type %d\n", - init_attr->qp_type); - return ERR_PTR(err); - } - - err = check_valid_flow(dev, pd, init_attr, udata); + err = check_qp_type(dev, attr, &type); if (err) return ERR_PTR(err); - if (init_attr->qp_type == IB_QPT_GSI) - return mlx5_ib_gsi_create_qp(pd, init_attr); + err = check_valid_flow(dev, pd, attr, udata); + if (err) + return ERR_PTR(err); + + if (attr->qp_type == IB_QPT_GSI) + return mlx5_ib_gsi_create_qp(pd, attr); + + params.udata = udata; + params.uidx = MLX5_IB_DEFAULT_UIDX; + params.attr = attr; + params.is_rss_raw = !!attr->rwq_ind_tbl; if (udata) { - size_t inlen = - process_udata_size(init_attr, udata); - - if (!inlen) + params.inlen = process_udata_size(attr, udata); + if (!params.inlen) return ERR_PTR(-EINVAL); - ucmd = kzalloc(inlen, GFP_KERNEL); - if (!ucmd) + params.ucmd = kzalloc(params.inlen, GFP_KERNEL); + if (!params.ucmd) return ERR_PTR(-ENOMEM); - err = ib_copy_from_udata(ucmd, udata, inlen); + err = ib_copy_from_udata(params.ucmd, udata, params.inlen); if (err) goto free_ucmd; } @@ -2887,50 +2902,49 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, qp->type = type; if (udata) { - err = process_vendor_flags(dev, qp, ucmd, init_attr); + err = process_vendor_flags(dev, qp, params.ucmd, attr); if (err) goto free_qp; - err = get_qp_uidx(qp, udata, ucmd, init_attr, &uidx); + err = get_qp_uidx(qp, ¶ms); if (err) goto free_qp; } - err = process_create_flags(dev, qp, init_attr); + err = process_create_flags(dev, qp, attr); if (err) goto free_qp; - err = check_qp_attr(dev, qp, init_attr); + err = check_qp_attr(dev, qp, attr); if (err) goto free_qp; switch (qp->type) { case IB_QPT_RAW_PACKET: - err = create_raw_qp(pd, qp, init_attr, ucmd, udata, uidx); + err = create_raw_qp(dev, pd, qp, ¶ms); break; case MLX5_IB_QPT_DCT: - err = create_dct(pd, qp, init_attr, ucmd, uidx); + err = create_dct(pd, qp, ¶ms); break; case IB_QPT_XRC_TGT: - xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; - err = create_xrc_tgt_qp(dev, init_attr, qp, udata, uidx); + xrcdn = to_mxrcd(attr->xrcd)->xrcdn; + err = create_xrc_tgt_qp(dev, qp, ¶ms); break; default: if (udata) - err = create_user_qp(dev, pd, init_attr, ucmd, udata, - qp, uidx); + err = create_user_qp(dev, pd, qp, ¶ms); else - err = create_kernel_qp(dev, pd, init_attr, qp, uidx); + err = create_kernel_qp(dev, pd, qp, ¶ms); } if (err) { - mlx5_ib_dbg(dev, "create_qp failed %d\n", err); + mlx5_ib_err(dev, "create_qp failed %d\n", err); goto free_qp; } - kfree(ucmd); + kfree(params.ucmd); - if (is_qp0(init_attr->qp_type)) + if (is_qp0(attr->qp_type)) qp->ibqp.qp_num = 0; - else if (is_qp1(init_attr->qp_type)) + else if (is_qp1(attr->qp_type)) qp->ibqp.qp_num = 1; else qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; @@ -2942,7 +2956,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, free_qp: kfree(qp); free_ucmd: - kfree(ucmd); + kfree(params.ucmd); return ERR_PTR(err); } From 5d6fffed1cfd0c368a9089acb9fcc902c649c31c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:31 +0300 Subject: [PATCH 190/562] RDMA/mlx5: Promote RSS RAW QP flags check to higher level Move check that user didn't supplied RSS RAW QP unsupported command flags to the function that checks all such flags. Link: https://lore.kernel.org/r/20200427154636.381474-32-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3807e1687cb2..8daa8bc6b9c7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1652,13 +1652,6 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EOPNOTSUPP; } - if (ucmd->flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) { - mlx5_ib_dbg(dev, "invalid flags\n"); - return -EOPNOTSUPP; - } - if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER && !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n"); @@ -2687,11 +2680,20 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp); process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp); + cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC); + if (attr->rwq_ind_tbl && cond) { + mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n", + cond); + return -EINVAL; + } + if (flags) mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags); return (flags) ? -EINVAL : 0; -} + } static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag, bool cond, struct mlx5_ib_qp *qp) From 6f2cf76e6ec7885de116cfc9c08057f2f7873629 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:32 +0300 Subject: [PATCH 191/562] RDMA/mlx5: Handle udate outlen checks in one place Place in one function all udata size checks. This will allow us move ib_copy_to_udata() in general place and ensure that it will be performed after call to the FW. Link: https://lore.kernel.org/r/20200427154636.381474-33-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 48 ++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8daa8bc6b9c7..0d06706e6ce1 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1613,6 +1613,7 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *q struct mlx5_create_qp_params { struct ib_udata *udata; size_t inlen; + size_t outlen; void *ucmd; u8 is_rss_raw : 1; struct ib_qp_init_attr *attr; @@ -1638,15 +1639,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, void *hfso; u32 selected_fields = 0; u32 outer_l4; - size_t min_resp_len; u32 tdn = mucontext->tdn; u8 lb_flag = 0; - min_resp_len = - offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); - if (udata->outlen < min_resp_len) - return -EINVAL; - if (ucmd->comp_mask) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; @@ -2780,26 +2775,43 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return (create_flags) ? -EINVAL : 0; } -static size_t process_udata_size(struct ib_qp_init_attr *attr, - struct ib_udata *udata) +static int process_udata_size(struct mlx5_ib_dev *dev, + struct mlx5_create_qp_params *params) { size_t ucmd = sizeof(struct mlx5_ib_create_qp); + struct ib_qp_init_attr *attr = params->attr; + struct ib_udata *udata = params->udata; + size_t outlen = udata->outlen; size_t inlen = udata->inlen; - if (attr->qp_type == IB_QPT_DRIVER) - return (inlen < ucmd) ? 0 : ucmd; + params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp)); + if (attr->qp_type == IB_QPT_DRIVER) { + params->inlen = (inlen < ucmd) ? 0 : ucmd; + goto out; + } - if (!attr->rwq_ind_tbl) - return ucmd; + if (!params->is_rss_raw) { + params->inlen = ucmd; + goto out; + } + /* RSS RAW QP */ if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags)) - return 0; + return -EINVAL; + + if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index)) + return -EINVAL; ucmd = sizeof(struct mlx5_ib_create_qp_rss); if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd)) - return 0; + return -EINVAL; - return min(ucmd, inlen); + params->inlen = min(ucmd, inlen); +out: + if (!params->inlen) + mlx5_ib_dbg(dev, "udata is too small or not cleared\n"); + + return (params->inlen) ? 0 : -EINVAL; } static int create_raw_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, @@ -2883,9 +2895,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, params.is_rss_raw = !!attr->rwq_ind_tbl; if (udata) { - params.inlen = process_udata_size(attr, udata); - if (!params.inlen) - return ERR_PTR(-EINVAL); + err = process_udata_size(dev, ¶ms); + if (err) + return ERR_PTR(err); params.ucmd = kzalloc(params.inlen, GFP_KERNEL); if (!params.ucmd) From 08d53976609aec17f28872423d3a3e86ad1a3ec4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:33 +0300 Subject: [PATCH 192/562] RDMA/mlx5: Copy response to the user in one place Update all the places in create QP flows to copy response to the user in one place. Link: https://lore.kernel.org/r/20200427154636.381474-34-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 113 +++++++++++++++----------------- 1 file changed, 52 insertions(+), 61 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0d06706e6ce1..9ca742189281 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1015,17 +1015,8 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_free; } - err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); - if (err) { - mlx5_ib_dbg(dev, "copy failed\n"); - goto err_unmap; - } - return 0; -err_unmap: - mlx5_ib_db_unmap_user(context, &qp->db); - err_free: kvfree(*in); @@ -1551,14 +1542,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn : rq->base.mqp.qpn; - err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); - if (err) - goto err_destroy_tir; - return 0; -err_destroy_tir: - destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd); err_destroy_rq: destroy_raw_packet_qp_rq(dev, rq); err_destroy_sq: @@ -1618,6 +1603,7 @@ struct mlx5_create_qp_params { u8 is_rss_raw : 1; struct ib_qp_init_attr *attr; u32 uidx; + struct mlx5_ib_create_qp_resp resp; }; static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, @@ -1629,7 +1615,6 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_udata *udata = params->udata; struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - struct mlx5_ib_create_qp_resp resp = {}; int inlen; int outlen; int err; @@ -1662,12 +1647,6 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; - err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); - if (err) { - mlx5_ib_dbg(dev, "copy failed\n"); - return -EINVAL; - } - inlen = MLX5_ST_SZ_BYTES(create_tir_in); outlen = MLX5_ST_SZ_BYTES(create_tir_out); in = kvzalloc(inlen + outlen, GFP_KERNEL); @@ -1803,34 +1782,30 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err; if (mucontext->devx_uid) { - resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; - resp.tirn = qp->rss_qp.tirn; + params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; + params->resp.tirn = qp->rss_qp.tirn; if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { - resp.tir_icm_addr = + params->resp.tir_icm_addr = MLX5_GET(create_tir_out, out, icm_address_31_0); - resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, - icm_address_39_32) - << 32; - resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, - icm_address_63_40) - << 40; - resp.comp_mask |= + params->resp.tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + params->resp.tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; } } - err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); - if (err) - goto err_copy; - kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; qp->is_rss = true; return 0; -err_copy: - mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid); err: kvfree(in); return err; @@ -1995,7 +1970,6 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_create_qp_resp resp = {}; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; @@ -2038,8 +2012,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) return -EINVAL; - err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, - base, ucmd); + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, ¶ms->resp, + &inlen, base, ucmd); if (err) return err; @@ -2139,7 +2113,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, - &resp); + ¶ms->resp); } else err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); @@ -2865,6 +2839,25 @@ static int get_qp_uidx(struct mlx5_ib_qp *qp, return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), ¶ms->uidx); } +static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) +{ + struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device); + + if (mqp->state == IB_QPS_RTR) { + int err; + + err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct); + if (err) { + mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err); + return err; + } + } + + kfree(mqp->dct.in); + kfree(mqp); + return 0; +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata) { @@ -2955,6 +2948,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, } kfree(params.ucmd); + params.ucmd = NULL; if (is_qp0(attr->qp_type)) qp->ibqp.qp_num = 0; @@ -2965,8 +2959,24 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, qp->trans_qp.xrcdn = xrcdn; + if (udata) + /* + * It is safe to copy response for all user create QP flows, + * including MLX5_IB_QPT_DCT, which doesn't need it. + * In that case, resp will be filled with zeros. + */ + err = ib_copy_to_udata(udata, ¶ms.resp, params.outlen); + if (err) + goto destroy_qp; + return &qp->ibqp; +destroy_qp: + if (qp->type == MLX5_IB_QPT_DCT) + mlx5_ib_destroy_dct(qp); + else + destroy_qp_common(dev, qp, udata); + qp = NULL; free_qp: kfree(qp); free_ucmd: @@ -2974,25 +2984,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, return ERR_PTR(err); } -static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) -{ - struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device); - - if (mqp->state == IB_QPS_RTR) { - int err; - - err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct); - if (err) { - mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err); - return err; - } - } - - kfree(mqp->dct.in); - kfree(mqp); - return 0; -} - int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); From 6367da46d3cb03ff717457875bd01dda7b02a1ff Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:34 +0300 Subject: [PATCH 193/562] RDMA/mlx5: Remove redundant destroy QP call After major refactoring in create QP flow, it is no needed to call to destroy QP in XRC_TGT flow. Link: https://lore.kernel.org/r/20200427154636.381474-35-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9ca742189281..d7983a951e8d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1887,7 +1887,6 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_create_qp_params *params) { struct ib_qp_init_attr *attr = params->attr; - struct ib_udata *udata = params->udata; u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); @@ -1944,10 +1943,8 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, base = &qp->trans_qp.base; err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); kvfree(in); - if (err) { - destroy_qp(dev, qp, base, udata); + if (err) return err; - } base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; From 968f0b6f9c01bdf772a4c04ee1fe08971d65af14 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 27 Apr 2020 18:46:35 +0300 Subject: [PATCH 194/562] RDMA/mlx5: Consolidate into special function all create QP calls Finish separation to blocks of mlx5_ib_create_qp() functions, so all internal create QP implementation are located in one place. Link: https://lore.kernel.org/r/20200427154636.381474-36-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 85 +++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d7983a951e8d..18c0a25da47a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1953,6 +1953,7 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, list_add_tail(&qp->qps_list, &dev->qp_list); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn; return 0; } @@ -2785,14 +2786,54 @@ static int process_udata_size(struct mlx5_ib_dev *dev, return (params->inlen) ? 0 : -EINVAL; } -static int create_raw_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, - struct mlx5_create_qp_params *params) +static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { - if (params->is_rss_raw) - return create_rss_raw_qp_tir(dev, pd, qp, params); + int err; - return create_user_qp(dev, pd, qp, params); + if (params->is_rss_raw) { + err = create_rss_raw_qp_tir(dev, pd, qp, params); + goto out; + } + + if (qp->type == MLX5_IB_QPT_DCT) { + err = create_dct(pd, qp, params); + goto out; + } + + if (qp->type == IB_QPT_XRC_TGT) { + err = create_xrc_tgt_qp(dev, qp, params); + goto out; + } + + if (params->udata) + err = create_user_qp(dev, pd, qp, params); + else + err = create_kernel_qp(dev, pd, qp, params); + +out: + if (err) { + mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type); + return err; + } + + if (is_qp0(qp->type)) + qp->ibqp.qp_num = 0; + else if (is_qp1(qp->type)) + qp->ibqp.qp_num = 1; + else + qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; + + mlx5_ib_dbg(dev, + "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", + qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, + params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn : + -1, + params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn : + -1); + + return 0; } static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, @@ -2862,7 +2903,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; enum ib_qp_type type; - u16 xrcdn = 0; int err; dev = pd ? to_mdev(pd->device) : @@ -2922,40 +2962,13 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, if (err) goto free_qp; - switch (qp->type) { - case IB_QPT_RAW_PACKET: - err = create_raw_qp(dev, pd, qp, ¶ms); - break; - case MLX5_IB_QPT_DCT: - err = create_dct(pd, qp, ¶ms); - break; - case IB_QPT_XRC_TGT: - xrcdn = to_mxrcd(attr->xrcd)->xrcdn; - err = create_xrc_tgt_qp(dev, qp, ¶ms); - break; - default: - if (udata) - err = create_user_qp(dev, pd, qp, ¶ms); - else - err = create_kernel_qp(dev, pd, qp, ¶ms); - } - if (err) { - mlx5_ib_err(dev, "create_qp failed %d\n", err); + err = create_qp(dev, pd, qp, ¶ms); + if (err) goto free_qp; - } kfree(params.ucmd); params.ucmd = NULL; - if (is_qp0(attr->qp_type)) - qp->ibqp.qp_num = 0; - else if (is_qp1(attr->qp_type)) - qp->ibqp.qp_num = 1; - else - qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; - - qp->trans_qp.xrcdn = xrcdn; - if (udata) /* * It is safe to copy response for all user create QP flows, From 0eacc574aae7300bf46c10c7116c3ba5825505b7 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 27 Apr 2020 18:46:36 +0300 Subject: [PATCH 195/562] RDMA/mlx5: Verify that QP is created with RQ or SQ RAW packet QP and underlay QP must be created with either RQ or SQ, check that. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20200427154636.381474-37-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 18c0a25da47a..14f4f0982e4e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1482,6 +1482,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u16 uid = to_mpd(pd)->uid; u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt) + return -EINVAL; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); if (err) From fa5d010c5630b143b802e0477e87bba0656829cf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:42 +0300 Subject: [PATCH 196/562] RDMA: Group create AH arguments in struct Following patch adds additional argument to the create AH function, so it make sense to group ah_attr and flags arguments in struct. Link: https://lore.kernel.org/r/20200430192146.12863-13-maorg@mellanox.com Signed-off-by: Maor Gottlieb Acked-by: Devesh Sharma Acked-by: Gal Pressman Acked-by: Weihang Li Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 5 ++++- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 +- drivers/infiniband/hw/efa/efa.h | 3 +-- drivers/infiniband/hw/efa/efa_verbs.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_ah.c | 5 +++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/mlx4/ah.c | 11 +++++++---- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx5/ah.c | 5 +++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 9 +++++---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 2 +- drivers/infiniband/hw/qedr/verbs.c | 4 ++-- drivers/infiniband/hw/qedr/verbs.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 5 +++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2 +- drivers/infiniband/sw/rdmavt/ah.c | 11 ++++++----- drivers/infiniband/sw/rdmavt/ah.h | 4 ++-- drivers/infiniband/sw/rxe/rxe_verbs.c | 9 +++++---- include/rdma/ib_verbs.h | 9 +++++++-- 22 files changed, 66 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3bfadd8effcc..86be8a54a2d6 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -502,6 +502,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, u32 flags, struct ib_udata *udata) { + struct rdma_ah_init_attr init_attr = {}; struct ib_device *device = pd->device; struct ib_ah *ah; int ret; @@ -521,8 +522,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->pd = pd; ah->type = ah_attr->type; ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + init_attr.ah_attr = ah_attr; + init_attr.flags = flags; - ret = device->ops.create_ah(ah, ah_attr, flags, udata); + ret = device->ops.create_ah(ah, &init_attr, udata); if (ret) { kfree(ah); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index d98348e82422..5a7c090204c5 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -631,11 +631,12 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) return nw_type; } -int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct ib_pd *ib_pd = ib_ah->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; const struct ib_gid_attr *sgid_attr; @@ -673,7 +674,8 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, - !(flags & RDMA_CREATE_AH_SLEEPABLE)); + !(init_attr->flags & + RDMA_CREATE_AH_SLEEPABLE)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate HW AH"); return rc; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 18dd46f46cf4..204c0849ba28 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -170,7 +170,7 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index aa7396a1588a..45d519edb4c3 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -153,8 +153,7 @@ int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 5c57098a4aee..454b01b21e6a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1639,10 +1639,10 @@ static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) } int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct efa_dev *dev = to_edev(ibah->device); struct efa_com_create_ah_params params = {}; struct efa_ibv_create_ah_resp resp = {}; @@ -1650,7 +1650,7 @@ int efa_create_ah(struct ib_ah *ibah, struct efa_ah *ah = to_eah(ibah); int err; - if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) { + if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Create address handle is not supported in atomic context\n"); err = -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 8a522e14ef62..5b2f9314edd3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,13 +39,14 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); u16 vlan_id = 0xffff; bool vlan_en = false; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ecbfeb6dbdd4..e1032cec2b12 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1208,8 +1208,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt, int rr); -int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); +int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 02a169f8027b..5f8f8d5c0ce0 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -141,10 +141,11 @@ static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) - +int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) return -EINVAL; @@ -167,12 +168,14 @@ int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag) { struct rdma_ah_attr slave_attr = *ah_attr; + struct rdma_ah_init_attr init_attr = {}; struct mlx4_ib_ah *mah = to_mah(ah); int ret; slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL); + init_attr.ah_attr = &slave_attr; + ret = mlx4_ib_create_ah(ah, &init_attr, NULL); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d188573187fa..182a237b87f7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -752,7 +752,7 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 80642dd359bc..9b59348d51b5 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -68,10 +68,11 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, } } -int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct mlx5_ib_ah *ah = to_mah(ibah); struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_ah_attr_type ah_type = ah_attr->type; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index df375cb4efbb..7dffc87601eb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1153,7 +1153,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 69a3e4f62fb1..bc3e3d741ca3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -388,14 +388,15 @@ static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); } -static int mthca_ah_create(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +static int mthca_ah_create(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_ah *ah = to_mah(ibah); - return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), ah_attr, - ah); + return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), + init_attr->ah_attr, ah); } static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 2b7f00ac41b0..6eea02b18968 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -155,7 +155,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { u32 *ahid_addr; @@ -165,6 +165,7 @@ int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, u16 vlan_tag = 0xffff; const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd); + struct rdma_ah_attr *attr = init_attr->ah_attr; struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 9780afcde780..8b73b3489f3a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -51,7 +51,7 @@ enum { OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a5bd3adaf90a..d6b94a713573 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2750,12 +2750,12 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { struct qedr_ah *ah = get_qedr_ah(ibah); - rdma_copy_ah_attr(&ah->attr, attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); return 0; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 18027844eb87..5e02387e068d 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,7 @@ int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index faf7ecd7b3fa..ccbded2d26ce 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -509,9 +509,10 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) * * @return: 0 on success, otherwise errno. */ -int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct pvrdma_dev *dev = to_vdev(ibah->device); struct pvrdma_ah *ah = to_vah(ibah); const struct ib_global_route *grh; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index e4a48f5c0c85..267702226f10 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -414,7 +414,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); -int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ee02c6176007..40480add7dd3 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -98,14 +98,14 @@ EXPORT_SYMBOL(rvt_check_ah); * * Return: 0 on success */ -int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata) +int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct rvt_ah *ah = ibah_to_rvtah(ibah); struct rvt_dev_info *dev = ib_to_rvt(ibah->device); unsigned long flags; - if (rvt_check_ah(ibah->device, ah_attr)) + if (rvt_check_ah(ibah->device, init_attr->ah_attr)) return -EINVAL; spin_lock_irqsave(&dev->n_ahs_lock, flags); @@ -117,10 +117,11 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, dev->n_ahs_allocated++; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - rdma_copy_ah_attr(&ah->attr, ah_attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); if (dev->driver_f.notify_new_ah) - dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); + dev->driver_f.notify_new_ah(ibah->device, + init_attr->ah_attr, ah); return 0; } diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index bbb4d3bdec4e..40b7123fec76 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -50,8 +50,8 @@ #include -int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata); +int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9dd4bd7aea92..b8a22af724e8 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -195,15 +195,16 @@ static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) rxe_drop_ref(pd); } -static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +static int rxe_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); - err = rxe_av_chk_attr(rxe, attr); + err = rxe_av_chk_attr(rxe, init_attr->ah_attr); if (err) return err; @@ -211,7 +212,7 @@ static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, if (err) return err; - rxe_init_av(attr, &ah->av); + rxe_init_av(init_attr->ah_attr, &ah->av); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bbc5cfb57cd2..20ea26810349 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -880,6 +880,11 @@ struct ib_mr_status { */ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); +struct rdma_ah_init_attr { + struct rdma_ah_attr *ah_attr; + u32 flags; +}; + enum rdma_ah_attr_type { RDMA_AH_ATTR_TYPE_UNDEFINED, RDMA_AH_ATTR_TYPE_IB, @@ -2403,8 +2408,8 @@ struct ib_device_ops { void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); - int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); + int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); void (*destroy_ah)(struct ib_ah *ah, u32 flags); From bd3920eac133103f0d4aa5fc62290e6df9a6c6da Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:43 +0300 Subject: [PATCH 197/562] RDMA/core: Add LAG functionality Add support to get the RoCE LAG xmit slave by building skb of the RoCE packet and call to master_get_xmit_slave. If driver wants to get the slave assume all slaves are available, then need to set RDMA_LAG_FLAGS_HASH_ALL_SLAVES in flags. Link: https://lore.kernel.org/r/20200430192146.12863-14-maorg@mellanox.com Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/lag.c | 136 +++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 1 + include/rdma/lag.h | 23 ++++++ 4 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/lag.c create mode 100644 include/rdma/lag.h diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d1b14887960e..870f0fcd54d5 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ nldev.o restrack.o counters.o ib_core_uverbs.o \ - trace.o + trace.o lag.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c new file mode 100644 index 000000000000..a29533626a7c --- /dev/null +++ b/drivers/infiniband/core/lag.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + */ + +#include +#include +#include + +static struct sk_buff *rdma_build_skb(struct ib_device *device, + struct net_device *netdev, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct ipv6hdr *ip6h; + struct sk_buff *skb; + struct ethhdr *eth; + struct iphdr *iph; + struct udphdr *uh; + u8 smac[ETH_ALEN]; + bool is_ipv4; + int hdr_len; + + is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw); + hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev); + hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr); + + skb = alloc_skb(hdr_len, flags); + if (!skb) + return NULL; + + skb->dev = netdev; + skb_reserve(skb, hdr_len); + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->source = htons(0xC000); + uh->dest = htons(ROCE_V2_UDP_DPORT); + uh->len = htons(sizeof(struct udphdr)); + + if (is_ipv4) { + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + iph->frag_off = 0; + iph->version = 4; + iph->protocol = IPPROTO_UDP; + iph->ihl = 0x5; + iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct + iphdr)); + memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12, + sizeof(struct in_addr)); + memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12, + sizeof(struct in_addr)); + } else { + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6h->version = 6; + ip6h->nexthdr = IPPROTO_UDP; + memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label, + sizeof(*ip6h->flow_lbl)); + memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw, + sizeof(struct in6_addr)); + memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw, + sizeof(struct in6_addr)); + } + + skb_push(skb, sizeof(struct ethhdr)); + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6); + rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac); + memcpy(eth->h_source, smac, ETH_ALEN); + memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN); + + return skb; +} + +static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, + struct net_device *master, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct net_device *slave; + struct sk_buff *skb; + + skb = rdma_build_skb(device, master, ah_attr, flags); + if (!skb) + return ERR_PTR(-ENOMEM); + + rcu_read_lock(); + slave = netdev_get_xmit_slave(master, skb, + !!(device->lag_flags & + RDMA_LAG_FLAGS_HASH_ALL_SLAVES)); + if (slave) + dev_hold(slave); + rcu_read_unlock(); + kfree_skb(skb); + return slave; +} + +void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave) +{ + if (xmit_slave) + dev_put(xmit_slave); +} + +struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct net_device *slave = NULL; + struct net_device *master; + + if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && + ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)) + return NULL; + + rcu_read_lock(); + master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr); + if (IS_ERR(master)) { + rcu_read_unlock(); + return master; + } + dev_hold(master); + rcu_read_unlock(); + + if (!netif_is_bond_master(master)) + goto put; + + slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags); +put: + dev_put(master); + return slave; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 20ea26810349..e6c18ec0365a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2714,6 +2714,7 @@ struct ib_device { /* Used by iWarp CM */ char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; + u32 lag_flags; }; struct ib_client_nl_info; diff --git a/include/rdma/lag.h b/include/rdma/lag.h new file mode 100644 index 000000000000..7c06ec9b2eef --- /dev/null +++ b/include/rdma/lag.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_LAG_H_ +#define _RDMA_LAG_H_ + +#include + +struct ib_device; +struct rdma_ah_attr; + +enum rdma_lag_flags { + RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0 +}; + +void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave); +struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + gfp_t flags); + +#endif /* _RDMA_LAG_H_ */ From 51aab12631dd7700385d275846ca49dc0b8c2124 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:44 +0300 Subject: [PATCH 198/562] RDMA/core: Get xmit slave for LAG Add a call to rdma_lag_get_ah_roce_slave() when the address handle is created. Lower driver can use it to select the QP's affinity port. Link: https://lore.kernel.org/r/20200430192146.12863-15-maorg@mellanox.com Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 61 +++++++++++++++++++++++---------- include/rdma/ib_verbs.h | 2 ++ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 86be8a54a2d6..bf0249f76ae9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "core_priv.h" #include @@ -500,7 +501,8 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, - struct ib_udata *udata) + struct ib_udata *udata, + struct net_device *xmit_slave) { struct rdma_ah_init_attr init_attr = {}; struct ib_device *device = pd->device; @@ -524,6 +526,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); init_attr.ah_attr = ah_attr; init_attr.flags = flags; + init_attr.xmit_slave = xmit_slave; ret = device->ops.create_ah(ah, &init_attr, udata); if (ret) { @@ -550,15 +553,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags) { const struct ib_gid_attr *old_sgid_attr; + struct net_device *slave; struct ib_ah *ah; int ret; ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); if (ret) return ERR_PTR(ret); - - ah = _rdma_create_ah(pd, ah_attr, flags, NULL); - + slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? + GFP_KERNEL : GFP_ATOMIC); + if (IS_ERR(slave)) { + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return (void *)slave; + } + ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave); + rdma_lag_put_ah_roce_slave(slave); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; } @@ -597,7 +607,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, } } - ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); + ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, + udata, NULL); out: rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); @@ -1636,11 +1647,35 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + attr->xmit_slave = NULL; if (attr_mask & IB_QP_AV) { ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, &old_sgid_attr_av); if (ret) return ret; + + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + struct net_device *slave; + + /* + * If the user provided the qp_attr then we have to + * resolve it. Kerne users have to provide already + * resolved rdma_ah_attr's. + */ + if (udata) { + ret = ib_resolve_eth_dmac(qp->device, + &attr->ah_attr); + if (ret) + goto out_av; + } + slave = rdma_lag_get_ah_roce_slave(qp->device, + &attr->ah_attr, + GFP_KERNEL); + if (IS_ERR(slave)) + goto out_av; + attr->xmit_slave = slave; + } } if (attr_mask & IB_QP_ALT_PATH) { /* @@ -1667,18 +1702,6 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } } - /* - * If the user provided the qp_attr then we have to resolve it. Kernel - * users have to provide already resolved rdma_ah_attr's - */ - if (udata && (attr_mask & IB_QP_AV) && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto out; - } - if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { dev_warn(&qp->device->dev, @@ -1720,8 +1743,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ALT_PATH) rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); out_av: - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { + rdma_lag_put_ah_roce_slave(attr->xmit_slave); rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); + } return ret; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6c18ec0365a..8d29f2f79da8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -883,6 +883,7 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct rdma_ah_init_attr { struct rdma_ah_attr *ah_attr; u32 flags; + struct net_device *xmit_slave; }; enum rdma_ah_attr_type { @@ -1272,6 +1273,7 @@ struct ib_qp_attr { u8 alt_port_num; u8 alt_timeout; u32 rate_limit; + struct net_device *xmit_slave; }; enum ib_wr_opcode { From 5163b2743ae00bf428a8a7e06839943b2f3965ed Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:45 +0300 Subject: [PATCH 199/562] RDMA/mlx5: Refactor affinity related code Move affinity related code in modify qp to function. It's a preparation for next patch the extend the affinity calculation to consider the xmit slave. Link: https://lore.kernel.org/r/20200430192146.12863-16-maorg@mellanox.com Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 90 +++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 14f4f0982e4e..14bfdfc8ab96 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3582,33 +3582,61 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return 0; } -static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, - struct mlx5_ib_pd *pd, - struct mlx5_ib_qp_base *qp_base, - u8 port_num, struct ib_udata *udata) +static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, + struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - unsigned int tx_port_affinity; + u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; + atomic_t *tx_port_affinity; - if (ucontext) { - tx_port_affinity = (unsigned int)atomic_add_return( - 1, &ucontext->tx_port_affinity) % - MLX5_MAX_PORTS + - 1; + if (ucontext) + tx_port_affinity = &ucontext->tx_port_affinity; + else + tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity; + + return (unsigned int)atomic_add_return(1, tx_port_affinity) % + MLX5_MAX_PORTS + 1; +} + +static bool qp_supports_affinity(struct ib_qp *qp) +{ + struct mlx5_ib_qp *mqp = to_mqp(qp); + + if ((qp->qp_type == IB_QPT_RC) || + (qp->qp_type == IB_QPT_UD && + !(mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) || + (qp->qp_type == IB_QPT_UC) || + (qp->qp_type == IB_QPT_RAW_PACKET) || + (qp->qp_type == IB_QPT_XRC_INI) || + (qp->qp_type == IB_QPT_XRC_TGT)) + return true; + return false; +} + +static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init, + struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_qp_base *qp_base; + unsigned int tx_affinity; + + if (!(dev->lag_active && init && qp_supports_affinity(qp))) + return 0; + + tx_affinity = get_tx_affinity_rr(dev, udata); + + qp_base = &mqp->trans_qp.base; + if (ucontext) mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n", - tx_port_affinity, qp_base->mqp.qpn, ucontext); - } else { - tx_port_affinity = - (unsigned int)atomic_add_return( - 1, &dev->port[port_num].roce.tx_port_affinity) % - MLX5_MAX_PORTS + - 1; + tx_affinity, qp_base->mqp.qpn, ucontext); + else mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", - tx_port_affinity, qp_base->mqp.qpn); - } - - return tx_port_affinity; + tx_affinity, qp_base->mqp.qpn); + return tx_affinity; } static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, @@ -3718,22 +3746,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { - if ((ibqp->qp_type == IB_QPT_RC) || - (ibqp->qp_type == IB_QPT_UD && - !(qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) || - (ibqp->qp_type == IB_QPT_UC) || - (ibqp->qp_type == IB_QPT_RAW_PACKET) || - (ibqp->qp_type == IB_QPT_XRC_INI) || - (ibqp->qp_type == IB_QPT_XRC_TGT)) { - if (dev->lag_active) { - u8 p = mlx5_core_native_port_num(dev->mdev) - 1; - tx_affinity = get_tx_affinity(dev, pd, base, p, - udata); - context->flags |= cpu_to_be32(tx_affinity << 24); - } - } - } + tx_affinity = get_tx_affinity(ibqp, + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT, udata); + context->flags |= cpu_to_be32(tx_affinity << 24); if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; From cfc1a89e449c02207952c72a4c0394691fdedf43 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:46 +0300 Subject: [PATCH 200/562] RDMA/mlx5: Set lag tx affinity according to slave The patch sets the lag tx affinity of the data QPs and the GSI QPs according to the LAG xmit slave. For GSI QPs, in case the link layer is Ethenet (RoCE) we create two GSI QPs, one for each physical port. When the driver selects the GSI QP, it will consider the port affinity result. For connected QPs, the driver sets the affinity of the xmit slave. The above, ensures that RC QP and it's corresponding GSI QP will transmit from the same physical port. Link: https://lore.kernel.org/r/20200430192146.12863-17-maorg@mellanox.com Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ah.c | 9 +++-- drivers/infiniband/hw/mlx5/gsi.c | 33 ++++++++++++++---- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 52 +++++++++++++++++++--------- include/linux/mlx5/mlx5_ifc.h | 4 ++- include/linux/mlx5/qp.h | 2 ++ 7 files changed, 76 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 9b59348d51b5..cc858f658567 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -33,8 +33,9 @@ #include "mlx5_ib.h" static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_init_attr *init_attr) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum ib_gid_type gid_type; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { @@ -51,6 +52,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + if (init_attr->xmit_slave) + ah->xmit_port = + mlx5_lag_get_slave_port(dev->mdev, + init_attr->xmit_slave); gid_type = ah_attr->grh.sgid_attr->gid_type; memcpy(ah->av.rmac, ah_attr->roce.dmac, @@ -98,7 +103,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return err; } - create_ib_ah(dev, ah, ah_attr); + create_ib_ah(dev, ah, init_attr); return 0; } diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 1afbf03d1a98..40d418153891 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -119,10 +119,17 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct mlx5_ib_gsi_qp *gsi; struct ib_qp_init_attr hw_init_attr = *init_attr; const u8 port_num = init_attr->port_num; - const int num_pkeys = pd->device->attrs.max_pkeys; - const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; + int num_qps = 0; int ret; + if (mlx5_ib_deth_sqpn_cap(dev)) { + if (MLX5_CAP_GEN(dev->mdev, + port_type) == MLX5_CAP_PORT_TYPE_IB) + num_qps = pd->device->attrs.max_pkeys; + else if (dev->lag_active) + num_qps = MLX5_MAX_PORTS; + } + gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); if (!gsi) return ERR_PTR(-ENOMEM); @@ -261,7 +268,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) } static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, - u16 qp_index) + u16 pkey_index) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct ib_qp_attr attr; @@ -270,7 +277,7 @@ static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; attr.qp_state = IB_QPS_INIT; - attr.pkey_index = qp_index; + attr.pkey_index = pkey_index; attr.qkey = IB_QP1_QKEY; attr.port_num = gsi->port_num; ret = ib_modify_qp(qp, &attr, mask); @@ -304,12 +311,17 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) { struct ib_device *device = gsi->rx_qp->device; struct mlx5_ib_dev *dev = to_mdev(device); + int pkey_index = qp_index; + struct mlx5_ib_qp *mqp; struct ib_qp *qp; unsigned long flags; u16 pkey; int ret; - ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); + if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + pkey_index = 0; + + ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey); if (ret) { mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", gsi->port_num, qp_index); @@ -338,7 +350,10 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) return; } - ret = modify_to_rts(gsi, qp, qp_index); + mqp = to_mqp(qp); + if (dev->lag_active) + mqp->gsi_lag_port = qp_index + 1; + ret = modify_to_rts(gsi, qp, pkey_index); if (ret) goto err_destroy_qp; @@ -457,11 +472,15 @@ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) { struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + struct mlx5_ib_ah *ah = to_mah(wr->ah); int qp_index = wr->pkey_index; - if (!mlx5_ib_deth_sqpn_cap(dev)) + if (!gsi->num_qps) return gsi->rx_qp; + if (dev->lag_active && ah->xmit_port) + qp_index = ah->xmit_port - 1; + if (qp_index >= gsi->num_qps) return NULL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 80ae8f04bfd5..e7fb290c9d8d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include "mlx5_ib.h" @@ -6567,6 +6568,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7dffc87601eb..f250753319d0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -461,6 +461,7 @@ struct mlx5_ib_qp { * but not take effective */ u32 counter_pending; + u16 gsi_lag_port; }; struct mlx5_ib_cq_buf { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 14bfdfc8ab96..810bbd52daec 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3218,10 +3218,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, @@ -3229,17 +3231,20 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | @@ -3248,7 +3253,8 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, }, }, [MLX5_QP_STATE_RTR] = { @@ -3601,11 +3607,8 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, static bool qp_supports_affinity(struct ib_qp *qp) { - struct mlx5_ib_qp *mqp = to_mqp(qp); - if ((qp->qp_type == IB_QPT_RC) || - (qp->qp_type == IB_QPT_UD && - !(mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) || + (qp->qp_type == IB_QPT_UD) || (qp->qp_type == IB_QPT_UC) || (qp->qp_type == IB_QPT_RAW_PACKET) || (qp->qp_type == IB_QPT_XRC_INI) || @@ -3614,7 +3617,9 @@ static bool qp_supports_affinity(struct ib_qp *qp) return false; } -static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init, +static unsigned int get_tx_affinity(struct ib_qp *qp, + const struct ib_qp_attr *attr, + int attr_mask, u8 init, struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( @@ -3624,10 +3629,18 @@ static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init, struct mlx5_ib_qp_base *qp_base; unsigned int tx_affinity; - if (!(dev->lag_active && init && qp_supports_affinity(qp))) + if (!(dev->lag_active && qp_supports_affinity(qp))) return 0; - tx_affinity = get_tx_affinity_rr(dev, udata); + if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) + tx_affinity = mqp->gsi_lag_port; + else if (init) + tx_affinity = get_tx_affinity_rr(dev, udata); + else if ((attr_mask & IB_QP_AV) && attr->xmit_slave) + tx_affinity = + mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave); + else + return 0; qp_base = &mqp->trans_qp.base; if (ucontext) @@ -3712,7 +3725,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; - enum mlx5_qp_optpar optpar; + enum mlx5_qp_optpar optpar = 0; u32 set_id = 0; int mlx5_st; int err; @@ -3746,10 +3759,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - tx_affinity = get_tx_affinity(ibqp, + tx_affinity = get_tx_affinity(ibqp, attr, attr_mask, cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT, udata); - context->flags |= cpu_to_be32(tx_affinity << 24); + if (tx_affinity) { + context->flags |= cpu_to_be32(tx_affinity << 24); + if (new_state == IB_QPS_RTR && + MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity)) + optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF; + } if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; @@ -3886,7 +3904,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } op = optab[mlx5_cur][mlx5_new]; - optpar = ib_mask_to_mlx5_opt(attr_mask); + optpar |= ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fb243848132d..c1ba89198335 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1321,7 +1321,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 stat_rate_support[0x10]; u8 reserved_at_1f0[0x1]; u8 pci_sync_for_fw_update_event[0x1]; - u8 reserved_at_1f2[0xa]; + u8 reserved_at_1f2[0x6]; + u8 init2_lag_tx_port_affinity[0x1]; + u8 reserved_at_1fa[0x3]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f23eb18526fe..b9facdb9b9bd 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -66,6 +66,7 @@ enum mlx5_qp_optpar { MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12, MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13, MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MLX5_QP_OPTPAR_LAG_TX_AFF = 1 << 15, MLX5_QP_OPTPAR_PRI_PORT = 1 << 16, MLX5_QP_OPTPAR_SRQN = 1 << 18, MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, @@ -321,6 +322,7 @@ struct mlx5_av { struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; + u8 xmit_port; }; static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) From b2ea69b3b4430642c98eea2c2d08419f2f02124d Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 20 Apr 2020 09:22:11 +0300 Subject: [PATCH 201/562] RDMA/efa: Report create CQ error counter Create CQ errors are already being counted, report them along all other counters. Link: https://lore.kernel.org/r/20200420062213.44577-2-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 454b01b21e6a..43cc93b19569 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -41,6 +41,7 @@ struct efa_user_mmap_entry { op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \ op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \ op(EFA_CREATE_QP_ERR, "create_qp_err") \ + op(EFA_CREATE_CQ_ERR, "create_cq_err") \ op(EFA_REG_MR_ERR, "reg_mr_err") \ op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \ op(EFA_CREATE_AH_ERR, "create_ah_err") @@ -1753,6 +1754,7 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err); stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err); + stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err); stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); From eca5757f804f046dfaab4e9d3ea39af1f2523990 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 20 Apr 2020 09:22:12 +0300 Subject: [PATCH 202/562] RDMA/efa: Count mmap failures Add a new stat that counts mmap failures, which might help when debugging different issues. Link: https://lore.kernel.org/r/20200420062213.44577-3-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 3 ++- drivers/infiniband/hw/efa/efa_verbs.c | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 45d519edb4c3..1889dd172a25 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_H_ @@ -40,6 +40,7 @@ struct efa_sw_stats { atomic64_t reg_mr_err; atomic64_t alloc_ucontext_err; atomic64_t create_ah_err; + atomic64_t mmap_err; }; /* Don't use anything other than atomic64 */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 43cc93b19569..1f8162b2067d 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -44,7 +44,8 @@ struct efa_user_mmap_entry { op(EFA_CREATE_CQ_ERR, "create_cq_err") \ op(EFA_REG_MR_ERR, "reg_mr_err") \ op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \ - op(EFA_CREATE_AH_ERR, "create_ah_err") + op(EFA_CREATE_AH_ERR, "create_ah_err") \ + op(EFA_MMAP_ERR, "mmap_err") #define EFA_STATS_ENUM(ename, name) ename, #define EFA_STATS_STR(ename, name) [ename] = name, @@ -1569,6 +1570,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, ibdev_dbg(&dev->ibdev, "pgoff[%#lx] does not have valid entry\n", vma->vm_pgoff); + atomic64_inc(&dev->stats.sw_stats.mmap_err); return -EINVAL; } entry = to_emmap(rdma_entry); @@ -1604,12 +1606,14 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, err = -EINVAL; } - if (err) + if (err) { ibdev_dbg( &dev->ibdev, "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", entry->address, rdma_entry->npages * PAGE_SIZE, entry->mmap_flag, err); + atomic64_inc(&dev->stats.sw_stats.mmap_err); + } rdma_user_mmap_entry_put(rdma_entry); return err; @@ -1758,6 +1762,7 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); + stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err); return ARRAY_SIZE(efa_stats_names); } From f86e34374a05635332229d1928796d04017ddf16 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 20 Apr 2020 09:22:13 +0300 Subject: [PATCH 203/562] RDMA/efa: Count admin commands errors Add a new stat that counts admin commands failures, which might help when debugging different issues. Link: https://lore.kernel.org/r/20200420062213.44577-4-galpress@amazon.com Reviewed-by: Daniel Kranzdorf Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 5 ++++- drivers/infiniband/hw/efa/efa_com.h | 3 ++- drivers/infiniband/hw/efa/efa_verbs.c | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 7fce69f5568f..336bc2c57bb1 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -631,17 +631,20 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); up(&aq->avail_cmds); + atomic64_inc(&aq->stats.cmd_err); return PTR_ERR(comp_ctx); } err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); - if (err) + if (err) { ibdev_err_ratelimited( aq->efa_dev, "Failed to process command %s (opcode %u) comp_status %d err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, err); + atomic64_inc(&aq->stats.cmd_err); + } up(&aq->avail_cmds); diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index c67dd8109d1c..5e4c88877ddb 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_H_ @@ -47,6 +47,7 @@ struct efa_com_admin_sq { struct efa_com_stats_admin { atomic64_t submitted_cmd; atomic64_t completed_cmd; + atomic64_t cmd_err; atomic64_t no_completion; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 1f8162b2067d..08313f7c73bc 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -37,6 +37,7 @@ struct efa_user_mmap_entry { op(EFA_RX_DROPS, "rx_drops") \ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ op(EFA_COMPLETED_CMDS, "completed_cmds") \ + op(EFA_CMDS_ERR, "cmds_err") \ op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \ op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \ op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \ @@ -1752,6 +1753,7 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, as = &dev->edev.aq.stats; stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); + stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err); stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion); s = &dev->stats; From 0c4bc91d664953780990b0d1d8d1a65f9256474d Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:17 -0400 Subject: [PATCH 204/562] scsi: scsi_debug: Randomize command completion time Add a new command line option (e.g. random=1) and sysfs attribute that causes subsequent command completion times to be between the current command delay setting and 0. A uniformly distributed 32 bit, kernel provided integer is used for this purpose. Since the existing 'delay' whose units are jiffies (typically milliseconds) and 'ndelay' (units: nanoseconds) options (and sysfs attributes) span a range greater than 32 bits, some scaling is required. The purpose of this patch is to widen the range of testing cases that are visited in long running tests. Put simply: rarely struct race conditions are more likely to be found when this facility is used. The default is the previous case in which all command completions were roughly equal to (if not, slightly longer) than the value given by the 'delay' or 'ndelay' settings (or their defaults). This option's default is equivalent to setting 'random=0' . [mkp: use kstrtobool()] Link: https://lore.kernel.org/r/20200421151424.32668-2-dgilbert@interlog.com Reviewed-by: Hannes Reinecke Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 42 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 4c6c448dc2df..4509087347d3 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -126,6 +127,7 @@ static const char *sdebug_version_date = "20190125"; #define DEF_PHYSBLK_EXP 0 #define DEF_OPT_XFERLEN_EXP 0 #define DEF_PTYPE TYPE_DISK +#define DEF_RANDOM false #define DEF_REMOVABLE false #define DEF_SCSI_LEVEL 7 /* INQUIRY, byte2 [6->SPC-4; 7->SPC-5] */ #define DEF_SECTOR_SIZE 512 @@ -656,6 +658,7 @@ static unsigned int sdebug_unmap_max_blocks = DEF_UNMAP_MAX_BLOCKS; static unsigned int sdebug_unmap_max_desc = DEF_UNMAP_MAX_DESC; static unsigned int sdebug_write_same_length = DEF_WRITESAME_LENGTH; static int sdebug_uuid_ctl = DEF_UUID_CTL; +static bool sdebug_random = DEF_RANDOM; static bool sdebug_removable = DEF_REMOVABLE; static bool sdebug_clustering; static bool sdebug_host_lock = DEF_HOST_LOCK; @@ -4355,9 +4358,21 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, ktime_t kt; if (delta_jiff > 0) { - kt = ns_to_ktime((u64)delta_jiff * (NSEC_PER_SEC / HZ)); - } else - kt = ndelay; + u64 ns = jiffies_to_nsecs(delta_jiff); + + if (sdebug_random && ns < U32_MAX) { + ns = prandom_u32_max((u32)ns); + } else if (sdebug_random) { + ns >>= 12; /* scale to 4 usec precision */ + if (ns < U32_MAX) /* over 4 hours max */ + ns = prandom_u32_max((u32)ns); + ns <<= 12; + } + kt = ns_to_ktime(ns); + } else { /* ndelay has a 4.2 second max */ + kt = sdebug_random ? prandom_u32_max((u32)ndelay) : + (u32)ndelay; + } if (!sd_dp->init_hrt) { sd_dp->init_hrt = true; sqcp->sd_dp = sd_dp; @@ -4452,6 +4467,7 @@ module_param_named(opts, sdebug_opts, int, S_IRUGO | S_IWUSR); module_param_named(physblk_exp, sdebug_physblk_exp, int, S_IRUGO); module_param_named(opt_xferlen_exp, sdebug_opt_xferlen_exp, int, S_IRUGO); module_param_named(ptype, sdebug_ptype, int, S_IRUGO | S_IWUSR); +module_param_named(random, sdebug_random, bool, S_IRUGO | S_IWUSR); module_param_named(removable, sdebug_removable, bool, S_IRUGO | S_IWUSR); module_param_named(scsi_level, sdebug_scsi_level, int, S_IRUGO); module_param_named(sector_size, sdebug_sector_size, int, S_IRUGO); @@ -4512,6 +4528,7 @@ MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout, 8->recovered_err... MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)"); MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer length granularity exponent (def=physblk_exp)"); MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])"); +MODULE_PARM_DESC(random, "If set, uniformly randomize command duration between 0 and delay_in_ns"); MODULE_PARM_DESC(removable, "claim to have removable media (def=0)"); MODULE_PARM_DESC(scsi_level, "SCSI level to simulate(def=7[SPC-5])"); MODULE_PARM_DESC(sector_size, "logical block size in bytes (def=512)"); @@ -5102,6 +5119,24 @@ static ssize_t map_show(struct device_driver *ddp, char *buf) } static DRIVER_ATTR_RO(map); +static ssize_t random_show(struct device_driver *ddp, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_random); +} + +static ssize_t random_store(struct device_driver *ddp, const char *buf, + size_t count) +{ + bool v; + + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdebug_random = v; + return count; +} +static DRIVER_ATTR_RW(random); + static ssize_t removable_show(struct device_driver *ddp, char *buf) { return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_removable ? 1 : 0); @@ -5212,6 +5247,7 @@ static struct attribute *sdebug_drv_attrs[] = { &driver_attr_guard.attr, &driver_attr_ato.attr, &driver_attr_map.attr, + &driver_attr_random.attr, &driver_attr_removable.attr, &driver_attr_host_lock.attr, &driver_attr_ndelay.attr, From 87c715dcde633f4cc4690a24a240e838181e6a9d Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:18 -0400 Subject: [PATCH 205/562] scsi: scsi_debug: Add per_host_store option The scsi_debug driver has always been restricted to using one ramdisk image (or none) for its storage. This means that thousands of scsi_debug devices can be created without exhausting the host machine's RAM. The downside is that all scsi_debug devices share the same ramdisk image. This option changes the way a following write to the add_host parameter (or an add_host in the module/driver invocation) operates. For each new host that is created while per_host_store is true, a new store (of dev-size_mb MiB) is created and associated with all the LUs that belong to that new host. The user (who will need root permissions) needs to take care not to exhaust all the machine's available RAM. One reason for doing this is to check that (partial) disk to disk copies based on scsi_debug devices have actually copied accurately. To test this the add_host= parameter where is 2 or greater can be used when the scsi_debug module is loaded. Let us assume that /dev/sdb and /dev/sg1 are the same scsi_debug device, while /dev/sdc and /dev/sg2 are the same scsi_debug device. With per_host_store=1 add_host=2 they will have different ramdisk images. Then the following pseudocode could be executed to check if the sgh_dd copy worked: dd if=/dev/urandom of=/dev/sdb sgh_dd if=/dev/sg1 of=/dev/sg2 [plus option(s) to test] cmp /dev/sdb /dev/sdc If the cmp fails then the copy has failed (or some other mechanism wrote to /dev/sdb or /dev/sdc in the interim). [mkp: use kstrtobool()] Link: https://lore.kernel.org/r/20200421151424.32668-3-dgilbert@interlog.com Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 706 ++++++++++++++++++++++++++------------ 1 file changed, 493 insertions(+), 213 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 4509087347d3..0eea828d6292 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -109,6 +110,7 @@ static const char *sdebug_version_date = "20190125"; #define DEF_DEV_SIZE_MB 8 #define DEF_DIF 0 #define DEF_DIX 0 +#define DEF_PER_HOST_STORE false #define DEF_D_SENSE 0 #define DEF_EVERY_NTH 0 #define DEF_FAKE_RW 0 @@ -245,6 +247,8 @@ static const char *sdebug_version_date = "20190125"; #define SDEBUG_MAX_CMD_LEN 32 +#define SDEB_XA_NOT_IN_USE XA_MARK_1 + struct sdebug_dev_info { struct list_head dev_list; @@ -261,11 +265,20 @@ struct sdebug_dev_info { struct sdebug_host_info { struct list_head host_list; + int si_idx; /* sdeb_store_info (per host) xarray index */ struct Scsi_Host *shost; struct device dev; struct list_head dev_info_list; }; +/* There is an xarray of pointers to this struct's objects, one per host */ +struct sdeb_store_info { + rwlock_t macc_lck; /* for atomic media access on this store */ + u8 *storep; /* user data storage (ram) */ + struct t10_pi_tuple *dif_storep; /* protection info */ + void *map_storep; /* provisioning map */ +}; + #define to_sdebug_host(d) \ container_of(d, struct sdebug_host_info, dev) @@ -432,6 +445,13 @@ static int resp_comp_write(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_write_buffer(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_sync_cache(struct scsi_cmnd *, struct sdebug_dev_info *); +static int sdebug_do_add_host(bool mk_new_store); +static int sdebug_add_host_helper(int per_host_idx); +static void sdebug_do_remove_host(bool the_end); +static int sdebug_add_store(void); +static void sdebug_erase_store(int idx, struct sdeb_store_info *sip); +static void sdebug_erase_all_stores(bool apart_from_first); + /* * The following are overflow arrays for cdbs that "hit" the same index in * the opcode_info_arr array. The most time sensitive (or commonly used) cdb @@ -617,7 +637,8 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, }; -static int sdebug_add_host = DEF_NUM_HOST; +static int sdebug_num_hosts; +static int sdebug_add_host = DEF_NUM_HOST; /* in sysfs this is relative */ static int sdebug_ato = DEF_ATO; static int sdebug_cdb_len = DEF_CDB_LEN; static int sdebug_jdelay = DEF_JDELAY; /* if > 0 then unit is jiffies */ @@ -659,6 +680,7 @@ static unsigned int sdebug_unmap_max_desc = DEF_UNMAP_MAX_DESC; static unsigned int sdebug_write_same_length = DEF_WRITESAME_LENGTH; static int sdebug_uuid_ctl = DEF_UUID_CTL; static bool sdebug_random = DEF_RANDOM; +static bool sdebug_per_host_store = DEF_PER_HOST_STORE; static bool sdebug_removable = DEF_REMOVABLE; static bool sdebug_clustering; static bool sdebug_host_lock = DEF_HOST_LOCK; @@ -682,9 +704,11 @@ static int sdebug_sectors_per; /* sectors per cylinder */ static LIST_HEAD(sdebug_host_list); static DEFINE_SPINLOCK(sdebug_host_list_lock); -static unsigned char *fake_storep; /* ramdisk storage */ -static struct t10_pi_tuple *dif_storep; /* protection info */ -static void *map_storep; /* provisioning map */ +static struct xarray per_store_arr; +static struct xarray *per_store_ap = &per_store_arr; +static int sdeb_first_idx = -1; /* invalid index ==> none created */ +static int sdeb_most_recent_idx = -1; +static DEFINE_RWLOCK(sdeb_fake_rw_lck); /* need a RW lock when fake_rw=1 */ static unsigned long map_size; static int num_aborts; @@ -700,6 +724,9 @@ static int submit_queues = DEF_SUBMIT_QUEUES; /* > 1 for multi-queue (mq) */ static struct sdebug_queue *sdebug_q_arr; /* ptr to array of submit queues */ static DEFINE_RWLOCK(atomic_rw); +static DEFINE_RWLOCK(atomic_rw2); + +static rwlock_t *ramdisk_lck_a[2]; static char sdebug_proc_name[] = MY_NAME; static const char *my_name = MY_NAME; @@ -731,18 +758,25 @@ static inline bool scsi_debug_lbp(void) (sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10); } -static void *lba2fake_store(unsigned long long lba) +static void *lba2fake_store(struct sdeb_store_info *sip, + unsigned long long lba) { - lba = do_div(lba, sdebug_store_sectors); + struct sdeb_store_info *lsip = sip; - return fake_storep + lba * sdebug_sector_size; + lba = do_div(lba, sdebug_store_sectors); + if (!sip || !sip->storep) { + WARN_ON_ONCE(true); + lsip = xa_load(per_store_ap, 0); /* should never be NULL */ + } + return lsip->storep + lba * sdebug_sector_size; } -static struct t10_pi_tuple *dif_store(sector_t sector) +static struct t10_pi_tuple *dif_store(struct sdeb_store_info *sip, + sector_t sector) { sector = sector_div(sector, sdebug_store_sectors); - return dif_storep + sector; + return sip->dif_storep + sector; } static void sdebug_max_tgts_luns(void) @@ -1044,7 +1078,7 @@ static int p_fill_from_dev_buffer(struct scsi_cmnd *scp, const void *arr, __func__, off_dst, scsi_bufflen(scp), act_len, scsi_get_resid(scp)); n = scsi_bufflen(scp) - (off_dst + act_len); - scsi_set_resid(scp, min(scsi_get_resid(scp), n)); + scsi_set_resid(scp, min_t(int, scsi_get_resid(scp), n)); return 0; } @@ -1536,7 +1570,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) } put_unaligned_be16(0x2100, arr + n); /* SPL-4 no version claimed */ ret = fill_from_dev_buffer(scp, arr, - min(alloc_len, SDEBUG_LONG_INQ_SZ)); + min_t(int, alloc_len, SDEBUG_LONG_INQ_SZ)); kfree(arr); return ret; } @@ -1691,7 +1725,7 @@ static int resp_readcap16(struct scsi_cmnd *scp, } return fill_from_dev_buffer(scp, arr, - min(alloc_len, SDEBUG_READCAP16_ARR_SZ)); + min_t(int, alloc_len, SDEBUG_READCAP16_ARR_SZ)); } #define SDEBUG_MAX_TGTPGS_ARR_SZ 1412 @@ -1765,9 +1799,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp, * - The constructed command length * - The maximum array size */ - rlen = min(alen,n); + rlen = min_t(int, alen, n); ret = fill_from_dev_buffer(scp, arr, - min(rlen, SDEBUG_MAX_TGTPGS_ARR_SZ)); + min_t(int, rlen, SDEBUG_MAX_TGTPGS_ARR_SZ)); kfree(arr); return ret; } @@ -2269,7 +2303,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, arr[0] = offset - 1; else put_unaligned_be16((offset - 2), arr + 0); - return fill_from_dev_buffer(scp, arr, min(alloc_len, offset)); + return fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, offset)); } #define SDEBUG_MAX_MSELECT_SZ 512 @@ -2454,9 +2488,9 @@ static int resp_log_sense(struct scsi_cmnd *scp, mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1); return check_condition_result; } - len = min(get_unaligned_be16(arr + 2) + 4, alloc_len); + len = min_t(int, get_unaligned_be16(arr + 2) + 4, alloc_len); return fill_from_dev_buffer(scp, arr, - min(len, SDEBUG_MAX_INQ_ARR_SZ)); + min_t(int, len, SDEBUG_MAX_INQ_ARR_SZ)); } static inline int check_device_access_params(struct scsi_cmnd *scp, @@ -2479,14 +2513,21 @@ static inline int check_device_access_params(struct scsi_cmnd *scp, return 0; } +static inline struct sdeb_store_info *devip2sip(struct sdebug_dev_info *devip) +{ + return sdebug_fake_rw ? + NULL : xa_load(per_store_ap, devip->sdbg_host->si_idx); +} + /* Returns number of bytes copied or -1 if error. */ -static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, - u32 num, bool do_write) +static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp, + u32 sg_skip, u64 lba, u32 num, bool do_write) { int ret; u64 block, rest = 0; - struct scsi_data_buffer *sdb = &scmd->sdb; enum dma_data_direction dir; + struct scsi_data_buffer *sdb = &scp->sdb; + u8 *fsp; if (do_write) { dir = DMA_TO_DEVICE; @@ -2495,24 +2536,25 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, dir = DMA_FROM_DEVICE; } - if (!sdb->length) + if (!sdb->length || !sip) return 0; - if (scmd->sc_data_direction != dir) + if (scp->sc_data_direction != dir) return -1; + fsp = sip->storep; block = do_div(lba, sdebug_store_sectors); if (block + num > sdebug_store_sectors) rest = block + num - sdebug_store_sectors; ret = sg_copy_buffer(sdb->table.sgl, sdb->table.nents, - fake_storep + (block * sdebug_sector_size), + fsp + (block * sdebug_sector_size), (num - rest) * sdebug_sector_size, sg_skip, do_write); if (ret != (num - rest) * sdebug_sector_size) return ret; if (rest) { ret += sg_copy_buffer(sdb->table.sgl, sdb->table.nents, - fake_storep, rest * sdebug_sector_size, + fsp, rest * sdebug_sector_size, sg_skip + ((num - rest) * sdebug_sector_size), do_write); } @@ -2520,34 +2562,47 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, return ret; } -/* If lba2fake_store(lba,num) compares equal to arr(num), then copy top half of - * arr into lba2fake_store(lba,num) and return true. If comparison fails then +/* Returns number of bytes copied or -1 if error. */ +static int do_dout_fetch(struct scsi_cmnd *scp, u32 num, u8 *doutp) +{ + struct scsi_data_buffer *sdb = &scp->sdb; + + if (!sdb->length) + return 0; + if (scp->sc_data_direction != DMA_TO_DEVICE) + return -1; + return sg_copy_buffer(sdb->table.sgl, sdb->table.nents, doutp, + num * sdebug_sector_size, 0, true); +} + +/* If sip->storep+lba compares equal to arr(num), then copy top half of + * arr into sip->storep+lba and return true. If comparison fails then * return false. */ -static bool comp_write_worker(u64 lba, u32 num, const u8 *arr) +static bool comp_write_worker(struct sdeb_store_info *sip, u64 lba, u32 num, + const u8 *arr) { bool res; u64 block, rest = 0; u32 store_blks = sdebug_store_sectors; u32 lb_size = sdebug_sector_size; + u8 *fsp = sip->storep; block = do_div(lba, store_blks); if (block + num > store_blks) rest = block + num - store_blks; - res = !memcmp(fake_storep + (block * lb_size), arr, - (num - rest) * lb_size); + res = !memcmp(fsp + (block * lb_size), arr, (num - rest) * lb_size); if (!res) return res; if (rest) - res = memcmp(fake_storep, arr + ((num - rest) * lb_size), + res = memcmp(fsp, arr + ((num - rest) * lb_size), rest * lb_size); if (!res) return res; arr += num * lb_size; - memcpy(fake_storep + (block * lb_size), arr, (num - rest) * lb_size); + memcpy(fsp + (block * lb_size), arr, (num - rest) * lb_size); if (rest) - memcpy(fake_storep, arr + ((num - rest) * lb_size), - rest * lb_size); + memcpy(fsp, arr + ((num - rest) * lb_size), rest * lb_size); return res; } @@ -2590,24 +2645,27 @@ static int dif_verify(struct t10_pi_tuple *sdt, const void *data, return 0; } -static void dif_copy_prot(struct scsi_cmnd *SCpnt, sector_t sector, +static void dif_copy_prot(struct scsi_cmnd *scp, sector_t sector, unsigned int sectors, bool read) { size_t resid; void *paddr; + struct sdeb_store_info *sip = devip2sip((struct sdebug_dev_info *) + scp->device->hostdata); + struct t10_pi_tuple *dif_storep = sip->dif_storep; const void *dif_store_end = dif_storep + sdebug_store_sectors; struct sg_mapping_iter miter; /* Bytes of protection data to copy into sgl */ resid = sectors * sizeof(*dif_storep); - sg_miter_start(&miter, scsi_prot_sglist(SCpnt), - scsi_prot_sg_count(SCpnt), SG_MITER_ATOMIC | - (read ? SG_MITER_TO_SG : SG_MITER_FROM_SG)); + sg_miter_start(&miter, scsi_prot_sglist(scp), + scsi_prot_sg_count(scp), SG_MITER_ATOMIC | + (read ? SG_MITER_TO_SG : SG_MITER_FROM_SG)); while (sg_miter_next(&miter) && resid > 0) { - size_t len = min(miter.length, resid); - void *start = dif_store(sector); + size_t len = min_t(size_t, miter.length, resid); + void *start = dif_store(sip, sector); size_t rest = 0; if (dif_store_end < start + len) @@ -2633,30 +2691,33 @@ static void dif_copy_prot(struct scsi_cmnd *SCpnt, sector_t sector, sg_miter_stop(&miter); } -static int prot_verify_read(struct scsi_cmnd *SCpnt, sector_t start_sec, +static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec, unsigned int sectors, u32 ei_lba) { unsigned int i; - struct t10_pi_tuple *sdt; sector_t sector; + struct sdeb_store_info *sip = devip2sip((struct sdebug_dev_info *) + scp->device->hostdata); + struct t10_pi_tuple *sdt; for (i = 0; i < sectors; i++, ei_lba++) { int ret; sector = start_sec + i; - sdt = dif_store(sector); + sdt = dif_store(sip, sector); if (sdt->app_tag == cpu_to_be16(0xffff)) continue; - ret = dif_verify(sdt, lba2fake_store(sector), sector, ei_lba); + ret = dif_verify(sdt, lba2fake_store(sip, sector), sector, + ei_lba); if (ret) { dif_errors++; return ret; } } - dif_copy_prot(SCpnt, start_sec, sectors, true); + dif_copy_prot(scp, start_sec, sectors, true); dix_reads++; return 0; @@ -2664,14 +2725,16 @@ static int prot_verify_read(struct scsi_cmnd *SCpnt, sector_t start_sec, static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { - u8 *cmd = scp->cmnd; - struct sdebug_queued_cmd *sqcp; - u64 lba; + bool check_prot; u32 num; u32 ei_lba; - unsigned long iflags; int ret; - bool check_prot; + unsigned long iflags; + u64 lba; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + u8 *cmd = scp->cmnd; + struct sdebug_queued_cmd *sqcp; switch (cmd[0]) { case READ_16: @@ -2753,21 +2816,21 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return check_condition_result; } - read_lock_irqsave(&atomic_rw, iflags); + read_lock_irqsave(macc_lckp, iflags); /* DIX + T10 DIF */ if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) { int prot_ret = prot_verify_read(scp, lba, num, ei_lba); if (prot_ret) { - read_unlock_irqrestore(&atomic_rw, iflags); + read_unlock_irqrestore(macc_lckp, iflags); mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, prot_ret); return illegal_condition_result; } } - ret = do_device_access(scp, 0, lba, num, false); - read_unlock_irqrestore(&atomic_rw, iflags); + ret = do_device_access(sip, scp, 0, lba, num, false); + read_unlock_irqrestore(macc_lckp, iflags); if (unlikely(ret == -1)) return DID_ERROR << 16; @@ -2905,7 +2968,8 @@ static sector_t map_index_to_lba(unsigned long index) return lba; } -static unsigned int map_state(sector_t lba, unsigned int *num) +static unsigned int map_state(struct sdeb_store_info *sip, sector_t lba, + unsigned int *num) { sector_t end; unsigned int mapped; @@ -2913,19 +2977,20 @@ static unsigned int map_state(sector_t lba, unsigned int *num) unsigned long next; index = lba_to_map_index(lba); - mapped = test_bit(index, map_storep); + mapped = test_bit(index, sip->map_storep); if (mapped) - next = find_next_zero_bit(map_storep, map_size, index); + next = find_next_zero_bit(sip->map_storep, map_size, index); else - next = find_next_bit(map_storep, map_size, index); + next = find_next_bit(sip->map_storep, map_size, index); end = min_t(sector_t, sdebug_store_sectors, map_index_to_lba(next)); *num = end - lba; return mapped; } -static void map_region(sector_t lba, unsigned int len) +static void map_region(struct sdeb_store_info *sip, sector_t lba, + unsigned int len) { sector_t end = lba + len; @@ -2933,15 +2998,17 @@ static void map_region(sector_t lba, unsigned int len) unsigned long index = lba_to_map_index(lba); if (index < map_size) - set_bit(index, map_storep); + set_bit(index, sip->map_storep); lba = map_index_to_lba(index + 1); } } -static void unmap_region(sector_t lba, unsigned int len) +static void unmap_region(struct sdeb_store_info *sip, sector_t lba, + unsigned int len) { sector_t end = lba + len; + u8 *fsp = sip->storep; while (lba < end) { unsigned long index = lba_to_map_index(lba); @@ -2949,17 +3016,16 @@ static void unmap_region(sector_t lba, unsigned int len) if (lba == map_index_to_lba(index) && lba + sdebug_unmap_granularity <= end && index < map_size) { - clear_bit(index, map_storep); + clear_bit(index, sip->map_storep); if (sdebug_lbprz) { /* for LBPRZ=2 return 0xff_s */ - memset(fake_storep + - lba * sdebug_sector_size, + memset(fsp + lba * sdebug_sector_size, (sdebug_lbprz & 1) ? 0 : 0xff, sdebug_sector_size * sdebug_unmap_granularity); } - if (dif_storep) { - memset(dif_storep + lba, 0xff, - sizeof(*dif_storep) * + if (sip->dif_storep) { + memset(sip->dif_storep + lba, 0xff, + sizeof(*sip->dif_storep) * sdebug_unmap_granularity); } } @@ -2969,13 +3035,15 @@ static void unmap_region(sector_t lba, unsigned int len) static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { - u8 *cmd = scp->cmnd; - u64 lba; + bool check_prot; u32 num; u32 ei_lba; - unsigned long iflags; int ret; - bool check_prot; + unsigned long iflags; + u64 lba; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + u8 *cmd = scp->cmnd; switch (cmd[0]) { case WRITE_16: @@ -3031,23 +3099,23 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) ret = check_device_access_params(scp, lba, num, true); if (ret) return ret; - write_lock_irqsave(&atomic_rw, iflags); + write_lock_irqsave(macc_lckp, iflags); /* DIX + T10 DIF */ if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) { int prot_ret = prot_verify_write(scp, lba, num, ei_lba); if (prot_ret) { - write_unlock_irqrestore(&atomic_rw, iflags); + write_unlock_irqrestore(macc_lckp, iflags); mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, prot_ret); return illegal_condition_result; } } - ret = do_device_access(scp, 0, lba, num, true); + ret = do_device_access(sip, scp, 0, lba, num, true); if (unlikely(scsi_debug_lbp())) - map_region(lba, num); - write_unlock_irqrestore(&atomic_rw, iflags); + map_region(sip, lba, num); + write_unlock_irqrestore(macc_lckp, iflags); if (unlikely(-1 == ret)) return DID_ERROR << 16; else if (unlikely(sdebug_verbose && @@ -3088,6 +3156,8 @@ static int resp_write_scat(struct scsi_cmnd *scp, u8 *cmd = scp->cmnd; u8 *lrdp = NULL; u8 *up; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; u8 wrprotect; u16 lbdof, num_lrd, k; u32 num, num_by, bt_len, lbdof_blen, sg_off, cum_lb; @@ -3156,7 +3226,7 @@ static int resp_write_scat(struct scsi_cmnd *scp, goto err_out; } - write_lock_irqsave(&atomic_rw, iflags); + write_lock_irqsave(macc_lckp, iflags); sg_off = lbdof_blen; /* Spec says Buffer xfer Length field in number of LBs in dout */ cum_lb = 0; @@ -3199,9 +3269,9 @@ static int resp_write_scat(struct scsi_cmnd *scp, } } - ret = do_device_access(scp, sg_off, lba, num, true); + ret = do_device_access(sip, scp, sg_off, lba, num, true); if (unlikely(scsi_debug_lbp())) - map_region(lba, num); + map_region(sip, lba, num); if (unlikely(-1 == ret)) { ret = DID_ERROR << 16; goto err_out_unlock; @@ -3239,7 +3309,7 @@ static int resp_write_scat(struct scsi_cmnd *scp, } ret = 0; err_out_unlock: - write_unlock_irqrestore(&atomic_rw, iflags); + write_unlock_irqrestore(macc_lckp, iflags); err_out: kfree(lrdp); return ret; @@ -3248,27 +3318,32 @@ static int resp_write_scat(struct scsi_cmnd *scp, static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u32 ei_lba, bool unmap, bool ndob) { - int ret; unsigned long iflags; unsigned long long i; - u32 lb_size = sdebug_sector_size; u64 block, lbaa; + u32 lb_size = sdebug_sector_size; + int ret; + struct sdeb_store_info *sip = devip2sip((struct sdebug_dev_info *) + scp->device->hostdata); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; u8 *fs1p; + u8 *fsp; ret = check_device_access_params(scp, lba, num, true); if (ret) return ret; - write_lock_irqsave(&atomic_rw, iflags); + write_lock_irqsave(macc_lckp, iflags); if (unmap && scsi_debug_lbp()) { - unmap_region(lba, num); + unmap_region(sip, lba, num); goto out; } lbaa = lba; block = do_div(lbaa, sdebug_store_sectors); /* if ndob then zero 1 logical block, else fetch 1 logical block */ - fs1p = fake_storep + (block * lb_size); + fsp = sip->storep; + fs1p = fsp + (block * lb_size); if (ndob) { memset(fs1p, 0, lb_size); ret = 0; @@ -3276,7 +3351,7 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, ret = fetch_to_dev_buffer(scp, fs1p, lb_size); if (-1 == ret) { - write_unlock_irqrestore(&atomic_rw, iflags); + write_unlock_irqrestore(&sip->macc_lck, iflags); return DID_ERROR << 16; } else if (sdebug_verbose && !ndob && (ret < lb_size)) sdev_printk(KERN_INFO, scp->device, @@ -3287,12 +3362,12 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, for (i = 1 ; i < num ; i++) { lbaa = lba + i; block = do_div(lbaa, sdebug_store_sectors); - memmove(fake_storep + (block * lb_size), fs1p, lb_size); + memmove(fsp + (block * lb_size), fs1p, lb_size); } if (scsi_debug_lbp()) - map_region(lba, num); + map_region(sip, lba, num); out: - write_unlock_irqrestore(&atomic_rw, iflags); + write_unlock_irqrestore(macc_lckp, iflags); return 0; } @@ -3404,7 +3479,8 @@ static int resp_comp_write(struct scsi_cmnd *scp, { u8 *cmd = scp->cmnd; u8 *arr; - u8 *fake_storep_hold; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; u64 lba; u32 dnum; u32 lb_size = sdebug_sector_size; @@ -3438,14 +3514,9 @@ static int resp_comp_write(struct scsi_cmnd *scp, return check_condition_result; } - write_lock_irqsave(&atomic_rw, iflags); + write_lock_irqsave(macc_lckp, iflags); - /* trick do_device_access() to fetch both compare and write buffers - * from data-in into arr. Safe (atomic) since write_lock held. */ - fake_storep_hold = fake_storep; - fake_storep = arr; - ret = do_device_access(scp, 0, 0, dnum, true); - fake_storep = fake_storep_hold; + ret = do_dout_fetch(scp, dnum, arr); if (ret == -1) { retval = DID_ERROR << 16; goto cleanup; @@ -3453,15 +3524,15 @@ static int resp_comp_write(struct scsi_cmnd *scp, sdev_printk(KERN_INFO, scp->device, "%s: compare_write: cdb " "indicated=%u, IO sent=%d bytes\n", my_name, dnum * lb_size, ret); - if (!comp_write_worker(lba, num, arr)) { + if (!comp_write_worker(sip, lba, num, arr)) { mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0); retval = check_condition_result; goto cleanup; } if (scsi_debug_lbp()) - map_region(lba, num); + map_region(sip, lba, num); cleanup: - write_unlock_irqrestore(&atomic_rw, iflags); + write_unlock_irqrestore(macc_lckp, iflags); kfree(arr); return retval; } @@ -3476,6 +3547,8 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { unsigned char *buf; struct unmap_block_desc *desc; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; unsigned int i, payload_len, descriptors; int ret; unsigned long iflags; @@ -3506,7 +3579,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) desc = (void *)&buf[8]; - write_lock_irqsave(&atomic_rw, iflags); + write_lock_irqsave(macc_lckp, iflags); for (i = 0 ; i < descriptors ; i++) { unsigned long long lba = get_unaligned_be64(&desc[i].lba); @@ -3516,13 +3589,13 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) if (ret) goto out; - unmap_region(lba, num); + unmap_region(sip, lba, num); } ret = 0; out: - write_unlock_irqrestore(&atomic_rw, iflags); + write_unlock_irqrestore(macc_lckp, iflags); kfree(buf); return ret; @@ -3534,10 +3607,11 @@ static int resp_get_lba_status(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { u8 *cmd = scp->cmnd; + struct sdeb_store_info *sip = devip2sip(devip); u64 lba; u32 alloc_len, mapped, num; - u8 arr[SDEBUG_GET_LBA_STATUS_LEN]; int ret; + u8 arr[SDEBUG_GET_LBA_STATUS_LEN]; lba = get_unaligned_be64(cmd + 2); alloc_len = get_unaligned_be32(cmd + 10); @@ -3550,7 +3624,7 @@ static int resp_get_lba_status(struct scsi_cmnd *scp, return ret; if (scsi_debug_lbp()) - mapped = map_state(lba, &num); + mapped = map_state(sip, lba, &num); else { mapped = 1; /* following just in case virtual_gb changed */ @@ -4147,8 +4221,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt) return SUCCESS; } -static void __init sdebug_build_parts(unsigned char *ramp, - unsigned long store_size) +static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size) { struct msdos_partition *pp; int starts[SDEBUG_MAX_PARTS + 2]; @@ -4464,6 +4537,8 @@ module_param_named(num_parts, sdebug_num_parts, int, S_IRUGO); module_param_named(num_tgts, sdebug_num_tgts, int, S_IRUGO | S_IWUSR); module_param_named(opt_blks, sdebug_opt_blks, int, S_IRUGO); module_param_named(opts, sdebug_opts, int, S_IRUGO | S_IWUSR); +module_param_named(per_host_store, sdebug_per_host_store, bool, + S_IRUGO | S_IWUSR); module_param_named(physblk_exp, sdebug_physblk_exp, int, S_IRUGO); module_param_named(opt_xferlen_exp, sdebug_opt_xferlen_exp, int, S_IRUGO); module_param_named(ptype, sdebug_ptype, int, S_IRUGO | S_IWUSR); @@ -4525,6 +4600,7 @@ MODULE_PARM_DESC(num_parts, "number of partitions(def=0)"); MODULE_PARM_DESC(num_tgts, "number of targets per host to simulate(def=1)"); MODULE_PARM_DESC(opt_blks, "optimal transfer length in blocks (def=1024)"); MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout, 8->recovered_err... (def=0)"); +MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get new store"); MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)"); MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer length granularity exponent (def=physblk_exp)"); MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])"); @@ -4593,6 +4669,7 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host) { int f, j, l; struct sdebug_queue *sqp; + struct sdebug_host_info *sdhp; seq_printf(m, "scsi_debug adapter driver, version %s [%s]\n", SDEBUG_VERSION, sdebug_version_date); @@ -4628,6 +4705,34 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host) "first,last bits", f, l); } } + + seq_printf(m, "this host_no=%d\n", host->host_no); + if (!xa_empty(per_store_ap)) { + bool niu; + int idx; + unsigned long l_idx; + struct sdeb_store_info *sip; + + seq_puts(m, "\nhost list:\n"); + j = 0; + list_for_each_entry(sdhp, &sdebug_host_list, host_list) { + idx = sdhp->si_idx; + seq_printf(m, " %d: host_no=%d, si_idx=%d\n", j, + sdhp->shost->host_no, idx); + ++j; + } + seq_printf(m, "\nper_store array [most_recent_idx=%d]:\n", + sdeb_most_recent_idx); + j = 0; + xa_for_each(per_store_ap, l_idx, sip) { + niu = xa_get_mark(per_store_ap, l_idx, + SDEB_XA_NOT_IN_USE); + idx = (int)l_idx; + seq_printf(m, " %d: idx=%d%s\n", j, idx, + (niu ? " not_in_use" : "")); + ++j; + } + } return 0; } @@ -4783,25 +4888,41 @@ static ssize_t fake_rw_show(struct device_driver *ddp, char *buf) static ssize_t fake_rw_store(struct device_driver *ddp, const char *buf, size_t count) { - int n; + int n, idx; if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) { + bool want_store = (n == 0); + struct sdebug_host_info *sdhp; + n = (n > 0); sdebug_fake_rw = (sdebug_fake_rw > 0); - if (sdebug_fake_rw != n) { - if ((0 == n) && (NULL == fake_storep)) { - unsigned long sz = - (unsigned long)sdebug_dev_size_mb * - 1048576; + if (sdebug_fake_rw == n) + return count; /* not transitioning so do nothing */ - fake_storep = vzalloc(sz); - if (NULL == fake_storep) { - pr_err("out of memory, 9\n"); - return -ENOMEM; + if (want_store) { /* 1 --> 0 transition, set up store */ + if (sdeb_first_idx < 0) { + idx = sdebug_add_store(); + if (idx < 0) + return idx; + } else { + idx = sdeb_first_idx; + xa_clear_mark(per_store_ap, idx, + SDEB_XA_NOT_IN_USE); + } + /* make all hosts use same store */ + list_for_each_entry(sdhp, &sdebug_host_list, + host_list) { + if (sdhp->si_idx != idx) { + xa_set_mark(per_store_ap, sdhp->si_idx, + SDEB_XA_NOT_IN_USE); + sdhp->si_idx = idx; } } - sdebug_fake_rw = n; + sdeb_most_recent_idx = idx; + } else { /* 0 --> 1 transition is trigger for shrink */ + sdebug_erase_all_stores(true /* apart from first */); } + sdebug_fake_rw = n; return count; } return -EINVAL; @@ -4849,6 +4970,24 @@ static ssize_t dev_size_mb_show(struct device_driver *ddp, char *buf) } static DRIVER_ATTR_RO(dev_size_mb); +static ssize_t per_host_store_show(struct device_driver *ddp, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_per_host_store); +} + +static ssize_t per_host_store_store(struct device_driver *ddp, const char *buf, + size_t count) +{ + bool v; + + if (kstrtobool(buf, &v)) + return -EINVAL; + + sdebug_per_host_store = v; + return count; +} +static DRIVER_ATTR_RW(per_host_store); + static ssize_t num_parts_show(struct device_driver *ddp, char *buf) { return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_parts); @@ -5001,26 +5140,42 @@ static DRIVER_ATTR_RW(virtual_gb); static ssize_t add_host_show(struct device_driver *ddp, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_add_host); + /* absolute number of hosts currently active is what is shown */ + return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_hosts); } -static int sdebug_add_adapter(void); -static void sdebug_remove_adapter(void); - static ssize_t add_host_store(struct device_driver *ddp, const char *buf, size_t count) { + bool found; + unsigned long idx; + struct sdeb_store_info *sip; + bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store; int delta_hosts; if (sscanf(buf, "%d", &delta_hosts) != 1) return -EINVAL; if (delta_hosts > 0) { do { - sdebug_add_adapter(); + found = false; + if (want_phs) { + xa_for_each_marked(per_store_ap, idx, sip, + SDEB_XA_NOT_IN_USE) { + sdeb_most_recent_idx = (int)idx; + found = true; + break; + } + if (found) /* re-use case */ + sdebug_add_host_helper((int)idx); + else + sdebug_do_add_host(true); + } else { + sdebug_do_add_host(false); + } } while (--delta_hosts); } else if (delta_hosts < 0) { do { - sdebug_remove_adapter(); + sdebug_do_remove_host(false); } while (++delta_hosts); } return count; @@ -5104,14 +5259,19 @@ static DRIVER_ATTR_RO(ato); static ssize_t map_show(struct device_driver *ddp, char *buf) { - ssize_t count; + ssize_t count = 0; if (!scsi_debug_lbp()) return scnprintf(buf, PAGE_SIZE, "0-%u\n", sdebug_store_sectors); - count = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", - (int)map_size, map_storep); + if (sdebug_fake_rw == 0 && !xa_empty(per_store_ap)) { + struct sdeb_store_info *sip = xa_load(per_store_ap, 0); + + if (sip) + count = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", + (int)map_size, sip->map_storep); + } buf[count++] = '\n'; buf[count] = '\0'; @@ -5218,7 +5378,7 @@ static DRIVER_ATTR_RW(cdb_len); /sys/bus/pseudo/drivers/scsi_debug directory. The advantage of these files (over those found in the /sys/module/scsi_debug/parameters directory) is that auxiliary actions can be triggered when an attribute - is changed. For example see: sdebug_add_host_store() above. + is changed. For example see: add_host_store() above. */ static struct attribute *sdebug_drv_attrs[] = { @@ -5238,6 +5398,7 @@ static struct attribute *sdebug_drv_attrs[] = { &driver_attr_scsi_level.attr, &driver_attr_virtual_gb.attr, &driver_attr_add_host.attr, + &driver_attr_per_host_store.attr, &driver_attr_vpd_use_hostno.attr, &driver_attr_sector_size.attr, &driver_attr_statistics.attr, @@ -5262,11 +5423,13 @@ static struct device *pseudo_primary; static int __init scsi_debug_init(void) { + bool want_store = (sdebug_fake_rw == 0); unsigned long sz; - int host_to_add; - int k; - int ret; + int k, ret, hosts_to_add; + int idx = -1; + ramdisk_lck_a[0] = &atomic_rw; + ramdisk_lck_a[1] = &atomic_rw2; atomic_set(&retired_max_queue, 0); if (sdebug_ndelay >= 1000 * 1000 * 1000) { @@ -5362,36 +5525,6 @@ static int __init scsi_debug_init(void) sdebug_cylinders_per = (unsigned long)sdebug_capacity / (sdebug_sectors_per * sdebug_heads); } - - if (sdebug_fake_rw == 0) { - fake_storep = vzalloc(sz); - if (NULL == fake_storep) { - pr_err("out of memory, 1\n"); - ret = -ENOMEM; - goto free_q_arr; - } - if (sdebug_num_parts > 0) - sdebug_build_parts(fake_storep, sz); - } - - if (sdebug_dix) { - int dif_size; - - dif_size = sdebug_store_sectors * sizeof(struct t10_pi_tuple); - dif_storep = vmalloc(dif_size); - - pr_err("dif_storep %u bytes @ %p\n", dif_size, dif_storep); - - if (dif_storep == NULL) { - pr_err("out of mem. (DIX)\n"); - ret = -ENOMEM; - goto free_vm; - } - - memset(dif_storep, 0xff, dif_size); - } - - /* Logical Block Provisioning */ if (scsi_debug_lbp()) { sdebug_unmap_max_blocks = clamp(sdebug_unmap_max_blocks, 0U, 0xffffffffU); @@ -5407,26 +5540,16 @@ static int __init scsi_debug_init(void) sdebug_unmap_alignment) { pr_err("ERR: unmap_granularity <= unmap_alignment\n"); ret = -EINVAL; - goto free_vm; + goto free_q_arr; } - - map_size = lba_to_map_index(sdebug_store_sectors - 1) + 1; - map_storep = vmalloc(array_size(sizeof(long), - BITS_TO_LONGS(map_size))); - - pr_info("%lu provisioning blocks\n", map_size); - - if (map_storep == NULL) { - pr_err("out of mem. (MAP)\n"); - ret = -ENOMEM; - goto free_vm; + } + xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); + if (want_store) { + idx = sdebug_add_store(); + if (idx < 0) { + ret = idx; + goto free_q_arr; } - - bitmap_zero(map_storep, map_size); - - /* Map first 1KB for partition table */ - if (sdebug_num_parts) - map_region(0, 2); } pseudo_primary = root_device_register("pseudo_0"); @@ -5446,18 +5569,28 @@ static int __init scsi_debug_init(void) goto bus_unreg; } - host_to_add = sdebug_add_host; + hosts_to_add = sdebug_add_host; sdebug_add_host = 0; - for (k = 0; k < host_to_add; k++) { - if (sdebug_add_adapter()) { - pr_err("sdebug_add_adapter failed k=%d\n", k); - break; + for (k = 0; k < hosts_to_add; k++) { + if (want_store && k == 0) { + ret = sdebug_add_host_helper(idx); + if (ret < 0) { + pr_err("add_host_helper k=%d, error=%d\n", + k, -ret); + break; + } + } else { + ret = sdebug_do_add_host(want_store && + sdebug_per_host_store); + if (ret < 0) { + pr_err("add_host k=%d error=%d\n", k, -ret); + break; + } } } - if (sdebug_verbose) - pr_info("built %d host(s)\n", sdebug_add_host); + pr_info("built %d host(s)\n", sdebug_num_hosts); return 0; @@ -5466,9 +5599,7 @@ static int __init scsi_debug_init(void) dev_unreg: root_device_unregister(pseudo_primary); free_vm: - vfree(map_storep); - vfree(dif_storep); - vfree(fake_storep); + sdebug_erase_store(idx, NULL); free_q_arr: kfree(sdebug_q_arr); return ret; @@ -5476,20 +5607,18 @@ static int __init scsi_debug_init(void) static void __exit scsi_debug_exit(void) { - int k = sdebug_add_host; + int k = sdebug_num_hosts; stop_all_queued(); for (; k; k--) - sdebug_remove_adapter(); + sdebug_do_remove_host(true); free_all_queued(); driver_unregister(&sdebug_driverfs_driver); bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); - vfree(map_storep); - vfree(dif_storep); - vfree(fake_storep); - kfree(sdebug_q_arr); + sdebug_erase_all_stores(false); + xa_destroy(per_store_ap); } device_initcall(scsi_debug_init); @@ -5503,29 +5632,146 @@ static void sdebug_release_adapter(struct device *dev) kfree(sdbg_host); } -static int sdebug_add_adapter(void) +/* idx must be valid, if sip is NULL then it will be obtained using idx */ +static void sdebug_erase_store(int idx, struct sdeb_store_info *sip) { - int k, devs_per_host; - int error = 0; + if (idx < 0) + return; + if (!sip) { + if (xa_empty(per_store_ap)) + return; + sip = xa_load(per_store_ap, idx); + if (!sip) + return; + } + vfree(sip->map_storep); + vfree(sip->dif_storep); + vfree(sip->storep); + xa_erase(per_store_ap, idx); + kfree(sip); +} + +/* Assume apart_from_first==false only in shutdown case. */ +static void sdebug_erase_all_stores(bool apart_from_first) +{ + unsigned long idx; + struct sdeb_store_info *sip = NULL; + + xa_for_each(per_store_ap, idx, sip) { + if (apart_from_first) + apart_from_first = false; + else + sdebug_erase_store(idx, sip); + } + if (apart_from_first) + sdeb_most_recent_idx = sdeb_first_idx; +} + +/* + * Returns store xarray new element index (idx) if >=0 else negated errno. + * Limit the number of stores to 65536. + */ +static int sdebug_add_store(void) +{ + int res; + u32 n_idx; + unsigned long iflags; + unsigned long sz = (unsigned long)sdebug_dev_size_mb * 1048576; + struct sdeb_store_info *sip = NULL; + struct xa_limit xal = { .max = 1 << 16, .min = 0 }; + + sip = kzalloc(sizeof(*sip), GFP_KERNEL); + if (!sip) + return -ENOMEM; + + xa_lock_irqsave(per_store_ap, iflags); + res = __xa_alloc(per_store_ap, &n_idx, sip, xal, GFP_ATOMIC); + if (unlikely(res < 0)) { + xa_unlock_irqrestore(per_store_ap, iflags); + kfree(sip); + pr_warn("%s: xa_alloc() errno=%d\n", __func__, -res); + return res; + } + sdeb_most_recent_idx = n_idx; + if (sdeb_first_idx < 0) + sdeb_first_idx = n_idx; + xa_unlock_irqrestore(per_store_ap, iflags); + + res = -ENOMEM; + sip->storep = vzalloc(sz); + if (!sip->storep) { + pr_err("user data oom\n"); + goto err; + } + if (sdebug_num_parts > 0) + sdebug_build_parts(sip->storep, sz); + + /* DIF/DIX: what T10 calls Protection Information (PI) */ + if (sdebug_dix) { + int dif_size; + + dif_size = sdebug_store_sectors * sizeof(struct t10_pi_tuple); + sip->dif_storep = vmalloc(dif_size); + + pr_info("dif_storep %u bytes @ %pK\n", dif_size, + sip->dif_storep); + + if (!sip->dif_storep) { + pr_err("DIX oom\n"); + goto err; + } + memset(sip->dif_storep, 0xff, dif_size); + } + /* Logical Block Provisioning */ + if (scsi_debug_lbp()) { + map_size = lba_to_map_index(sdebug_store_sectors - 1) + 1; + sip->map_storep = vmalloc(array_size(sizeof(long), + BITS_TO_LONGS(map_size))); + + pr_info("%lu provisioning blocks\n", map_size); + + if (!sip->map_storep) { + pr_err("LBP map oom\n"); + goto err; + } + + bitmap_zero(sip->map_storep, map_size); + + /* Map first 1KB for partition table */ + if (sdebug_num_parts) + map_region(sip, 0, 2); + } + + rwlock_init(&sip->macc_lck); + return (int)n_idx; +err: + sdebug_erase_store((int)n_idx, sip); + pr_warn("%s: failed, errno=%d\n", __func__, -res); + return res; +} + +static int sdebug_add_host_helper(int per_host_idx) +{ + int k, devs_per_host, idx; + int error = -ENOMEM; struct sdebug_host_info *sdbg_host; struct sdebug_dev_info *sdbg_devinfo, *tmp; sdbg_host = kzalloc(sizeof(*sdbg_host), GFP_KERNEL); - if (sdbg_host == NULL) { - pr_err("out of memory at line %d\n", __LINE__); + if (!sdbg_host) return -ENOMEM; - } + idx = (per_host_idx < 0) ? sdeb_first_idx : per_host_idx; + if (xa_get_mark(per_store_ap, idx, SDEB_XA_NOT_IN_USE)) + xa_clear_mark(per_store_ap, idx, SDEB_XA_NOT_IN_USE); + sdbg_host->si_idx = idx; INIT_LIST_HEAD(&sdbg_host->dev_info_list); devs_per_host = sdebug_num_tgts * sdebug_max_luns; for (k = 0; k < devs_per_host; k++) { sdbg_devinfo = sdebug_device_create(sdbg_host, GFP_KERNEL); - if (!sdbg_devinfo) { - pr_err("out of memory at line %d\n", __LINE__); - error = -ENOMEM; + if (!sdbg_devinfo) goto clean; - } } spin_lock(&sdebug_host_list_lock); @@ -5535,15 +5781,14 @@ static int sdebug_add_adapter(void) sdbg_host->dev.bus = &pseudo_lld_bus; sdbg_host->dev.parent = pseudo_primary; sdbg_host->dev.release = &sdebug_release_adapter; - dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_add_host); + dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); error = device_register(&sdbg_host->dev); - if (error) goto clean; - ++sdebug_add_host; - return error; + ++sdebug_num_hosts; + return 0; clean: list_for_each_entry_safe(sdbg_devinfo, tmp, &sdbg_host->dev_info_list, @@ -5551,28 +5796,61 @@ static int sdebug_add_adapter(void) list_del(&sdbg_devinfo->dev_list); kfree(sdbg_devinfo); } - kfree(sdbg_host); + pr_warn("%s: failed, errno=%d\n", __func__, -error); return error; } -static void sdebug_remove_adapter(void) +static int sdebug_do_add_host(bool mk_new_store) { + int ph_idx = sdeb_most_recent_idx; + + if (mk_new_store) { + ph_idx = sdebug_add_store(); + if (ph_idx < 0) + return ph_idx; + } + return sdebug_add_host_helper(ph_idx); +} + +static void sdebug_do_remove_host(bool the_end) +{ + int idx = -1; struct sdebug_host_info *sdbg_host = NULL; + struct sdebug_host_info *sdbg_host2; spin_lock(&sdebug_host_list_lock); if (!list_empty(&sdebug_host_list)) { sdbg_host = list_entry(sdebug_host_list.prev, struct sdebug_host_info, host_list); - list_del(&sdbg_host->host_list); + idx = sdbg_host->si_idx; } + if (!the_end && idx >= 0) { + bool unique = true; + + list_for_each_entry(sdbg_host2, &sdebug_host_list, host_list) { + if (sdbg_host2 == sdbg_host) + continue; + if (idx == sdbg_host2->si_idx) { + unique = false; + break; + } + } + if (unique) { + xa_set_mark(per_store_ap, idx, SDEB_XA_NOT_IN_USE); + if (idx == sdeb_most_recent_idx) + --sdeb_most_recent_idx; + } + } + if (sdbg_host) + list_del(&sdbg_host->host_list); spin_unlock(&sdebug_host_list_lock); if (!sdbg_host) return; device_unregister(&sdbg_host->dev); - --sdebug_add_host; + --sdebug_num_hosts; } static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) @@ -5631,6 +5909,7 @@ static int scsi_debug_queuecommand(struct Scsi_Host *shost, const struct opcode_info_t *oip; const struct opcode_info_t *r_oip; struct sdebug_dev_info *devip; + u8 *cmd = scp->cmnd; int (*r_pfp)(struct scsi_cmnd *, struct sdebug_dev_info *); int (*pfp)(struct scsi_cmnd *, struct sdebug_dev_info *) = NULL; @@ -5902,8 +6181,9 @@ static int sdebug_driver_probe(struct device *dev) pr_err("scsi_add_host failed\n"); error = -ENODEV; scsi_host_put(hpnt); - } else + } else { scsi_scan_host(hpnt); + } return error; } From c3e2fe9222d428f115baeba2f6b3637b3aa444cd Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:19 -0400 Subject: [PATCH 206/562] scsi: scsi_debug: Implement VERIFY(10), add VERIFY(16) With the addition of the per_host_store option, the ability to check whether two different ramdisk images are the same or not becomes practical. Prior to this patch VERIFY(10) always returned true (i.e. the SCSI GOOD status) without checking. This option adds support for BYTCHK equal to 0, 1 and 3. If the comparison fails, then a sense key of MISCOMPARE is returned as per the T10 standards. Also add support for the VERIFY(16) command. Link: https://lore.kernel.org/r/20200421151424.32668-4-dgilbert@interlog.com Reviewed-by: Hannes Reinecke Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 101 +++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 0eea828d6292..da9b5ccf6621 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -354,7 +354,7 @@ enum sdeb_opcode_index { SDEB_I_SERV_ACT_OUT_16 = 13, /* add ...SERV_ACT_OUT_12 if needed */ SDEB_I_MAINT_IN = 14, SDEB_I_MAINT_OUT = 15, - SDEB_I_VERIFY = 16, /* 10 only */ + SDEB_I_VERIFY = 16, /* VERIFY(10), VERIFY(16) */ SDEB_I_VARIABLE_LEN = 17, /* READ(32), WRITE(32), WR_SCAT(32) */ SDEB_I_RESERVE = 18, /* 6, 10 */ SDEB_I_RELEASE = 19, /* 6, 10 */ @@ -397,7 +397,8 @@ static const unsigned char opcode_ind_arr[256] = { 0, SDEB_I_VARIABLE_LEN, /* 0x80; 0x80->0x9f: 16 byte cdbs */ 0, 0, 0, 0, 0, SDEB_I_ATA_PT, 0, 0, - SDEB_I_READ, SDEB_I_COMP_WRITE, SDEB_I_WRITE, 0, 0, 0, 0, 0, + SDEB_I_READ, SDEB_I_COMP_WRITE, SDEB_I_WRITE, 0, + 0, 0, 0, SDEB_I_VERIFY, 0, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16, /* 0xa0; 0xa0->0xbf: 12 byte cdbs */ @@ -439,6 +440,7 @@ static int resp_report_tgtpgs(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_unmap(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_rsup_opcodes(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_rsup_tmfs(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_verify(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_write_same_10(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_write_same_16(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_comp_write(struct scsi_cmnd *, struct sdebug_dev_info *); @@ -490,6 +492,12 @@ static const struct opcode_info_t write_iarr[] = { 0xbf, 0xc7, 0, 0, 0, 0} }, }; +static const struct opcode_info_t verify_iarr[] = { + {0, 0x2f, 0, F_D_OUT_MAYBE | FF_MEDIA_IO, resp_verify,/* VERIFY(10) */ + NULL, {10, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0xc7, + 0, 0, 0, 0, 0, 0} }, +}; + static const struct opcode_info_t sa_in_16_iarr[] = { {0, 0x9e, 0x12, F_SA_LOW | F_D_IN, resp_get_lba_status, NULL, {16, 0x12, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, @@ -590,9 +598,10 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { /* 15 */ {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* MAINT OUT */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, - {0, 0x2f, 0, F_D_OUT_MAYBE | FF_MEDIA_IO, NULL, NULL, /* VERIFY(10) */ - {10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7, - 0, 0, 0, 0, 0, 0} }, + {ARRAY_SIZE(verify_iarr), 0x8f, 0, + F_D_OUT_MAYBE | FF_MEDIA_IO, resp_verify, /* VERIFY(16) */ + verify_iarr, {16, 0xf6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, {ARRAY_SIZE(vl_iarr), 0x7f, 0x9, F_SA_HIGH | F_D_IN | FF_MEDIA_IO, resp_read_dt0, vl_iarr, /* VARIABLE LENGTH, READ(32) */ {32, 0xc7, 0, 0, 0, 0, 0x3f, 0x18, 0x0, 0x9, 0xfe, 0, 0xff, 0xff, @@ -2579,7 +2588,7 @@ static int do_dout_fetch(struct scsi_cmnd *scp, u32 num, u8 *doutp) * arr into sip->storep+lba and return true. If comparison fails then * return false. */ static bool comp_write_worker(struct sdeb_store_info *sip, u64 lba, u32 num, - const u8 *arr) + const u8 *arr, bool compare_only) { bool res; u64 block, rest = 0; @@ -2599,6 +2608,8 @@ static bool comp_write_worker(struct sdeb_store_info *sip, u64 lba, u32 num, rest * lb_size); if (!res) return res; + if (compare_only) + return true; arr += num * lb_size; memcpy(fsp + (block * lb_size), arr, (num - rest) * lb_size); if (rest) @@ -3524,7 +3535,7 @@ static int resp_comp_write(struct scsi_cmnd *scp, sdev_printk(KERN_INFO, scp->device, "%s: compare_write: cdb " "indicated=%u, IO sent=%d bytes\n", my_name, dnum * lb_size, ret); - if (!comp_write_worker(sip, lba, num, arr)) { + if (!comp_write_worker(sip, lba, num, arr, false)) { mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0); retval = check_condition_result; goto cleanup; @@ -3771,6 +3782,82 @@ static int resp_report_luns(struct scsi_cmnd *scp, return res; } +static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) +{ + bool is_bytchk3 = false; + u8 bytchk; + int ret, j; + u32 vnum, a_num, off; + const u32 lb_size = sdebug_sector_size; + unsigned long iflags; + u64 lba; + u8 *arr; + u8 *cmd = scp->cmnd; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + + bytchk = (cmd[1] >> 1) & 0x3; + if (bytchk == 0) { + return 0; /* always claim internal verify okay */ + } else if (bytchk == 2) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, 2); + return check_condition_result; + } else if (bytchk == 3) { + is_bytchk3 = true; /* 1 block sent, compared repeatedly */ + } + switch (cmd[0]) { + case VERIFY_16: + lba = get_unaligned_be64(cmd + 2); + vnum = get_unaligned_be32(cmd + 10); + break; + case VERIFY: /* is VERIFY(10) */ + lba = get_unaligned_be32(cmd + 2); + vnum = get_unaligned_be16(cmd + 7); + break; + default: + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + a_num = is_bytchk3 ? 1 : vnum; + /* Treat following check like one for read (i.e. no write) access */ + ret = check_device_access_params(scp, lba, a_num, false); + if (ret) + return ret; + + arr = kcalloc(lb_size, vnum, GFP_ATOMIC); + if (!arr) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, + INSUFF_RES_ASCQ); + return check_condition_result; + } + /* Not changing store, so only need read access */ + read_lock_irqsave(macc_lckp, iflags); + + ret = do_dout_fetch(scp, a_num, arr); + if (ret == -1) { + ret = DID_ERROR << 16; + goto cleanup; + } else if (sdebug_verbose && (ret < (a_num * lb_size))) { + sdev_printk(KERN_INFO, scp->device, + "%s: %s: cdb indicated=%u, IO sent=%d bytes\n", + my_name, __func__, a_num * lb_size, ret); + } + if (is_bytchk3) { + for (j = 1, off = lb_size; j < vnum; ++j, off += lb_size) + memcpy(arr + off, arr, lb_size); + } + ret = 0; + if (!comp_write_worker(sip, lba, vnum, arr, true)) { + mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0); + ret = check_condition_result; + goto cleanup; + } +cleanup: + read_unlock_irqrestore(macc_lckp, iflags); + kfree(arr); + return ret; +} + static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd) { u32 tag = blk_mq_unique_tag(cmnd->request); From 67da413f26afc7522250bf5c9231f6711a9e7dfd Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:20 -0400 Subject: [PATCH 207/562] scsi: scsi_debug: Weaken rwlock around ramdisk access The design of this driver is to do any ramdisk access on the same thread that invoked the queuecommand() call. That is assumed to be user space context. The command duration is implemented by setting the delay with a high resolution timer. The hr timer's callback may well be in interrupt context, but it doesn't touch the ramdisk. So try removing the _irqsave()/_irqrestore() portion on the read-write lock that protects ramdisk access. Link: https://lore.kernel.org/r/20200421151424.32668-5-dgilbert@interlog.com Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 47 +++++++++++++++------------------------ 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index da9b5ccf6621..ac8f31328632 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2740,7 +2740,6 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u32 num; u32 ei_lba; int ret; - unsigned long iflags; u64 lba; struct sdeb_store_info *sip = devip2sip(devip); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; @@ -2827,21 +2826,21 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return check_condition_result; } - read_lock_irqsave(macc_lckp, iflags); + read_lock(macc_lckp); /* DIX + T10 DIF */ if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) { int prot_ret = prot_verify_read(scp, lba, num, ei_lba); if (prot_ret) { - read_unlock_irqrestore(macc_lckp, iflags); + read_unlock(macc_lckp); mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, prot_ret); return illegal_condition_result; } } ret = do_device_access(sip, scp, 0, lba, num, false); - read_unlock_irqrestore(macc_lckp, iflags); + read_unlock(macc_lckp); if (unlikely(ret == -1)) return DID_ERROR << 16; @@ -3050,7 +3049,6 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u32 num; u32 ei_lba; int ret; - unsigned long iflags; u64 lba; struct sdeb_store_info *sip = devip2sip(devip); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; @@ -3110,14 +3108,14 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) ret = check_device_access_params(scp, lba, num, true); if (ret) return ret; - write_lock_irqsave(macc_lckp, iflags); + write_lock(macc_lckp); /* DIX + T10 DIF */ if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) { int prot_ret = prot_verify_write(scp, lba, num, ei_lba); if (prot_ret) { - write_unlock_irqrestore(macc_lckp, iflags); + write_unlock(macc_lckp); mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, prot_ret); return illegal_condition_result; } @@ -3126,7 +3124,7 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) ret = do_device_access(sip, scp, 0, lba, num, true); if (unlikely(scsi_debug_lbp())) map_region(sip, lba, num); - write_unlock_irqrestore(macc_lckp, iflags); + write_unlock(macc_lckp); if (unlikely(-1 == ret)) return DID_ERROR << 16; else if (unlikely(sdebug_verbose && @@ -3175,7 +3173,6 @@ static int resp_write_scat(struct scsi_cmnd *scp, u32 lb_size = sdebug_sector_size; u32 ei_lba; u64 lba; - unsigned long iflags; int ret, res; bool is_16; static const u32 lrd_size = 32; /* + parameter list header size */ @@ -3237,7 +3234,7 @@ static int resp_write_scat(struct scsi_cmnd *scp, goto err_out; } - write_lock_irqsave(macc_lckp, iflags); + write_lock(macc_lckp); sg_off = lbdof_blen; /* Spec says Buffer xfer Length field in number of LBs in dout */ cum_lb = 0; @@ -3320,7 +3317,7 @@ static int resp_write_scat(struct scsi_cmnd *scp, } ret = 0; err_out_unlock: - write_unlock_irqrestore(macc_lckp, iflags); + write_unlock(macc_lckp); err_out: kfree(lrdp); return ret; @@ -3329,7 +3326,6 @@ static int resp_write_scat(struct scsi_cmnd *scp, static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u32 ei_lba, bool unmap, bool ndob) { - unsigned long iflags; unsigned long long i; u64 block, lbaa; u32 lb_size = sdebug_sector_size; @@ -3344,7 +3340,7 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, if (ret) return ret; - write_lock_irqsave(macc_lckp, iflags); + write_lock(macc_lckp); if (unmap && scsi_debug_lbp()) { unmap_region(sip, lba, num); @@ -3362,7 +3358,7 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, ret = fetch_to_dev_buffer(scp, fs1p, lb_size); if (-1 == ret) { - write_unlock_irqrestore(&sip->macc_lck, iflags); + write_unlock(&sip->macc_lck); return DID_ERROR << 16; } else if (sdebug_verbose && !ndob && (ret < lb_size)) sdev_printk(KERN_INFO, scp->device, @@ -3378,7 +3374,7 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, if (scsi_debug_lbp()) map_region(sip, lba, num); out: - write_unlock_irqrestore(macc_lckp, iflags); + write_unlock(macc_lckp); return 0; } @@ -3496,7 +3492,6 @@ static int resp_comp_write(struct scsi_cmnd *scp, u32 dnum; u32 lb_size = sdebug_sector_size; u8 num; - unsigned long iflags; int ret; int retval = 0; @@ -3525,7 +3520,7 @@ static int resp_comp_write(struct scsi_cmnd *scp, return check_condition_result; } - write_lock_irqsave(macc_lckp, iflags); + write_lock(macc_lckp); ret = do_dout_fetch(scp, dnum, arr); if (ret == -1) { @@ -3543,7 +3538,7 @@ static int resp_comp_write(struct scsi_cmnd *scp, if (scsi_debug_lbp()) map_region(sip, lba, num); cleanup: - write_unlock_irqrestore(macc_lckp, iflags); + write_unlock(macc_lckp); kfree(arr); return retval; } @@ -3562,8 +3557,6 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; unsigned int i, payload_len, descriptors; int ret; - unsigned long iflags; - if (!scsi_debug_lbp()) return 0; /* fib and say its done */ @@ -3590,7 +3583,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) desc = (void *)&buf[8]; - write_lock_irqsave(macc_lckp, iflags); + write_lock(macc_lckp); for (i = 0 ; i < descriptors ; i++) { unsigned long long lba = get_unaligned_be64(&desc[i].lba); @@ -3606,7 +3599,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) ret = 0; out: - write_unlock_irqrestore(macc_lckp, iflags); + write_unlock(macc_lckp); kfree(buf); return ret; @@ -3789,7 +3782,6 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) int ret, j; u32 vnum, a_num, off; const u32 lb_size = sdebug_sector_size; - unsigned long iflags; u64 lba; u8 *arr; u8 *cmd = scp->cmnd; @@ -3831,7 +3823,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return check_condition_result; } /* Not changing store, so only need read access */ - read_lock_irqsave(macc_lckp, iflags); + read_lock(macc_lckp); ret = do_dout_fetch(scp, a_num, arr); if (ret == -1) { @@ -3853,7 +3845,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) goto cleanup; } cleanup: - read_unlock_irqrestore(macc_lckp, iflags); + read_unlock(macc_lckp); kfree(arr); return ret; } @@ -4501,9 +4493,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0; if (cmnd->result & SDEG_RES_IMMED_MASK) { - /* - * This is the F_DELAY_OVERR case. No delay. - */ cmnd->result &= ~SDEG_RES_IMMED_MASK; delta_jiff = ndelay = 0; } @@ -6126,7 +6115,7 @@ static int scsi_debug_queuecommand(struct Scsi_Host *shost, pfp = r_pfp; /* if leaf function ptr NULL, try the root's */ fini: - if (F_DELAY_OVERR & flags) + if (F_DELAY_OVERR & flags) /* cmds like INQUIRY respond asap */ return schedule_resp(scp, devip, errsts, pfp, 0, 0); else if ((flags & F_LONG_DELAY) && (sdebug_jdelay > 0 || sdebug_ndelay > 10000)) { From a2aede970a8e12cadb8be779066cd64e6dd37e82 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:21 -0400 Subject: [PATCH 208/562] scsi: scsi_debug: Improve command duration calculation Previously the code did the work implied by the given SCSI command and after that it waited for a timer based on the user specified command duration to be exhausted before informing the mid-level that the command was complete. For short command durations, the time to complete the work implied by the SCSI command could be significant compared to the user specified command duration. For example a WRITE of 128 blocks (say 512 bytes each) on a machine that can copy from main memory to main memory at a rate of 10 GB/sec will take around 6.4 microseconds to do that copy. If the user specified a command duration of 5 microseconds (ndelay=5000), should the driver do a further delay of 5 microseconds after the copy or return immediately because 6.4 > 5 ? The action prior to this patch was to always do the timer based delay. After this patch, for ndelay values less than 1 millisecond, this driver will complete the command immediately. And in the case where the user specified delay was 7 microseconds, a timer delay of 600 nanoseconds will be set ((7 - 6.4) * 1000). Link: https://lore.kernel.org/r/20200421151424.32668-6-dgilbert@interlog.com Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index ac8f31328632..43b96ab6384c 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4402,6 +4402,8 @@ static void setup_inject(struct sdebug_queue *sqp, sqcp->inj_cmd_abort = !!(SDEBUG_OPT_CMD_ABORT & sdebug_opts); } +#define INCLUSIVE_TIMING_MAX_NS 1000000 /* 1 millisecond */ + /* Complete the processing of the thread that queued a SCSI command to this * driver. It either completes the command by calling cmnd_done() or * schedules a hr timer or work queue then returns 0. Returns @@ -4413,8 +4415,10 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, struct sdebug_dev_info *), int delta_jiff, int ndelay) { - unsigned long iflags; + bool new_sd_dp; int k, num_in_q, qdepth, inject; + unsigned long iflags; + u64 ns_from_boot = 0; struct sdebug_queue *sqp; struct sdebug_queued_cmd *sqcp; struct scsi_device *sdp; @@ -4430,7 +4434,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, if (delta_jiff == 0) goto respond_in_thread; - /* schedule the response at a later time if resources permit */ sqp = get_queue(cmnd); spin_lock_irqsave(&sqp->qc_lock, iflags); if (unlikely(atomic_read(&sqp->blocked))) { @@ -4489,8 +4492,15 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC); if (sd_dp == NULL) return SCSI_MLQUEUE_HOST_BUSY; + new_sd_dp = true; + } else { + new_sd_dp = false; } + if (ndelay > 0 && ndelay < INCLUSIVE_TIMING_MAX_NS) + ns_from_boot = ktime_get_boottime_ns(); + + /* one of the resp_*() response functions is called here */ cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0; if (cmnd->result & SDEG_RES_IMMED_MASK) { cmnd->result &= ~SDEG_RES_IMMED_MASK; @@ -4521,6 +4531,22 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, } else { /* ndelay has a 4.2 second max */ kt = sdebug_random ? prandom_u32_max((u32)ndelay) : (u32)ndelay; + if (ndelay < INCLUSIVE_TIMING_MAX_NS) { + u64 d = ktime_get_boottime_ns() - ns_from_boot; + + if (kt <= d) { /* elapsed duration >= kt */ + sqcp->a_cmnd = NULL; + atomic_dec(&devip->num_in_q); + clear_bit(k, sqp->in_use_bm); + if (new_sd_dp) + kfree(sd_dp); + /* call scsi_done() from this thread */ + cmnd->scsi_done(cmnd); + return 0; + } + /* otherwise reduce kt by elapsed time */ + kt -= d; + } } if (!sd_dp->init_hrt) { sd_dp->init_hrt = true; @@ -4534,6 +4560,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, if (sdebug_statistics) sd_dp->issuing_cpu = raw_smp_processor_id(); sd_dp->defer_t = SDEB_DEFER_HRT; + /* schedule the invocation of scsi_done() for a later time */ hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED); } else { /* jdelay < 0, use work queue */ if (!sd_dp->init_wq) { From ed9f3e2513f91553cc7197e8739a38a9bdea5303 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:22 -0400 Subject: [PATCH 209/562] scsi: scsi_debug: Implement PRE-FETCH commands Many disks implement the SCSI PRE-FETCH commands. One use case might be a disk-to-disk compare, say between disks A and B. Then this sequence of commands might be used: PRE-FETCH(from B, IMMED), READ(from A), VERIFY (BYTCHK=1 on B with data returned from READ). The PRE-FETCH (which returns quickly due to the IMMED) fetches the data from the media into B's cache which should speed the trailing VERIFY command. The next chunk of the compare might be done in parallel, with A and B reversed. The implementation tries to bring the specified range in main memory into the cache(s) associated with this machine's CPU(s) using the prefetch_range() function. Link: https://lore.kernel.org/r/20200421151424.32668-7-dgilbert@interlog.com Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 75 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 43b96ab6384c..153f38aa4f54 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -367,7 +368,8 @@ enum sdeb_opcode_index { SDEB_I_WRITE_SAME = 26, /* 10, 16 */ SDEB_I_SYNC_CACHE = 27, /* 10, 16 */ SDEB_I_COMP_WRITE = 28, - SDEB_I_LAST_ELEMENT = 29, /* keep this last (previous + 1) */ + SDEB_I_PRE_FETCH = 29, /* 10, 16 */ + SDEB_I_LAST_ELEM_P1 = 30, /* keep this last (previous + 1) */ }; @@ -383,7 +385,7 @@ static const unsigned char opcode_ind_arr[256] = { /* 0x20; 0x20->0x3f: 10 byte cdbs */ 0, 0, 0, 0, 0, SDEB_I_READ_CAPACITY, 0, 0, SDEB_I_READ, 0, SDEB_I_WRITE, 0, 0, 0, 0, SDEB_I_VERIFY, - 0, 0, 0, 0, 0, SDEB_I_SYNC_CACHE, 0, 0, + 0, 0, 0, 0, SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, 0, 0, 0, 0, SDEB_I_WRITE_BUFFER, 0, 0, 0, 0, /* 0x40; 0x40->0x5f: 10 byte cdbs */ 0, SDEB_I_WRITE_SAME, SDEB_I_UNMAP, 0, 0, 0, 0, 0, @@ -399,7 +401,7 @@ static const unsigned char opcode_ind_arr[256] = { 0, 0, 0, 0, 0, SDEB_I_ATA_PT, 0, 0, SDEB_I_READ, SDEB_I_COMP_WRITE, SDEB_I_WRITE, 0, 0, 0, 0, SDEB_I_VERIFY, - 0, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME, 0, 0, 0, 0, + SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16, /* 0xa0; 0xa0->0xbf: 12 byte cdbs */ SDEB_I_REPORT_LUNS, SDEB_I_ATA_PT, 0, SDEB_I_MAINT_IN, @@ -446,6 +448,7 @@ static int resp_write_same_16(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_comp_write(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_write_buffer(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_sync_cache(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_pre_fetch(struct scsi_cmnd *, struct sdebug_dev_info *); static int sdebug_do_add_host(bool mk_new_store); static int sdebug_add_host_helper(int per_host_idx); @@ -544,11 +547,17 @@ static const struct opcode_info_t sync_cache_iarr[] = { 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* SYNC_CACHE (16) */ }; +static const struct opcode_info_t pre_fetch_iarr[] = { + {0, 0x90, 0, F_SYNC_DELAY | F_M_ACCESS, resp_pre_fetch, NULL, + {16, 0x2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* PRE-FETCH (16) */ +}; + /* This array is accessed via SDEB_I_* values. Make sure all are mapped, * plus the terminating elements for logic that scans this table such as * REPORT SUPPORTED OPERATION CODES. */ -static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { +static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { /* 0 */ {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* unknown opcodes */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, @@ -640,8 +649,12 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { {0, 0x89, 0, F_D_OUT | FF_MEDIA_IO, resp_comp_write, NULL, {16, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0xff, 0x3f, 0xc7} }, /* COMPARE AND WRITE */ + {ARRAY_SIZE(pre_fetch_iarr), 0x34, 0, F_SYNC_DELAY | F_M_ACCESS, + resp_pre_fetch, pre_fetch_iarr, + {10, 0x2, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, + 0, 0, 0, 0} }, /* PRE-FETCH (10) */ -/* 29 */ +/* 30 */ {0xff, 0, 0, 0, NULL, NULL, /* terminating element */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, }; @@ -756,6 +769,8 @@ static const int illegal_condition_result = static const int device_qfull_result = (DID_OK << 16) | (COMMAND_COMPLETE << 8) | SAM_STAT_TASK_SET_FULL; +static const int condition_met_result = SAM_STAT_CONDITION_MET; + /* Only do the extra work involved in logical block provisioning if one or * more of the lbpu, lbpws or lbpws10 parameters are given and we are doing @@ -3674,6 +3689,56 @@ static int resp_sync_cache(struct scsi_cmnd *scp, return res; } +/* + * Assuming the LBA+num_blocks is not out-of-range, this function will return + * CONDITION MET if the specified blocks will/have fitted in the cache, and + * a GOOD status otherwise. Model a disk with a big cache and yield + * CONDITION MET. Actually tries to bring range in main memory into the + * cache associated with the CPU(s). + */ +static int resp_pre_fetch(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + int res = 0; + u64 lba; + u64 block, rest = 0; + u32 nblks; + u8 *cmd = scp->cmnd; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + u8 *fsp = sip ? sip->storep : NULL; + + if (cmd[0] == PRE_FETCH) { /* 10 byte cdb */ + lba = get_unaligned_be32(cmd + 2); + nblks = get_unaligned_be16(cmd + 7); + } else { /* PRE-FETCH(16) */ + lba = get_unaligned_be64(cmd + 2); + nblks = get_unaligned_be32(cmd + 10); + } + if (lba + nblks > sdebug_capacity) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); + return check_condition_result; + } + if (!fsp) + goto fini; + /* PRE-FETCH spec says nothing about LBP or PI so skip them */ + block = do_div(lba, sdebug_store_sectors); + if (block + nblks > sdebug_store_sectors) + rest = block + nblks - sdebug_store_sectors; + + /* Try to bring the PRE-FETCH range into CPU's cache */ + read_lock(macc_lckp); + prefetch_range(fsp + (sdebug_sector_size * block), + (nblks - rest) * sdebug_sector_size); + if (rest) + prefetch_range(fsp, rest * sdebug_sector_size); + read_unlock(macc_lckp); +fini: + if (cmd[1] & 0x2) + res = SDEG_RES_IMMED_MASK; + return res | condition_met_result; +} + #define RL_BUCKET_ELEMS 8 /* Even though each pseudo target has a REPORT LUNS "well known logical unit" From 5d80707673581f95506cb2457354705ea4b51c46 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:23 -0400 Subject: [PATCH 210/562] scsi: scsi_debug: Re-arrange parameters alphabetically This module has a lot of parameters and when searching for one, the author prefers them in alphabetical order. This can lead to somewhat illogical ordering (e.g. inq_product before inq_vendor). However it is not clear what another sensible total logical ordering would be. Link: https://lore.kernel.org/r/20200421151424.32668-8-dgilbert@interlog.com Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 153f38aa4f54..a7b90b11d742 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4683,32 +4683,34 @@ module_param_named(every_nth, sdebug_every_nth, int, S_IRUGO | S_IWUSR); module_param_named(fake_rw, sdebug_fake_rw, int, S_IRUGO | S_IWUSR); module_param_named(guard, sdebug_guard, uint, S_IRUGO); module_param_named(host_lock, sdebug_host_lock, bool, S_IRUGO | S_IWUSR); -module_param_string(inq_vendor, sdebug_inq_vendor_id, - sizeof(sdebug_inq_vendor_id), S_IRUGO|S_IWUSR); module_param_string(inq_product, sdebug_inq_product_id, - sizeof(sdebug_inq_product_id), S_IRUGO|S_IWUSR); + sizeof(sdebug_inq_product_id), S_IRUGO | S_IWUSR); module_param_string(inq_rev, sdebug_inq_product_rev, - sizeof(sdebug_inq_product_rev), S_IRUGO|S_IWUSR); + sizeof(sdebug_inq_product_rev), S_IRUGO | S_IWUSR); +module_param_string(inq_vendor, sdebug_inq_vendor_id, + sizeof(sdebug_inq_vendor_id), S_IRUGO | S_IWUSR); +module_param_named(lbprz, sdebug_lbprz, int, S_IRUGO); module_param_named(lbpu, sdebug_lbpu, int, S_IRUGO); module_param_named(lbpws, sdebug_lbpws, int, S_IRUGO); module_param_named(lbpws10, sdebug_lbpws10, int, S_IRUGO); -module_param_named(lbprz, sdebug_lbprz, int, S_IRUGO); module_param_named(lowest_aligned, sdebug_lowest_aligned, int, S_IRUGO); module_param_named(max_luns, sdebug_max_luns, int, S_IRUGO | S_IWUSR); module_param_named(max_queue, sdebug_max_queue, int, S_IRUGO | S_IWUSR); -module_param_named(medium_error_start, sdebug_medium_error_start, int, S_IRUGO | S_IWUSR); -module_param_named(medium_error_count, sdebug_medium_error_count, int, S_IRUGO | S_IWUSR); +module_param_named(medium_error_count, sdebug_medium_error_count, int, + S_IRUGO | S_IWUSR); +module_param_named(medium_error_start, sdebug_medium_error_start, int, + S_IRUGO | S_IWUSR); module_param_named(ndelay, sdebug_ndelay, int, S_IRUGO | S_IWUSR); module_param_named(no_lun_0, sdebug_no_lun_0, int, S_IRUGO | S_IWUSR); module_param_named(no_uld, sdebug_no_uld, int, S_IRUGO); module_param_named(num_parts, sdebug_num_parts, int, S_IRUGO); module_param_named(num_tgts, sdebug_num_tgts, int, S_IRUGO | S_IWUSR); module_param_named(opt_blks, sdebug_opt_blks, int, S_IRUGO); +module_param_named(opt_xferlen_exp, sdebug_opt_xferlen_exp, int, S_IRUGO); module_param_named(opts, sdebug_opts, int, S_IRUGO | S_IWUSR); module_param_named(per_host_store, sdebug_per_host_store, bool, S_IRUGO | S_IWUSR); module_param_named(physblk_exp, sdebug_physblk_exp, int, S_IRUGO); -module_param_named(opt_xferlen_exp, sdebug_opt_xferlen_exp, int, S_IRUGO); module_param_named(ptype, sdebug_ptype, int, S_IRUGO | S_IWUSR); module_param_named(random, sdebug_random, bool, S_IRUGO | S_IWUSR); module_param_named(removable, sdebug_removable, bool, S_IRUGO | S_IWUSR); @@ -4721,8 +4723,8 @@ module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO); module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO); module_param_named(unmap_max_blocks, sdebug_unmap_max_blocks, int, S_IRUGO); module_param_named(unmap_max_desc, sdebug_unmap_max_desc, int, S_IRUGO); -module_param_named(virtual_gb, sdebug_virtual_gb, int, S_IRUGO | S_IWUSR); module_param_named(uuid_ctl, sdebug_uuid_ctl, int, S_IRUGO); +module_param_named(virtual_gb, sdebug_virtual_gb, int, S_IRUGO | S_IWUSR); module_param_named(vpd_use_hostno, sdebug_vpd_use_hostno, int, S_IRUGO | S_IWUSR); module_param_named(wp, sdebug_wp, bool, S_IRUGO | S_IWUSR); @@ -4734,7 +4736,7 @@ MODULE_DESCRIPTION("SCSI debug adapter driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(SDEBUG_VERSION); -MODULE_PARM_DESC(add_host, "0..127 hosts allowed(def=1)"); +MODULE_PARM_DESC(add_host, "add n hosts, in sysfs if negative remove host(s) (def=1)"); MODULE_PARM_DESC(ato, "application tag ownership: 0=disk 1=host (def=1)"); MODULE_PARM_DESC(cdb_len, "suggest CDB lengths to drivers (def=10)"); MODULE_PARM_DESC(clustering, "when set enables larger transfers (def=0)"); @@ -4747,30 +4749,30 @@ MODULE_PARM_DESC(every_nth, "timeout every nth command(def=0)"); MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)"); MODULE_PARM_DESC(guard, "protection checksum: 0=crc, 1=ip (def=0)"); MODULE_PARM_DESC(host_lock, "host_lock is ignored (def=0)"); -MODULE_PARM_DESC(inq_vendor, "SCSI INQUIRY vendor string (def=\"Linux\")"); MODULE_PARM_DESC(inq_product, "SCSI INQUIRY product string (def=\"scsi_debug\")"); MODULE_PARM_DESC(inq_rev, "SCSI INQUIRY revision string (def=\"" SDEBUG_VERSION "\")"); +MODULE_PARM_DESC(inq_vendor, "SCSI INQUIRY vendor string (def=\"Linux\")"); +MODULE_PARM_DESC(lbprz, + "on read unmapped LBs return 0 when 1 (def), return 0xff when 2"); MODULE_PARM_DESC(lbpu, "enable LBP, support UNMAP command (def=0)"); MODULE_PARM_DESC(lbpws, "enable LBP, support WRITE SAME(16) with UNMAP bit (def=0)"); MODULE_PARM_DESC(lbpws10, "enable LBP, support WRITE SAME(10) with UNMAP bit (def=0)"); -MODULE_PARM_DESC(lbprz, - "on read unmapped LBs return 0 when 1 (def), return 0xff when 2"); MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)"); MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)"); MODULE_PARM_DESC(max_queue, "max number of queued commands (1 to max(def))"); -MODULE_PARM_DESC(medium_error_start, "starting sector number to return MEDIUM error"); MODULE_PARM_DESC(medium_error_count, "count of sectors to return follow on MEDIUM error"); +MODULE_PARM_DESC(medium_error_start, "starting sector number to return MEDIUM error"); MODULE_PARM_DESC(ndelay, "response delay in nanoseconds (def=0 -> ignore)"); MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)"); MODULE_PARM_DESC(no_uld, "stop ULD (e.g. sd driver) attaching (def=0))"); MODULE_PARM_DESC(num_parts, "number of partitions(def=0)"); MODULE_PARM_DESC(num_tgts, "number of targets per host to simulate(def=1)"); MODULE_PARM_DESC(opt_blks, "optimal transfer length in blocks (def=1024)"); -MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout, 8->recovered_err... (def=0)"); -MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get new store"); -MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)"); MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer length granularity exponent (def=physblk_exp)"); +MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout, 8->recovered_err... (def=0)"); +MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get new store (def=0)"); +MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)"); MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])"); MODULE_PARM_DESC(random, "If set, uniformly randomize command duration between 0 and delay_in_ns"); MODULE_PARM_DESC(removable, "claim to have removable media (def=0)"); From 48e3bf1631ea3227ba2e16684df18e6843af84c1 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 21 Apr 2020 11:14:24 -0400 Subject: [PATCH 211/562] scsi: scsi_debug: Bump to version 1.89 The scsi_debug driver version is visible in: /sys/modules/scsi_debug/version and can thus be used by user space programs to alter the features they try to use. Since the per_host_store and zbc/zone options are significant additions, bump the version number to 1.89 . Link: https://lore.kernel.org/r/20200421151424.32668-9-dgilbert@interlog.com Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index a7b90b11d742..119226896d43 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7,7 +7,7 @@ * anything out of the ordinary is seen. * ^^^^^^^^^^^^^^^^^^^^^^^ Original ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * - * Copyright (C) 2001 - 2018 Douglas Gilbert + * Copyright (C) 2001 - 2020 Douglas Gilbert * * For documentation see http://sg.danny.cz/sg/sdebug26.html */ @@ -60,8 +60,8 @@ #include "scsi_logging.h" /* make sure inq_product_rev string corresponds to this version */ -#define SDEBUG_VERSION "0188" /* format to fit INQUIRY revision field */ -static const char *sdebug_version_date = "20190125"; +#define SDEBUG_VERSION "0189" /* format to fit INQUIRY revision field */ +static const char *sdebug_version_date = "20200421"; #define MY_NAME "scsi_debug" From d36da3058ced5fc1513df60dc4c4716280c59267 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Wed, 22 Apr 2020 19:42:15 +0900 Subject: [PATCH 212/562] scsi: scsi_debug: Add ZBC mode and VPD pages The ZBC standard "piggy-backs" on many, but not all, of the facilities in SBC. Add those ZBC mode pages (plus mode parameter block descriptors (e.g. "WP")) and VPD pages in common with SBC. Add ZBC specific VPD page for the host-managed ZBC device type (ptype=0x14). Link: https://lore.kernel.org/r/20200422104221.378203-2-damien.lemoal@wdc.com Signed-off-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 55 +++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 119226896d43..fc6b51d148c0 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1445,6 +1445,24 @@ static int inquiry_vpd_b2(unsigned char *arr) return 0x4; } +/* Zoned block device characteristics VPD page (ZBC mandatory) */ +static int inquiry_vpd_b6(unsigned char *arr) +{ + memset(arr, 0, 0x3c); + arr[0] = 0x1; /* set URSWRZ (unrestricted read in seq. wr req zone) */ + /* + * Set Optimal number of open sequential write preferred zones and + * Optimal number of non-sequentially written sequential write + * preferred zones and Maximum number of open sequential write + * required zones fields to 'not reported' (0xffffffff). Leave other + * fields set to zero. + */ + put_unaligned_be32(0xffffffff, &arr[4]); + put_unaligned_be32(0xffffffff, &arr[8]); + put_unaligned_be32(0xffffffff, &arr[12]); + return 0x3c; +} + #define SDEBUG_LONG_INQ_SZ 96 #define SDEBUG_MAX_INQ_ARR_SZ 584 @@ -1454,13 +1472,15 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) unsigned char *arr; unsigned char *cmd = scp->cmnd; int alloc_len, n, ret; - bool have_wlun, is_disk; + bool have_wlun, is_disk, is_zbc, is_disk_zbc; alloc_len = get_unaligned_be16(cmd + 3); arr = kzalloc(SDEBUG_MAX_INQ_ARR_SZ, GFP_ATOMIC); if (! arr) return DID_REQUEUE << 16; is_disk = (sdebug_ptype == TYPE_DISK); + is_zbc = (sdebug_ptype == TYPE_ZBC); + is_disk_zbc = (is_disk || is_zbc); have_wlun = scsi_is_wlun(scp->device->lun); if (have_wlun) pq_pdt = TYPE_WLUN; /* present, wlun */ @@ -1498,11 +1518,14 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) arr[n++] = 0x86; /* extended inquiry */ arr[n++] = 0x87; /* mode page policy */ arr[n++] = 0x88; /* SCSI ports */ - if (is_disk) { /* SBC only */ + if (is_disk_zbc) { /* SBC or ZBC */ arr[n++] = 0x89; /* ATA information */ arr[n++] = 0xb0; /* Block limits */ arr[n++] = 0xb1; /* Block characteristics */ - arr[n++] = 0xb2; /* Logical Block Prov */ + if (is_disk) + arr[n++] = 0xb2; /* LB Provisioning */ + else if (is_zbc) + arr[n++] = 0xb6; /* ZB dev. char. */ } arr[3] = n - 4; /* number of supported VPD pages */ } else if (0x80 == cmd[2]) { /* unit serial number */ @@ -1541,19 +1564,22 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) } else if (0x88 == cmd[2]) { /* SCSI Ports */ arr[1] = cmd[2]; /*sanity */ arr[3] = inquiry_vpd_88(&arr[4], target_dev_id); - } else if (is_disk && 0x89 == cmd[2]) { /* ATA information */ + } else if (is_disk_zbc && 0x89 == cmd[2]) { /* ATA info */ arr[1] = cmd[2]; /*sanity */ n = inquiry_vpd_89(&arr[4]); put_unaligned_be16(n, arr + 2); - } else if (is_disk && 0xb0 == cmd[2]) { /* Block limits */ + } else if (is_disk_zbc && 0xb0 == cmd[2]) { /* Block limits */ arr[1] = cmd[2]; /*sanity */ arr[3] = inquiry_vpd_b0(&arr[4]); - } else if (is_disk && 0xb1 == cmd[2]) { /* Block char. */ + } else if (is_disk_zbc && 0xb1 == cmd[2]) { /* Block char. */ arr[1] = cmd[2]; /*sanity */ arr[3] = inquiry_vpd_b1(&arr[4]); } else if (is_disk && 0xb2 == cmd[2]) { /* LB Prov. */ arr[1] = cmd[2]; /*sanity */ arr[3] = inquiry_vpd_b2(&arr[4]); + } else if (is_zbc && cmd[2] == 0xb6) { /* ZB dev. charact. */ + arr[1] = cmd[2]; /*sanity */ + arr[3] = inquiry_vpd_b6(&arr[4]); } else { mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, -1); kfree(arr); @@ -1591,6 +1617,9 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) } else if (sdebug_ptype == TYPE_TAPE) { /* SSC-4 rev 3 */ put_unaligned_be16(0x525, arr + n); n += 2; + } else if (is_zbc) { /* ZBC BSR INCITS 536 revision 05 */ + put_unaligned_be16(0x624, arr + n); + n += 2; } put_unaligned_be16(0x2100, arr + n); /* SPL-4 no version claimed */ ret = fill_from_dev_buffer(scp, arr, @@ -2180,7 +2209,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, unsigned char *ap; unsigned char arr[SDEBUG_MAX_MSENSE_SZ]; unsigned char *cmd = scp->cmnd; - bool dbd, llbaa, msense_6, is_disk, bad_pcode; + bool dbd, llbaa, msense_6, is_disk, is_zbc, bad_pcode; dbd = !!(cmd[1] & 0x8); /* disable block descriptors */ pcontrol = (cmd[2] & 0xc0) >> 6; @@ -2189,7 +2218,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp, msense_6 = (MODE_SENSE == cmd[0]); llbaa = msense_6 ? false : !!(cmd[1] & 0x10); is_disk = (sdebug_ptype == TYPE_DISK); - if (is_disk && !dbd) + is_zbc = (sdebug_ptype == TYPE_ZBC); + if ((is_disk || is_zbc) && !dbd) bd_len = llbaa ? 16 : 8; else bd_len = 0; @@ -2201,8 +2231,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp, } target_dev_id = ((devip->sdbg_host->shost->host_no + 1) * 2000) + (devip->target * 1000) - 3; - /* for disks set DPOFUA bit and clear write protect (WP) bit */ - if (is_disk) { + /* for disks+zbc set DPOFUA bit and clear write protect (WP) bit */ + if (is_disk || is_zbc) { dev_spec = 0x10; /* =0x90 if WP=1 implies read-only */ if (sdebug_wp) dev_spec |= 0x80; @@ -2262,7 +2292,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, bad_pcode = true; break; case 0x8: /* Caching page, direct access */ - if (is_disk) { + if (is_disk || is_zbc) { len = resp_caching_pg(ap, pcontrol, target); offset += len; } else @@ -2300,6 +2330,9 @@ static int resp_mode_sense(struct scsi_cmnd *scp, target); len += resp_caching_pg(ap + len, pcontrol, target); + } else if (is_zbc) { + len += resp_caching_pg(ap + len, pcontrol, + target); } len += resp_ctrl_m_pg(ap + len, pcontrol, target); len += resp_sas_sf_m_pg(ap + len, pcontrol, target); From f0d1cf9378bd4030725efa4c154cd39383dd0c12 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Wed, 22 Apr 2020 19:42:16 +0900 Subject: [PATCH 213/562] scsi: scsi_debug: Add ZBC zone commands Add support for the 5 ZBC commands and enough functionality to emulate a host-managed device with one conventional zone and a set of sequential write-required zones up to the disk capacity. Link: https://lore.kernel.org/r/20200422104221.378203-3-damien.lemoal@wdc.com Signed-off-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 823 +++++++++++++++++++++++++++++++++++++- 1 file changed, 806 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index fc6b51d148c0..6e999320a7f0 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -94,6 +94,11 @@ static const char *sdebug_version_date = "20200421"; #define MICROCODE_CHANGED_ASCQ 0x1 /* with TARGET_CHANGED_ASC */ #define MICROCODE_CHANGED_WO_RESET_ASCQ 0x16 #define WRITE_ERROR_ASC 0xc +#define UNALIGNED_WRITE_ASCQ 0x4 +#define WRITE_BOUNDARY_ASCQ 0x5 +#define READ_INVDATA_ASCQ 0x6 +#define READ_BOUNDARY_ASCQ 0x7 +#define INSUFF_ZONE_ASCQ 0xe /* Additional Sense Code Qualifier (ASCQ) */ #define ACK_NAK_TO 0x3 @@ -147,6 +152,10 @@ static const char *sdebug_version_date = "20200421"; #define DEF_UUID_CTL 0 #define JDELAY_OVERRIDDEN -9999 +/* Default parameters for ZBC drives */ +#define DEF_ZBC_ZONE_SIZE_MB 128 +#define DEF_ZBC_MAX_OPEN_ZONES 8 + #define SDEBUG_LUN_0_VAL 0 /* bit mask values for sdebug_opts */ @@ -250,6 +259,24 @@ static const char *sdebug_version_date = "20200421"; #define SDEB_XA_NOT_IN_USE XA_MARK_1 +/* enumeration names taken from table 26, zbcr05 */ +enum sdebug_z_cond { + ZBC_NOT_WRITE_POINTER = 0x0, + ZC1_EMPTY = 0x1, + ZC2_IMPLICIT_OPEN = 0x2, + ZC3_EXPLICIT_OPEN = 0x3, + ZC4_CLOSED = 0x4, + ZC6_READ_ONLY = 0xd, + ZC5_FULL = 0xe, + ZC7_OFFLINE = 0xf, +}; + +struct sdeb_zone_state { /* ZBC: per zone state */ + enum sdebug_z_cond z_cond; + unsigned int z_size; + sector_t z_start; + sector_t z_wp; +}; struct sdebug_dev_info { struct list_head dev_list; @@ -262,6 +289,16 @@ struct sdebug_dev_info { atomic_t num_in_q; atomic_t stopped; bool used; + + /* For ZBC devices */ + unsigned int zsize; + unsigned int zsize_shift; + unsigned int nr_zones; + unsigned int nr_imp_open; + unsigned int nr_exp_open; + unsigned int nr_closed; + unsigned int max_open; + struct sdeb_zone_state *zstate; }; struct sdebug_host_info { @@ -369,7 +406,9 @@ enum sdeb_opcode_index { SDEB_I_SYNC_CACHE = 27, /* 10, 16 */ SDEB_I_COMP_WRITE = 28, SDEB_I_PRE_FETCH = 29, /* 10, 16 */ - SDEB_I_LAST_ELEM_P1 = 30, /* keep this last (previous + 1) */ + SDEB_I_ZONE_OUT = 30, /* 0x94+SA; includes no data xfer */ + SDEB_I_ZONE_IN = 31, /* 0x95+SA; all have data-in */ + SDEB_I_LAST_ELEM_P1 = 32, /* keep this last (previous + 1) */ }; @@ -401,7 +440,8 @@ static const unsigned char opcode_ind_arr[256] = { 0, 0, 0, 0, 0, SDEB_I_ATA_PT, 0, 0, SDEB_I_READ, SDEB_I_COMP_WRITE, SDEB_I_WRITE, 0, 0, 0, 0, SDEB_I_VERIFY, - SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME, 0, 0, 0, 0, + SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME, + SDEB_I_ZONE_OUT, SDEB_I_ZONE_IN, 0, 0, 0, 0, 0, 0, 0, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16, /* 0xa0; 0xa0->0xbf: 12 byte cdbs */ SDEB_I_REPORT_LUNS, SDEB_I_ATA_PT, 0, SDEB_I_MAINT_IN, @@ -449,6 +489,11 @@ static int resp_comp_write(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_write_buffer(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_sync_cache(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_pre_fetch(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_report_zones(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_open_zone(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_close_zone(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_finish_zone(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_rwp_zone(struct scsi_cmnd *, struct sdebug_dev_info *); static int sdebug_do_add_host(bool mk_new_store); static int sdebug_add_host_helper(int per_host_idx); @@ -553,6 +598,24 @@ static const struct opcode_info_t pre_fetch_iarr[] = { 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* PRE-FETCH (16) */ }; +static const struct opcode_info_t zone_out_iarr[] = { /* ZONE OUT(16) */ + {0, 0x94, 0x1, F_SA_LOW, resp_close_zone, NULL, + {16, 0x1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0, 0, 0xff, 0xff, 0x1, 0xc7} }, /* CLOSE ZONE */ + {0, 0x94, 0x2, F_SA_LOW, resp_finish_zone, NULL, + {16, 0x2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0, 0, 0xff, 0xff, 0x1, 0xc7} }, /* FINISH ZONE */ + {0, 0x94, 0x4, F_SA_LOW, resp_rwp_zone, NULL, + {16, 0x4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0, 0, 0xff, 0xff, 0x1, 0xc7} }, /* RESET WRITE POINTER */ +}; + +static const struct opcode_info_t zone_in_iarr[] = { /* ZONE IN(16) */ + {0, 0x95, 0x6, F_SA_LOW | F_D_IN, NULL, NULL, + {16, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* REPORT ZONES */ +}; + /* This array is accessed via SDEB_I_* values. Make sure all are mapped, * plus the terminating elements for logic that scans this table such as @@ -655,6 +718,15 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { 0, 0, 0, 0} }, /* PRE-FETCH (10) */ /* 30 */ + {ARRAY_SIZE(zone_out_iarr), 0x94, 0x3, F_SA_LOW, + resp_open_zone, zone_out_iarr, /* ZONE_OUT(16), OPEN ZONE) */ + {16, 0x3 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x0, 0x0, 0xff, 0xff, 0x1, 0xc7} }, + {ARRAY_SIZE(zone_in_iarr), 0x95, 0x0, F_SA_LOW | F_D_IN, + resp_report_zones, zone_in_iarr, /* ZONE_IN(16), REPORT ZONES) */ + {16, 0x0 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xc7} }, +/* sentinel */ {0xff, 0, 0, 0, NULL, NULL, /* terminating element */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, }; @@ -742,6 +814,11 @@ static int dix_writes; static int dix_reads; static int dif_errors; +/* ZBC global data */ +static bool sdeb_zbc_in_use; /* true when ptype=TYPE_ZBC [0x14] */ +static const int zbc_zone_size_mb; +static const int zbc_max_open_zones = DEF_ZBC_MAX_OPEN_ZONES; + static int submit_queues = DEF_SUBMIT_QUEUES; /* > 1 for multi-queue (mq) */ static struct sdebug_queue *sdebug_q_arr; /* ptr to array of submit queues */ @@ -1446,20 +1523,22 @@ static int inquiry_vpd_b2(unsigned char *arr) } /* Zoned block device characteristics VPD page (ZBC mandatory) */ -static int inquiry_vpd_b6(unsigned char *arr) +static int inquiry_vpd_b6(struct sdebug_dev_info *devip, unsigned char *arr) { memset(arr, 0, 0x3c); arr[0] = 0x1; /* set URSWRZ (unrestricted read in seq. wr req zone) */ /* * Set Optimal number of open sequential write preferred zones and * Optimal number of non-sequentially written sequential write - * preferred zones and Maximum number of open sequential write - * required zones fields to 'not reported' (0xffffffff). Leave other - * fields set to zero. + * preferred zones fields to 'not reported' (0xffffffff). Leave other + * fields set to zero, apart from Max. number of open swrz_s field. */ put_unaligned_be32(0xffffffff, &arr[4]); put_unaligned_be32(0xffffffff, &arr[8]); - put_unaligned_be32(0xffffffff, &arr[12]); + if (devip->max_open) + put_unaligned_be32(devip->max_open, &arr[12]); + else + put_unaligned_be32(0xffffffff, &arr[12]); return 0x3c; } @@ -1579,7 +1658,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) arr[3] = inquiry_vpd_b2(&arr[4]); } else if (is_zbc && cmd[2] == 0xb6) { /* ZB dev. charact. */ arr[1] = cmd[2]; /*sanity */ - arr[3] = inquiry_vpd_b6(&arr[4]); + arr[3] = inquiry_vpd_b6(devip, &arr[4]); } else { mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, -1); kfree(arr); @@ -2550,9 +2629,185 @@ static int resp_log_sense(struct scsi_cmnd *scp, min_t(int, len, SDEBUG_MAX_INQ_ARR_SZ)); } -static inline int check_device_access_params(struct scsi_cmnd *scp, - unsigned long long lba, unsigned int num, bool write) +static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip) { + return devip->nr_zones != 0; +} + +static struct sdeb_zone_state *zbc_zone(struct sdebug_dev_info *devip, + unsigned long long lba) +{ + unsigned int zno; + + if (devip->zsize_shift) + zno = lba >> devip->zsize_shift; + else + zno = lba / devip->zsize; + return &devip->zstate[zno]; +} + +static inline bool zbc_zone_is_conv(struct sdeb_zone_state *zsp) +{ + return zsp->z_cond == ZBC_NOT_WRITE_POINTER; +} + +static void zbc_close_zone(struct sdebug_dev_info *devip, + struct sdeb_zone_state *zsp) +{ + enum sdebug_z_cond zc; + + if (zbc_zone_is_conv(zsp)) + return; + + zc = zsp->z_cond; + if (!(zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)) + return; + + if (zc == ZC2_IMPLICIT_OPEN) + devip->nr_imp_open--; + else + devip->nr_exp_open--; + + if (zsp->z_wp == zsp->z_start) { + zsp->z_cond = ZC1_EMPTY; + } else { + zsp->z_cond = ZC4_CLOSED; + devip->nr_closed++; + } +} + +static void zbc_close_imp_open_zone(struct sdebug_dev_info *devip) +{ + struct sdeb_zone_state *zsp = &devip->zstate[0]; + unsigned int i; + + for (i = 0; i < devip->nr_zones; i++, zsp++) { + if (zsp->z_cond == ZC2_IMPLICIT_OPEN) { + zbc_close_zone(devip, zsp); + return; + } + } +} + +static void zbc_open_zone(struct sdebug_dev_info *devip, + struct sdeb_zone_state *zsp, bool explicit) +{ + enum sdebug_z_cond zc; + + if (zbc_zone_is_conv(zsp)) + return; + + zc = zsp->z_cond; + if ((explicit && zc == ZC3_EXPLICIT_OPEN) || + (!explicit && zc == ZC2_IMPLICIT_OPEN)) + return; + + /* Close an implicit open zone if necessary */ + if (explicit && zsp->z_cond == ZC2_IMPLICIT_OPEN) + zbc_close_zone(devip, zsp); + else if (devip->max_open && + devip->nr_imp_open + devip->nr_exp_open >= devip->max_open) + zbc_close_imp_open_zone(devip); + + if (zsp->z_cond == ZC4_CLOSED) + devip->nr_closed--; + if (explicit) { + zsp->z_cond = ZC3_EXPLICIT_OPEN; + devip->nr_exp_open++; + } else { + zsp->z_cond = ZC2_IMPLICIT_OPEN; + devip->nr_imp_open++; + } +} + +static void zbc_inc_wp(struct sdebug_dev_info *devip, + unsigned long long lba, unsigned int num) +{ + struct sdeb_zone_state *zsp = zbc_zone(devip, lba); + + if (zbc_zone_is_conv(zsp)) + return; + + zsp->z_wp += num; + if (zsp->z_wp >= zsp->z_start + zsp->z_size) + zsp->z_cond = ZC5_FULL; +} + +static int check_zbc_access_params(struct scsi_cmnd *scp, + unsigned long long lba, unsigned int num, bool write) +{ + struct scsi_device *sdp = scp->device; + struct sdebug_dev_info *devip = (struct sdebug_dev_info *)sdp->hostdata; + struct sdeb_zone_state *zsp = zbc_zone(devip, lba); + struct sdeb_zone_state *zsp_end = zbc_zone(devip, lba + num - 1); + + if (!write) { + /* Reads cannot cross zone types boundaries */ + if (zsp_end != zsp && + zbc_zone_is_conv(zsp) && + !zbc_zone_is_conv(zsp_end)) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + LBA_OUT_OF_RANGE, + READ_INVDATA_ASCQ); + return check_condition_result; + } + return 0; + } + + /* No restrictions for writes within conventional zones */ + if (zbc_zone_is_conv(zsp)) { + if (!zbc_zone_is_conv(zsp_end)) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + LBA_OUT_OF_RANGE, + WRITE_BOUNDARY_ASCQ); + return check_condition_result; + } + return 0; + } + + /* Writes cannot cross sequential zone boundaries */ + if (zsp_end != zsp) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + LBA_OUT_OF_RANGE, + WRITE_BOUNDARY_ASCQ); + return check_condition_result; + } + /* Cannot write full zones */ + if (zsp->z_cond == ZC5_FULL) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + INVALID_FIELD_IN_CDB, 0); + return check_condition_result; + } + /* Writes must be aligned to the zone WP */ + if (lba != zsp->z_wp) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + LBA_OUT_OF_RANGE, + UNALIGNED_WRITE_ASCQ); + return check_condition_result; + } + + /* Handle implicit open of closed and empty zones */ + if (zsp->z_cond == ZC1_EMPTY || zsp->z_cond == ZC4_CLOSED) { + if (devip->max_open && + devip->nr_exp_open >= devip->max_open) { + mk_sense_buffer(scp, DATA_PROTECT, + INSUFF_RES_ASC, + INSUFF_ZONE_ASCQ); + return check_condition_result; + } + zbc_open_zone(devip, zsp, false); + } + + return 0; +} + +static inline int check_device_access_params + (struct scsi_cmnd *scp, unsigned long long lba, + unsigned int num, bool write) +{ + struct scsi_device *sdp = scp->device; + struct sdebug_dev_info *devip = (struct sdebug_dev_info *)sdp->hostdata; + if (lba + num > sdebug_capacity) { mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); return check_condition_result; @@ -2567,6 +2822,9 @@ static inline int check_device_access_params(struct scsi_cmnd *scp, mk_sense_buffer(scp, DATA_PROTECT, WRITE_PROTECTED, 0x2); return check_condition_result; } + if (sdebug_dev_is_zoned(devip)) + return check_zbc_access_params(scp, lba, num, write); + return 0; } @@ -3153,10 +3411,13 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) sdev_printk(KERN_ERR, scp->device, "Unprotected WR " "to DIF device\n"); } - ret = check_device_access_params(scp, lba, num, true); - if (ret) - return ret; + write_lock(macc_lckp); + ret = check_device_access_params(scp, lba, num, true); + if (ret) { + write_unlock(macc_lckp); + return ret; + } /* DIX + T10 DIF */ if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) { @@ -3172,6 +3433,9 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) ret = do_device_access(sip, scp, 0, lba, num, true); if (unlikely(scsi_debug_lbp())) map_region(sip, lba, num); + /* If ZBC zone then bump its write pointer */ + if (sdebug_dev_is_zoned(devip)) + zbc_inc_wp(devip, lba, num); write_unlock(macc_lckp); if (unlikely(-1 == ret)) return DID_ERROR << 16; @@ -3326,6 +3590,9 @@ static int resp_write_scat(struct scsi_cmnd *scp, } ret = do_device_access(sip, scp, sg_off, lba, num, true); + /* If ZBC zone then bump its write pointer */ + if (sdebug_dev_is_zoned(devip)) + zbc_inc_wp(devip, lba, num); if (unlikely(scsi_debug_lbp())) map_region(sip, lba, num); if (unlikely(-1 == ret)) { @@ -3374,6 +3641,8 @@ static int resp_write_scat(struct scsi_cmnd *scp, static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u32 ei_lba, bool unmap, bool ndob) { + struct scsi_device *sdp = scp->device; + struct sdebug_dev_info *devip = (struct sdebug_dev_info *)sdp->hostdata; unsigned long long i; u64 block, lbaa; u32 lb_size = sdebug_sector_size; @@ -3384,12 +3653,14 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u8 *fs1p; u8 *fsp; - ret = check_device_access_params(scp, lba, num, true); - if (ret) - return ret; - write_lock(macc_lckp); + ret = check_device_access_params(scp, lba, num, true); + if (ret) { + write_unlock(macc_lckp); + return ret; + } + if (unmap && scsi_debug_lbp()) { unmap_region(sip, lba, num); goto out; @@ -3421,6 +3692,9 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, } if (scsi_debug_lbp()) map_region(sip, lba, num); + /* If ZBC zone then bump its write pointer */ + if (sdebug_dev_is_zoned(devip)) + zbc_inc_wp(devip, lba, num); out: write_unlock(macc_lckp); @@ -3948,6 +4222,426 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) return ret; } +#define RZONES_DESC_HD 64 + +/* Report zones depending on start LBA nad reporting options */ +static int resp_report_zones(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + unsigned int i, max_zones, rep_max_zones, nrz = 0; + int ret = 0; + u32 alloc_len, rep_opts, rep_len; + bool partial; + u64 lba, zs_lba; + u8 *arr = NULL, *desc; + u8 *cmd = scp->cmnd; + struct sdeb_zone_state *zsp; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + + if (!sdebug_dev_is_zoned(devip)) { + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + zs_lba = get_unaligned_be64(cmd + 2); + alloc_len = get_unaligned_be32(cmd + 10); + rep_opts = cmd[14] & 0x3f; + partial = cmd[14] & 0x80; + + if (zs_lba >= sdebug_capacity) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); + return check_condition_result; + } + + max_zones = devip->nr_zones - zs_lba / devip->zsize; + rep_max_zones = min((alloc_len - 64) >> ilog2(RZONES_DESC_HD), + max_zones); + + arr = kcalloc(RZONES_DESC_HD, alloc_len, GFP_ATOMIC); + if (!arr) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, + INSUFF_RES_ASCQ); + return check_condition_result; + } + + read_lock(macc_lckp); + + desc = arr + 64; + for (i = 0; i < max_zones; i++) { + lba = zs_lba + devip->zsize * i; + if (lba > sdebug_capacity) + break; + zsp = zbc_zone(devip, lba); + switch (rep_opts) { + case 0x00: + /* All zones */ + break; + case 0x01: + /* Empty zones */ + if (zsp->z_cond != ZC1_EMPTY) + continue; + break; + case 0x02: + /* Implicit open zones */ + if (zsp->z_cond != ZC2_IMPLICIT_OPEN) + continue; + break; + case 0x03: + /* Explicit open zones */ + if (zsp->z_cond != ZC3_EXPLICIT_OPEN) + continue; + break; + case 0x04: + /* Closed zones */ + if (zsp->z_cond != ZC4_CLOSED) + continue; + break; + case 0x05: + /* Full zones */ + if (zsp->z_cond != ZC5_FULL) + continue; + break; + case 0x06: + case 0x07: + case 0x10: + case 0x11: + /* + * Read-only, offline, reset WP recommended and + * non-seq-resource-used are not emulated: no zones + * to report; + */ + continue; + case 0x3f: + /* Not write pointer (conventional) zones */ + if (!zbc_zone_is_conv(zsp)) + continue; + break; + default: + mk_sense_buffer(scp, ILLEGAL_REQUEST, + INVALID_FIELD_IN_CDB, 0); + ret = check_condition_result; + goto fini; + } + + if (nrz < rep_max_zones) { + /* Fill zone descriptor */ + if (zbc_zone_is_conv(zsp)) + desc[0] = 0x1; + else + desc[0] = 0x2; + desc[1] = zsp->z_cond << 4; + put_unaligned_be64((u64)zsp->z_size, desc + 8); + put_unaligned_be64((u64)zsp->z_start, desc + 16); + put_unaligned_be64((u64)zsp->z_wp, desc + 24); + desc += 64; + } + + if (partial && nrz >= rep_max_zones) + break; + + nrz++; + } + + /* Report header */ + put_unaligned_be32(nrz * RZONES_DESC_HD, arr + 0); + put_unaligned_be64(sdebug_capacity - 1, arr + 8); + + rep_len = (unsigned long)desc - (unsigned long)arr; + ret = fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, rep_len)); + +fini: + read_unlock(macc_lckp); + kfree(arr); + return ret; +} + +/* Logic transplanted from tcmu-runner, file_zbc.c */ +static void zbc_open_all(struct sdebug_dev_info *devip) +{ + struct sdeb_zone_state *zsp = &devip->zstate[0]; + unsigned int i; + + for (i = 0; i < devip->nr_zones; i++, zsp++) { + if (zsp->z_cond == ZC4_CLOSED) + zbc_open_zone(devip, &devip->zstate[i], true); + } +} + +static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) +{ + int res = 0; + u64 z_id; + enum sdebug_z_cond zc; + u8 *cmd = scp->cmnd; + struct sdeb_zone_state *zsp; + bool all = cmd[14] & 0x01; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + + if (!sdebug_dev_is_zoned(devip)) { + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + + write_lock(macc_lckp); + + if (all) { + /* Check if all closed zones can be open */ + if (devip->max_open && + devip->nr_exp_open + devip->nr_closed > devip->max_open) { + mk_sense_buffer(scp, DATA_PROTECT, INSUFF_RES_ASC, + INSUFF_ZONE_ASCQ); + res = check_condition_result; + goto fini; + } + /* Open all closed zones */ + zbc_open_all(devip); + goto fini; + } + + /* Open the specified zone */ + z_id = get_unaligned_be64(cmd + 2); + if (z_id >= sdebug_capacity) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); + res = check_condition_result; + goto fini; + } + + zsp = zbc_zone(devip, z_id); + if (z_id != zsp->z_start) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + if (zbc_zone_is_conv(zsp)) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + + zc = zsp->z_cond; + if (zc == ZC3_EXPLICIT_OPEN || zc == ZC5_FULL) + goto fini; + + if (devip->max_open && devip->nr_exp_open >= devip->max_open) { + mk_sense_buffer(scp, DATA_PROTECT, INSUFF_RES_ASC, + INSUFF_ZONE_ASCQ); + res = check_condition_result; + goto fini; + } + + if (zc == ZC2_IMPLICIT_OPEN) + zbc_close_zone(devip, zsp); + zbc_open_zone(devip, zsp, true); +fini: + write_unlock(macc_lckp); + return res; +} + +static void zbc_close_all(struct sdebug_dev_info *devip) +{ + unsigned int i; + + for (i = 0; i < devip->nr_zones; i++) + zbc_close_zone(devip, &devip->zstate[i]); +} + +static int resp_close_zone(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + int res = 0; + u64 z_id; + u8 *cmd = scp->cmnd; + struct sdeb_zone_state *zsp; + bool all = cmd[14] & 0x01; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + + if (!sdebug_dev_is_zoned(devip)) { + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + + write_lock(macc_lckp); + + if (all) { + zbc_close_all(devip); + goto fini; + } + + /* Close specified zone */ + z_id = get_unaligned_be64(cmd + 2); + if (z_id >= sdebug_capacity) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); + res = check_condition_result; + goto fini; + } + + zsp = zbc_zone(devip, z_id); + if (z_id != zsp->z_start) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + if (zbc_zone_is_conv(zsp)) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + + zbc_close_zone(devip, zsp); +fini: + write_unlock(macc_lckp); + return res; +} + +static void zbc_finish_zone(struct sdebug_dev_info *devip, + struct sdeb_zone_state *zsp, bool empty) +{ + enum sdebug_z_cond zc = zsp->z_cond; + + if (zc == ZC4_CLOSED || zc == ZC2_IMPLICIT_OPEN || + zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY)) { + if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN) + zbc_close_zone(devip, zsp); + if (zsp->z_cond == ZC4_CLOSED) + devip->nr_closed--; + zsp->z_wp = zsp->z_start + zsp->z_size; + zsp->z_cond = ZC5_FULL; + } +} + +static void zbc_finish_all(struct sdebug_dev_info *devip) +{ + unsigned int i; + + for (i = 0; i < devip->nr_zones; i++) + zbc_finish_zone(devip, &devip->zstate[i], false); +} + +static int resp_finish_zone(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + struct sdeb_zone_state *zsp; + int res = 0; + u64 z_id; + u8 *cmd = scp->cmnd; + bool all = cmd[14] & 0x01; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + + if (!sdebug_dev_is_zoned(devip)) { + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + + write_lock(macc_lckp); + + if (all) { + zbc_finish_all(devip); + goto fini; + } + + /* Finish the specified zone */ + z_id = get_unaligned_be64(cmd + 2); + if (z_id >= sdebug_capacity) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); + res = check_condition_result; + goto fini; + } + + zsp = zbc_zone(devip, z_id); + if (z_id != zsp->z_start) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + if (zbc_zone_is_conv(zsp)) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + + zbc_finish_zone(devip, zsp, true); +fini: + write_unlock(macc_lckp); + return res; +} + +static void zbc_rwp_zone(struct sdebug_dev_info *devip, + struct sdeb_zone_state *zsp) +{ + enum sdebug_z_cond zc; + + if (zbc_zone_is_conv(zsp)) + return; + + zc = zsp->z_cond; + if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN) + zbc_close_zone(devip, zsp); + + if (zsp->z_cond == ZC4_CLOSED) + devip->nr_closed--; + + zsp->z_wp = zsp->z_start; + zsp->z_cond = ZC1_EMPTY; +} + +static void zbc_rwp_all(struct sdebug_dev_info *devip) +{ + unsigned int i; + + for (i = 0; i < devip->nr_zones; i++) + zbc_rwp_zone(devip, &devip->zstate[i]); +} + +static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) +{ + struct sdeb_zone_state *zsp; + int res = 0; + u64 z_id; + u8 *cmd = scp->cmnd; + bool all = cmd[14] & 0x01; + struct sdeb_store_info *sip = devip2sip(devip); + rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + + if (!sdebug_dev_is_zoned(devip)) { + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + + write_lock(macc_lckp); + + if (all) { + zbc_rwp_all(devip); + goto fini; + } + + z_id = get_unaligned_be64(cmd + 2); + if (z_id >= sdebug_capacity) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0); + res = check_condition_result; + goto fini; + } + + zsp = zbc_zone(devip, z_id); + if (z_id != zsp->z_start) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + if (zbc_zone_is_conv(zsp)) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + res = check_condition_result; + goto fini; + } + + zbc_rwp_zone(devip, zsp); +fini: + write_unlock(macc_lckp); + return res; +} + static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd) { u32 tag = blk_mq_unique_tag(cmnd->request); @@ -4053,6 +4747,76 @@ static void sdebug_q_cmd_wq_complete(struct work_struct *work) static bool got_shared_uuid; static uuid_t shared_uuid; +static int sdebug_device_create_zones(struct sdebug_dev_info *devip) +{ + struct sdeb_zone_state *zsp; + sector_t capacity = get_sdebug_capacity(); + sector_t zstart = 0; + unsigned int i; + + /* + * Set the zone size: if zbc_zone_size_mb is not set, figure out a + * zone size allowing for at least 4 zones on the device. Otherwise, + * use the specified zone size checking that at least 2 zones can be + * created for the device. + */ + if (!zbc_zone_size_mb) { + devip->zsize = (DEF_ZBC_ZONE_SIZE_MB * SZ_1M) + >> ilog2(sdebug_sector_size); + while (capacity < devip->zsize << 2 && devip->zsize >= 2) + devip->zsize >>= 1; + if (devip->zsize < 2) { + pr_err("Device capacity too small\n"); + return -EINVAL; + } + } else { + devip->zsize = (zbc_zone_size_mb * SZ_1M) + >> ilog2(sdebug_sector_size); + if (devip->zsize >= capacity) { + pr_err("Zone size too large for device capacity\n"); + return -EINVAL; + } + } + + if (is_power_of_2(devip->zsize)) + devip->zsize_shift = ilog2(devip->zsize); + devip->nr_zones = (capacity + devip->zsize - 1) >> devip->zsize_shift; + + /* zbc_max_open_zones can be 0, meaning "not reported" (no limit) */ + if (zbc_max_open_zones >= devip->nr_zones - 1) + devip->max_open = (devip->nr_zones - 1) / 2; + else + devip->max_open = zbc_max_open_zones; + + devip->zstate = kcalloc(devip->nr_zones, + sizeof(struct sdeb_zone_state), GFP_KERNEL); + if (!devip->zstate) + return -ENOMEM; + + for (i = 0; i < devip->nr_zones; i++) { + zsp = &devip->zstate[i]; + + zsp->z_start = zstart; + + if (i == 0) { + zsp->z_cond = ZBC_NOT_WRITE_POINTER; + zsp->z_wp = (sector_t)-1; + } else { + zsp->z_cond = ZC1_EMPTY; + zsp->z_wp = zsp->z_start; + } + + if (zsp->z_start + devip->zsize < capacity) + zsp->z_size = devip->zsize; + else + zsp->z_size = capacity - zsp->z_start; + + zstart += zsp->z_size; + } + + return 0; +} + static struct sdebug_dev_info *sdebug_device_create( struct sdebug_host_info *sdbg_host, gfp_t flags) { @@ -4072,6 +4836,13 @@ static struct sdebug_dev_info *sdebug_device_create( } } devip->sdbg_host = sdbg_host; + if (sdeb_zbc_in_use) { + if (sdebug_device_create_zones(devip)) { + kfree(devip); + return NULL; + } + } + devip->sdbg_host = sdbg_host; list_add_tail(&devip->dev_list, &sdbg_host->dev_info_list); } return devip; @@ -5059,7 +5830,13 @@ static ssize_t ptype_store(struct device_driver *ddp, const char *buf, { int n; + /* Cannot change from or to TYPE_ZBC with sysfs */ + if (sdebug_ptype == TYPE_ZBC) + return -EINVAL; + if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) { + if (n == TYPE_ZBC) + return -EINVAL; sdebug_ptype = n; return count; } @@ -5316,6 +6093,10 @@ static ssize_t virtual_gb_store(struct device_driver *ddp, const char *buf, int n; bool changed; + /* Ignore capacity change for ZBC drives for now */ + if (sdeb_zbc_in_use) + return -ENOTSUPP; + if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) { changed = (sdebug_virtual_gb != n); sdebug_virtual_gb = n; @@ -5706,6 +6487,12 @@ static int __init scsi_debug_init(void) for (k = 0; k < submit_queues; ++k) spin_lock_init(&sdebug_q_arr[k].qc_lock); + /* + * check for host managed zoned block device specified with ptype=0x14. + */ + if (sdebug_ptype == TYPE_ZBC) + sdeb_zbc_in_use = true; + if (sdebug_dev_size_mb < 1) sdebug_dev_size_mb = 1; /* force minimum 1 MB ramdisk */ sz = (unsigned long)sdebug_dev_size_mb * 1048576; @@ -5997,6 +6784,7 @@ static int sdebug_add_host_helper(int per_host_idx) list_for_each_entry_safe(sdbg_devinfo, tmp, &sdbg_host->dev_info_list, dev_list) { list_del(&sdbg_devinfo->dev_list); + kfree(sdbg_devinfo->zstate); kfree(sdbg_devinfo); } kfree(sdbg_host); @@ -6408,6 +7196,7 @@ static int sdebug_driver_remove(struct device *dev) list_for_each_entry_safe(sdbg_devinfo, tmp, &sdbg_host->dev_info_list, dev_list) { list_del(&sdbg_devinfo->dev_list); + kfree(sdbg_devinfo->zstate); kfree(sdbg_devinfo); } From 9267e0eb41fedc2d4b930a90aca17051fa1ef21a Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Wed, 22 Apr 2020 19:42:17 +0900 Subject: [PATCH 214/562] scsi: scsi_debug: Add ZBC module parameter Add the zbc module parameter to take either: 0: none (probably a conventional disk) 1: host-aware 2: host-managed These values are chosen to match 'enum blk_zoned_model' found in include/linux/blkdev.h . Instead of "none", "no" or "0" can be given. Instead of "host-aware", "aware or "1" can be given. Instead of "host-managed", "managed" or "2" can be given. Note: the zbc parameter can only be given at driver/module load time; it cannot be changed via sysfs thereafter. At this time there is no ZBC "host-aware" implementation so that string (or the value '1') results in a modprobe error. Link: https://lore.kernel.org/r/20200422104221.378203-4-damien.lemoal@wdc.com Signed-off-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 81 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 6e999320a7f0..d0ac1f584205 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -113,7 +113,9 @@ static const char *sdebug_version_date = "20200421"; #define DEF_ATO 1 #define DEF_CDB_LEN 10 #define DEF_JDELAY 1 /* if > 0 unit is a jiffy */ +#define DEF_DEV_SIZE_PRE_INIT 0 #define DEF_DEV_SIZE_MB 8 +#define DEF_ZBC_DEV_SIZE_MB 128 #define DEF_DIF 0 #define DEF_DIX 0 #define DEF_PER_HOST_STORE false @@ -736,7 +738,7 @@ static int sdebug_add_host = DEF_NUM_HOST; /* in sysfs this is relative */ static int sdebug_ato = DEF_ATO; static int sdebug_cdb_len = DEF_CDB_LEN; static int sdebug_jdelay = DEF_JDELAY; /* if > 0 then unit is jiffies */ -static int sdebug_dev_size_mb = DEF_DEV_SIZE_MB; +static int sdebug_dev_size_mb = DEF_DEV_SIZE_PRE_INIT; static int sdebug_dif = DEF_DIF; static int sdebug_dix = DEF_DIX; static int sdebug_dsense = DEF_D_SENSE; @@ -785,6 +787,9 @@ static bool have_dif_prot; static bool write_since_sync; static bool sdebug_statistics = DEF_STATISTICS; static bool sdebug_wp; +/* Following enum: 0: no zbc, def; 1: host aware; 2: host managed */ +static enum blk_zoned_model sdeb_zbc_model = BLK_ZONED_NONE; +static char *sdeb_zbc_model_s; static unsigned int sdebug_store_sectors; static sector_t sdebug_capacity; /* in sectors */ @@ -5534,6 +5539,7 @@ module_param_named(vpd_use_hostno, sdebug_vpd_use_hostno, int, module_param_named(wp, sdebug_wp, bool, S_IRUGO | S_IWUSR); module_param_named(write_same_length, sdebug_write_same_length, int, S_IRUGO | S_IWUSR); +module_param_named(zbc, sdeb_zbc_model_s, charp, S_IRUGO); MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert"); MODULE_DESCRIPTION("SCSI debug adapter driver"); @@ -5595,6 +5601,7 @@ MODULE_PARM_DESC(virtual_gb, "virtual gigabyte (GiB) size (def=0 -> use dev_size MODULE_PARM_DESC(vpd_use_hostno, "0 -> dev ids ignore hostno (def=1 -> unique dev ids)"); MODULE_PARM_DESC(wp, "Write Protect (def=0)"); MODULE_PARM_DESC(write_same_length, "Maximum blocks per WRITE SAME cmd (def=0xffff)"); +MODULE_PARM_DESC(zbc, "'none' [0]; 'aware' [1]; 'managed' [2] (def=0). Can have 'host-' prefix"); #define SDEBUG_INFO_LEN 256 static char sdebug_info[SDEBUG_INFO_LEN]; @@ -6357,6 +6364,45 @@ static ssize_t cdb_len_store(struct device_driver *ddp, const char *buf, } static DRIVER_ATTR_RW(cdb_len); +static const char * const zbc_model_strs_a[] = { + [BLK_ZONED_NONE] = "none", + [BLK_ZONED_HA] = "host-aware", + [BLK_ZONED_HM] = "host-managed", +}; + +static const char * const zbc_model_strs_b[] = { + [BLK_ZONED_NONE] = "no", + [BLK_ZONED_HA] = "aware", + [BLK_ZONED_HM] = "managed", +}; + +static const char * const zbc_model_strs_c[] = { + [BLK_ZONED_NONE] = "0", + [BLK_ZONED_HA] = "1", + [BLK_ZONED_HM] = "2", +}; + +static int sdeb_zbc_model_str(const char *cp) +{ + int res = sysfs_match_string(zbc_model_strs_a, cp); + + if (res < 0) { + res = sysfs_match_string(zbc_model_strs_b, cp); + if (res < 0) { + res = sysfs_match_string(zbc_model_strs_c, cp); + if (sdeb_zbc_model < 0) + return -EINVAL; + } + } + return res; +} + +static ssize_t zbc_show(struct device_driver *ddp, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%s\n", + zbc_model_strs_a[sdeb_zbc_model]); +} +static DRIVER_ATTR_RO(zbc); /* Note: The following array creates attribute files in the /sys/bus/pseudo/drivers/scsi_debug directory. The advantage of these @@ -6399,6 +6445,7 @@ static struct attribute *sdebug_drv_attrs[] = { &driver_attr_strict.attr, &driver_attr_uuid_ctl.attr, &driver_attr_cdb_len.attr, + &driver_attr_zbc.attr, NULL, }; ATTRIBUTE_GROUPS(sdebug_drv); @@ -6488,11 +6535,39 @@ static int __init scsi_debug_init(void) spin_lock_init(&sdebug_q_arr[k].qc_lock); /* - * check for host managed zoned block device specified with ptype=0x14. + * check for host managed zoned block device specified with + * ptype=0x14 or zbc=XXX. */ - if (sdebug_ptype == TYPE_ZBC) + if (sdebug_ptype == TYPE_ZBC) { + sdeb_zbc_model = BLK_ZONED_HM; + } else if (sdeb_zbc_model_s && *sdeb_zbc_model_s) { + k = sdeb_zbc_model_str(sdeb_zbc_model_s); + if (k < 0) { + ret = k; + goto free_vm; + } + sdeb_zbc_model = k; + switch (sdeb_zbc_model) { + case BLK_ZONED_NONE: + sdebug_ptype = TYPE_DISK; + break; + case BLK_ZONED_HM: + sdebug_ptype = TYPE_ZBC; + break; + case BLK_ZONED_HA: + default: + pr_err("Invalid ZBC model\n"); + return -EINVAL; + } + } + if (sdeb_zbc_model != BLK_ZONED_NONE) { sdeb_zbc_in_use = true; + if (sdebug_dev_size_mb == DEF_DEV_SIZE_PRE_INIT) + sdebug_dev_size_mb = DEF_ZBC_DEV_SIZE_MB; + } + if (sdebug_dev_size_mb == DEF_DEV_SIZE_PRE_INIT) + sdebug_dev_size_mb = DEF_DEV_SIZE_MB; if (sdebug_dev_size_mb < 1) sdebug_dev_size_mb = 1; /* force minimum 1 MB ramdisk */ sz = (unsigned long)sdebug_dev_size_mb * 1048576; From 380603a5bb83d3c55eee20511ba3091e206b7d99 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Apr 2020 19:42:18 +0900 Subject: [PATCH 215/562] scsi: scsi_debug: Add zone_max_open module parameter Add the zone_max_open module parameters to control the maximum number of open zones of a ZBC device. This parameter is ignored for device types other than 0x14 (zbc=2 case). Link: https://lore.kernel.org/r/20200422104221.378203-5-damien.lemoal@wdc.com Signed-off-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index d0ac1f584205..31e8d0b02747 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -822,7 +822,7 @@ static int dif_errors; /* ZBC global data */ static bool sdeb_zbc_in_use; /* true when ptype=TYPE_ZBC [0x14] */ static const int zbc_zone_size_mb; -static const int zbc_max_open_zones = DEF_ZBC_MAX_OPEN_ZONES; +static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES; static int submit_queues = DEF_SUBMIT_QUEUES; /* > 1 for multi-queue (mq) */ static struct sdebug_queue *sdebug_q_arr; /* ptr to array of submit queues */ @@ -4788,10 +4788,10 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) devip->nr_zones = (capacity + devip->zsize - 1) >> devip->zsize_shift; /* zbc_max_open_zones can be 0, meaning "not reported" (no limit) */ - if (zbc_max_open_zones >= devip->nr_zones - 1) + if (sdeb_zbc_max_open >= devip->nr_zones - 1) devip->max_open = (devip->nr_zones - 1) / 2; else - devip->max_open = zbc_max_open_zones; + devip->max_open = sdeb_zbc_max_open; devip->zstate = kcalloc(devip->nr_zones, sizeof(struct sdeb_zone_state), GFP_KERNEL); @@ -5540,6 +5540,7 @@ module_param_named(wp, sdebug_wp, bool, S_IRUGO | S_IWUSR); module_param_named(write_same_length, sdebug_write_same_length, int, S_IRUGO | S_IWUSR); module_param_named(zbc, sdeb_zbc_model_s, charp, S_IRUGO); +module_param_named(zone_max_open, sdeb_zbc_max_open, int, S_IRUGO); MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert"); MODULE_DESCRIPTION("SCSI debug adapter driver"); @@ -5602,6 +5603,7 @@ MODULE_PARM_DESC(vpd_use_hostno, "0 -> dev ids ignore hostno (def=1 -> unique de MODULE_PARM_DESC(wp, "Write Protect (def=0)"); MODULE_PARM_DESC(write_same_length, "Maximum blocks per WRITE SAME cmd (def=0xffff)"); MODULE_PARM_DESC(zbc, "'none' [0]; 'aware' [1]; 'managed' [2] (def=0). Can have 'host-' prefix"); +MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones; [0] for no limit (def=auto)"); #define SDEBUG_INFO_LEN 256 static char sdebug_info[SDEBUG_INFO_LEN]; From aa8fecf96b704adfd2ee2b6c76248c1f1cb237ef Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Apr 2020 19:42:19 +0900 Subject: [PATCH 216/562] scsi: scsi_debug: Add zone_nr_conv module parameter Allow controlling the number of conventional zones of a ZBC device with the new zone_nr_conv module parameter. The default value is 1 and the specified value must be less than the total number of zones of the device. This parameter is ignored for device types other than 0x14 (zbc=2 case). Link: https://lore.kernel.org/r/20200422104221.378203-6-damien.lemoal@wdc.com Signed-off-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 31e8d0b02747..76bd679c5af1 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -157,6 +157,7 @@ static const char *sdebug_version_date = "20200421"; /* Default parameters for ZBC drives */ #define DEF_ZBC_ZONE_SIZE_MB 128 #define DEF_ZBC_MAX_OPEN_ZONES 8 +#define DEF_ZBC_NR_CONV_ZONES 1 #define SDEBUG_LUN_0_VAL 0 @@ -296,6 +297,7 @@ struct sdebug_dev_info { unsigned int zsize; unsigned int zsize_shift; unsigned int nr_zones; + unsigned int nr_conv_zones; unsigned int nr_imp_open; unsigned int nr_exp_open; unsigned int nr_closed; @@ -823,6 +825,7 @@ static int dif_errors; static bool sdeb_zbc_in_use; /* true when ptype=TYPE_ZBC [0x14] */ static const int zbc_zone_size_mb; static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES; +static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES; static int submit_queues = DEF_SUBMIT_QUEUES; /* > 1 for multi-queue (mq) */ static struct sdebug_queue *sdebug_q_arr; /* ptr to array of submit queues */ @@ -4787,6 +4790,12 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) devip->zsize_shift = ilog2(devip->zsize); devip->nr_zones = (capacity + devip->zsize - 1) >> devip->zsize_shift; + if (sdeb_zbc_nr_conv >= devip->nr_zones) { + pr_err("Number of conventional zones too large\n"); + return -EINVAL; + } + devip->nr_conv_zones = sdeb_zbc_nr_conv; + /* zbc_max_open_zones can be 0, meaning "not reported" (no limit) */ if (sdeb_zbc_max_open >= devip->nr_zones - 1) devip->max_open = (devip->nr_zones - 1) / 2; @@ -4803,7 +4812,7 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) zsp->z_start = zstart; - if (i == 0) { + if (i < devip->nr_conv_zones) { zsp->z_cond = ZBC_NOT_WRITE_POINTER; zsp->z_wp = (sector_t)-1; } else { @@ -5541,6 +5550,7 @@ module_param_named(write_same_length, sdebug_write_same_length, int, S_IRUGO | S_IWUSR); module_param_named(zbc, sdeb_zbc_model_s, charp, S_IRUGO); module_param_named(zone_max_open, sdeb_zbc_max_open, int, S_IRUGO); +module_param_named(zone_nr_conv, sdeb_zbc_nr_conv, int, S_IRUGO); MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert"); MODULE_DESCRIPTION("SCSI debug adapter driver"); @@ -5604,6 +5614,7 @@ MODULE_PARM_DESC(wp, "Write Protect (def=0)"); MODULE_PARM_DESC(write_same_length, "Maximum blocks per WRITE SAME cmd (def=0xffff)"); MODULE_PARM_DESC(zbc, "'none' [0]; 'aware' [1]; 'managed' [2] (def=0). Can have 'host-' prefix"); MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones; [0] for no limit (def=auto)"); +MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones (def=1)"); #define SDEBUG_INFO_LEN 256 static char sdebug_info[SDEBUG_INFO_LEN]; From 98e0a689868c26eb82650ee759073f2295e74c97 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Apr 2020 19:42:20 +0900 Subject: [PATCH 217/562] scsi: scsi_debug: Add zone_size_mb module parameter Add the zone_size_mb module parameters to control the zone size of a ZBC device. If the zone size specified is not a divisor of the device capacity, the last zone of the device will be created as a smaller "runt" zone. This parameter is ignored for device types other than 0x14 (zbc=2 case). Note: for testing purposes, zone sizes that are not a power of 2 are accepted but will result in the drive being rejected by the sd driver. Link: https://lore.kernel.org/r/20200422104221.378203-7-damien.lemoal@wdc.com Signed-off-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 76bd679c5af1..14a65604e176 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -823,7 +823,7 @@ static int dif_errors; /* ZBC global data */ static bool sdeb_zbc_in_use; /* true when ptype=TYPE_ZBC [0x14] */ -static const int zbc_zone_size_mb; +static int sdeb_zbc_zone_size_mb; static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES; static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES; @@ -4763,12 +4763,12 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) unsigned int i; /* - * Set the zone size: if zbc_zone_size_mb is not set, figure out a - * zone size allowing for at least 4 zones on the device. Otherwise, + * Set the zone size: if sdeb_zbc_zone_size_mb is not set, figure out + * a zone size allowing for at least 4 zones on the device. Otherwise, * use the specified zone size checking that at least 2 zones can be * created for the device. */ - if (!zbc_zone_size_mb) { + if (!sdeb_zbc_zone_size_mb) { devip->zsize = (DEF_ZBC_ZONE_SIZE_MB * SZ_1M) >> ilog2(sdebug_sector_size); while (capacity < devip->zsize << 2 && devip->zsize >= 2) @@ -4778,7 +4778,7 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) return -EINVAL; } } else { - devip->zsize = (zbc_zone_size_mb * SZ_1M) + devip->zsize = (sdeb_zbc_zone_size_mb * SZ_1M) >> ilog2(sdebug_sector_size); if (devip->zsize >= capacity) { pr_err("Zone size too large for device capacity\n"); @@ -5551,6 +5551,7 @@ module_param_named(write_same_length, sdebug_write_same_length, int, module_param_named(zbc, sdeb_zbc_model_s, charp, S_IRUGO); module_param_named(zone_max_open, sdeb_zbc_max_open, int, S_IRUGO); module_param_named(zone_nr_conv, sdeb_zbc_nr_conv, int, S_IRUGO); +module_param_named(zone_size_mb, sdeb_zbc_zone_size_mb, int, S_IRUGO); MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert"); MODULE_DESCRIPTION("SCSI debug adapter driver"); @@ -5615,6 +5616,7 @@ MODULE_PARM_DESC(write_same_length, "Maximum blocks per WRITE SAME cmd (def=0xff MODULE_PARM_DESC(zbc, "'none' [0]; 'aware' [1]; 'managed' [2] (def=0). Can have 'host-' prefix"); MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones; [0] for no limit (def=auto)"); MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones (def=1)"); +MODULE_PARM_DESC(zone_size_mb, "Zone size in MiB (def=auto)"); #define SDEBUG_INFO_LEN 256 static char sdebug_info[SDEBUG_INFO_LEN]; From 64e14ece07004f0bf434fe7aad4a6d6411b1d9b6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Apr 2020 19:42:21 +0900 Subject: [PATCH 218/562] scsi: scsi_debug: Implement ZBC host-aware emulation Implement ZBC host-aware device model emulation. The main changes from the host-managed emulation are the device type (TYPE_DISK is used), relaxation of access checks for read and write operations and different handling of a sequential write preferred zone write pointer as mandated by the ZBC r05 specifications. To facilitate the implementation and avoid a lot of "if" statement, the zmodel field is added to the device information and the z_type field to the zone state data structure. Link: https://lore.kernel.org/r/20200422104221.378203-8-damien.lemoal@wdc.com Tested-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 148 ++++++++++++++++++++++++++------------ 1 file changed, 103 insertions(+), 45 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 14a65604e176..d3ea16f3c12e 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -262,6 +262,13 @@ static const char *sdebug_version_date = "20200421"; #define SDEB_XA_NOT_IN_USE XA_MARK_1 +/* Zone types (zbcr05 table 25) */ +enum sdebug_z_type { + ZBC_ZONE_TYPE_CNV = 0x1, + ZBC_ZONE_TYPE_SWR = 0x2, + ZBC_ZONE_TYPE_SWP = 0x3, +}; + /* enumeration names taken from table 26, zbcr05 */ enum sdebug_z_cond { ZBC_NOT_WRITE_POINTER = 0x0, @@ -275,7 +282,9 @@ enum sdebug_z_cond { }; struct sdeb_zone_state { /* ZBC: per zone state */ + enum sdebug_z_type z_type; enum sdebug_z_cond z_cond; + bool z_non_seq_resource; unsigned int z_size; sector_t z_start; sector_t z_wp; @@ -294,6 +303,7 @@ struct sdebug_dev_info { bool used; /* For ZBC devices */ + enum blk_zoned_model zmodel; unsigned int zsize; unsigned int zsize_shift; unsigned int nr_zones; @@ -822,7 +832,7 @@ static int dix_reads; static int dif_errors; /* ZBC global data */ -static bool sdeb_zbc_in_use; /* true when ptype=TYPE_ZBC [0x14] */ +static bool sdeb_zbc_in_use; /* true for host-aware and host-managed disks */ static int sdeb_zbc_zone_size_mb; static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES; static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES; @@ -1500,13 +1510,15 @@ static int inquiry_vpd_b0(unsigned char *arr) } /* Block device characteristics VPD page (SBC-3) */ -static int inquiry_vpd_b1(unsigned char *arr) +static int inquiry_vpd_b1(struct sdebug_dev_info *devip, unsigned char *arr) { memset(arr, 0, 0x3c); arr[0] = 0; arr[1] = 1; /* non rotating medium (e.g. solid state) */ arr[2] = 0; arr[3] = 5; /* less than 1.8" */ + if (devip->zmodel == BLK_ZONED_HA) + arr[4] = 1 << 4; /* zoned field = 01b */ return 0x3c; } @@ -1543,7 +1555,7 @@ static int inquiry_vpd_b6(struct sdebug_dev_info *devip, unsigned char *arr) */ put_unaligned_be32(0xffffffff, &arr[4]); put_unaligned_be32(0xffffffff, &arr[8]); - if (devip->max_open) + if (sdeb_zbc_model == BLK_ZONED_HM && devip->max_open) put_unaligned_be32(devip->max_open, &arr[12]); else put_unaligned_be32(0xffffffff, &arr[12]); @@ -1566,7 +1578,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) if (! arr) return DID_REQUEUE << 16; is_disk = (sdebug_ptype == TYPE_DISK); - is_zbc = (sdebug_ptype == TYPE_ZBC); + is_zbc = (devip->zmodel != BLK_ZONED_NONE); is_disk_zbc = (is_disk || is_zbc); have_wlun = scsi_is_wlun(scp->device->lun); if (have_wlun) @@ -1611,7 +1623,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) arr[n++] = 0xb1; /* Block characteristics */ if (is_disk) arr[n++] = 0xb2; /* LB Provisioning */ - else if (is_zbc) + if (is_zbc) arr[n++] = 0xb6; /* ZB dev. char. */ } arr[3] = n - 4; /* number of supported VPD pages */ @@ -1660,7 +1672,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) arr[3] = inquiry_vpd_b0(&arr[4]); } else if (is_disk_zbc && 0xb1 == cmd[2]) { /* Block char. */ arr[1] = cmd[2]; /*sanity */ - arr[3] = inquiry_vpd_b1(&arr[4]); + arr[3] = inquiry_vpd_b1(devip, &arr[4]); } else if (is_disk && 0xb2 == cmd[2]) { /* LB Prov. */ arr[1] = cmd[2]; /*sanity */ arr[3] = inquiry_vpd_b2(&arr[4]); @@ -2305,7 +2317,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, msense_6 = (MODE_SENSE == cmd[0]); llbaa = msense_6 ? false : !!(cmd[1] & 0x10); is_disk = (sdebug_ptype == TYPE_DISK); - is_zbc = (sdebug_ptype == TYPE_ZBC); + is_zbc = (devip->zmodel != BLK_ZONED_NONE); if ((is_disk || is_zbc) && !dbd) bd_len = llbaa ? 16 : 8; else @@ -2656,7 +2668,7 @@ static struct sdeb_zone_state *zbc_zone(struct sdebug_dev_info *devip, static inline bool zbc_zone_is_conv(struct sdeb_zone_state *zsp) { - return zsp->z_cond == ZBC_NOT_WRITE_POINTER; + return zsp->z_type == ZBC_ZONE_TYPE_CNV; } static void zbc_close_zone(struct sdebug_dev_info *devip, @@ -2732,13 +2744,42 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, unsigned long long lba, unsigned int num) { struct sdeb_zone_state *zsp = zbc_zone(devip, lba); + unsigned long long n, end, zend = zsp->z_start + zsp->z_size; if (zbc_zone_is_conv(zsp)) return; - zsp->z_wp += num; - if (zsp->z_wp >= zsp->z_start + zsp->z_size) - zsp->z_cond = ZC5_FULL; + if (zsp->z_type == ZBC_ZONE_TYPE_SWR) { + zsp->z_wp += num; + if (zsp->z_wp >= zend) + zsp->z_cond = ZC5_FULL; + return; + } + + while (num) { + if (lba != zsp->z_wp) + zsp->z_non_seq_resource = true; + + end = lba + num; + if (end >= zend) { + n = zend - lba; + zsp->z_wp = zend; + } else if (end > zsp->z_wp) { + n = num; + zsp->z_wp = end; + } else { + n = num; + } + if (zsp->z_wp >= zend) + zsp->z_cond = ZC5_FULL; + + num -= n; + lba += n; + if (num) { + zsp++; + zend = zsp->z_start + zsp->z_size; + } + } } static int check_zbc_access_params(struct scsi_cmnd *scp, @@ -2750,7 +2791,9 @@ static int check_zbc_access_params(struct scsi_cmnd *scp, struct sdeb_zone_state *zsp_end = zbc_zone(devip, lba + num - 1); if (!write) { - /* Reads cannot cross zone types boundaries */ + if (devip->zmodel == BLK_ZONED_HA) + return 0; + /* For host-managed, reads cannot cross zone types boundaries */ if (zsp_end != zsp && zbc_zone_is_conv(zsp) && !zbc_zone_is_conv(zsp_end)) { @@ -2773,25 +2816,27 @@ static int check_zbc_access_params(struct scsi_cmnd *scp, return 0; } - /* Writes cannot cross sequential zone boundaries */ - if (zsp_end != zsp) { - mk_sense_buffer(scp, ILLEGAL_REQUEST, - LBA_OUT_OF_RANGE, - WRITE_BOUNDARY_ASCQ); - return check_condition_result; - } - /* Cannot write full zones */ - if (zsp->z_cond == ZC5_FULL) { - mk_sense_buffer(scp, ILLEGAL_REQUEST, - INVALID_FIELD_IN_CDB, 0); - return check_condition_result; - } - /* Writes must be aligned to the zone WP */ - if (lba != zsp->z_wp) { - mk_sense_buffer(scp, ILLEGAL_REQUEST, - LBA_OUT_OF_RANGE, - UNALIGNED_WRITE_ASCQ); - return check_condition_result; + if (zsp->z_type == ZBC_ZONE_TYPE_SWR) { + /* Writes cannot cross sequential zone boundaries */ + if (zsp_end != zsp) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + LBA_OUT_OF_RANGE, + WRITE_BOUNDARY_ASCQ); + return check_condition_result; + } + /* Cannot write full zones */ + if (zsp->z_cond == ZC5_FULL) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + INVALID_FIELD_IN_CDB, 0); + return check_condition_result; + } + /* Writes must be aligned to the zone WP */ + if (lba != zsp->z_wp) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, + LBA_OUT_OF_RANGE, + UNALIGNED_WRITE_ASCQ); + return check_condition_result; + } } /* Handle implicit open of closed and empty zones */ @@ -4312,13 +4357,16 @@ static int resp_report_zones(struct scsi_cmnd *scp, case 0x06: case 0x07: case 0x10: - case 0x11: /* - * Read-only, offline, reset WP recommended and - * non-seq-resource-used are not emulated: no zones - * to report; + * Read-only, offline, reset WP recommended are + * not emulated: no zones to report; */ continue; + case 0x11: + /* non-seq-resource set */ + if (!zsp->z_non_seq_resource) + continue; + break; case 0x3f: /* Not write pointer (conventional) zones */ if (!zbc_zone_is_conv(zsp)) @@ -4333,11 +4381,10 @@ static int resp_report_zones(struct scsi_cmnd *scp, if (nrz < rep_max_zones) { /* Fill zone descriptor */ - if (zbc_zone_is_conv(zsp)) - desc[0] = 0x1; - else - desc[0] = 0x2; + desc[0] = zsp->z_type; desc[1] = zsp->z_cond << 4; + if (zsp->z_non_seq_resource) + desc[1] |= 1 << 1; put_unaligned_be64((u64)zsp->z_size, desc + 8); put_unaligned_be64((u64)zsp->z_start, desc + 16); put_unaligned_be64((u64)zsp->z_wp, desc + 24); @@ -4591,6 +4638,7 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip, if (zsp->z_cond == ZC4_CLOSED) devip->nr_closed--; + zsp->z_non_seq_resource = false; zsp->z_wp = zsp->z_start; zsp->z_cond = ZC1_EMPTY; } @@ -4796,11 +4844,13 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) } devip->nr_conv_zones = sdeb_zbc_nr_conv; - /* zbc_max_open_zones can be 0, meaning "not reported" (no limit) */ - if (sdeb_zbc_max_open >= devip->nr_zones - 1) - devip->max_open = (devip->nr_zones - 1) / 2; - else - devip->max_open = sdeb_zbc_max_open; + if (devip->zmodel == BLK_ZONED_HM) { + /* zbc_max_open_zones can be 0, meaning "not reported" */ + if (sdeb_zbc_max_open >= devip->nr_zones - 1) + devip->max_open = (devip->nr_zones - 1) / 2; + else + devip->max_open = sdeb_zbc_max_open; + } devip->zstate = kcalloc(devip->nr_zones, sizeof(struct sdeb_zone_state), GFP_KERNEL); @@ -4813,9 +4863,14 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) zsp->z_start = zstart; if (i < devip->nr_conv_zones) { + zsp->z_type = ZBC_ZONE_TYPE_CNV; zsp->z_cond = ZBC_NOT_WRITE_POINTER; zsp->z_wp = (sector_t)-1; } else { + if (devip->zmodel == BLK_ZONED_HM) + zsp->z_type = ZBC_ZONE_TYPE_SWR; + else + zsp->z_type = ZBC_ZONE_TYPE_SWP; zsp->z_cond = ZC1_EMPTY; zsp->z_wp = zsp->z_start; } @@ -4851,10 +4906,13 @@ static struct sdebug_dev_info *sdebug_device_create( } devip->sdbg_host = sdbg_host; if (sdeb_zbc_in_use) { + devip->zmodel = sdeb_zbc_model; if (sdebug_device_create_zones(devip)) { kfree(devip); return NULL; } + } else { + devip->zmodel = BLK_ZONED_NONE; } devip->sdbg_host = sdbg_host; list_add_tail(&devip->dev_list, &sdbg_host->dev_info_list); @@ -6564,12 +6622,12 @@ static int __init scsi_debug_init(void) sdeb_zbc_model = k; switch (sdeb_zbc_model) { case BLK_ZONED_NONE: + case BLK_ZONED_HA: sdebug_ptype = TYPE_DISK; break; case BLK_ZONED_HM: sdebug_ptype = TYPE_ZBC; break; - case BLK_ZONED_HA: default: pr_err("Invalid ZBC model\n"); return -EINVAL; From fef2d3bb2db61d904e4016a181dd107d608fd055 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Tue, 5 May 2020 10:45:01 +0200 Subject: [PATCH 219/562] gpio: tegra186: export MODULE_DEVICE_TABLE Export MODULE_DEVICE_TABLE since the driver can be built as a module. Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra186.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 79b553dc39a3..178e9128ded0 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -894,6 +894,7 @@ static const struct of_device_id tegra186_gpio_of_match[] = { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, tegra186_gpio_of_match); static struct platform_driver tegra186_gpio_driver = { .driver = { From 17f96ee2b9be9b65517fe167cfb13b86b33f3a1d Mon Sep 17 00:00:00 2001 From: Petteri Jokinen Date: Sat, 2 May 2020 20:22:39 +0300 Subject: [PATCH 220/562] gpio-f7188x: Add GPIO support for F81865 Add GPIO support for Fintek F81865 chip. Datasheet: http://www.hardwaresecrets.com/datasheets/F81865_V028P.pdf Signed-off-by: Petteri Jokinen Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-f7188x.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c index cadd02993539..18a3147f5a42 100644 --- a/drivers/gpio/gpio-f7188x.c +++ b/drivers/gpio/gpio-f7188x.c @@ -36,9 +36,19 @@ #define SIO_F71889A_ID 0x1005 /* F71889A chipset ID */ #define SIO_F81866_ID 0x1010 /* F81866 chipset ID */ #define SIO_F81804_ID 0x1502 /* F81804 chipset ID, same for f81966 */ +#define SIO_F81865_ID 0x0704 /* F81865 chipset ID */ -enum chips { f71869, f71869a, f71882fg, f71889a, f71889f, f81866, f81804 }; +enum chips { + f71869, + f71869a, + f71882fg, + f71889a, + f71889f, + f81866, + f81804, + f81865, +}; static const char * const f7188x_names[] = { "f71869", @@ -48,6 +58,7 @@ static const char * const f7188x_names[] = { "f71889f", "f81866", "f81804", + "f81865", }; struct f7188x_sio { @@ -233,6 +244,15 @@ static struct f7188x_gpio_bank f81804_gpio_bank[] = { F7188X_GPIO_BANK(90, 8, 0x98), }; +static struct f7188x_gpio_bank f81865_gpio_bank[] = { + F7188X_GPIO_BANK(0, 8, 0xF0), + F7188X_GPIO_BANK(10, 8, 0xE0), + F7188X_GPIO_BANK(20, 8, 0xD0), + F7188X_GPIO_BANK(30, 8, 0xC0), + F7188X_GPIO_BANK(40, 8, 0xB0), + F7188X_GPIO_BANK(50, 8, 0xA0), + F7188X_GPIO_BANK(60, 5, 0x90), +}; static int f7188x_gpio_get_direction(struct gpio_chip *chip, unsigned offset) { @@ -425,6 +445,10 @@ static int f7188x_gpio_probe(struct platform_device *pdev) data->nr_bank = ARRAY_SIZE(f81804_gpio_bank); data->bank = f81804_gpio_bank; break; + case f81865: + data->nr_bank = ARRAY_SIZE(f81865_gpio_bank); + data->bank = f81865_gpio_bank; + break; default: return -ENODEV; } @@ -490,6 +514,9 @@ static int __init f7188x_find(int addr, struct f7188x_sio *sio) case SIO_F81804_ID: sio->type = f81804; break; + case SIO_F81865_ID: + sio->type = f81865; + break; default: pr_info(DRVNAME ": Unsupported Fintek device 0x%04x\n", devid); goto err; From 3831c051dfbf58595085e432acc00ad4efcf54cc Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 30 Apr 2020 08:09:16 +0800 Subject: [PATCH 221/562] tools: gpio: add bias flags to lsgpio Add display of the bias flags. Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- tools/gpio/lsgpio.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index e1430f504c13..8a71ad36f83b 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -49,6 +49,18 @@ struct gpio_flag flagnames[] = { .name = "open-source", .mask = GPIOLINE_FLAG_OPEN_SOURCE, }, + { + .name = "pull-up", + .mask = GPIOLINE_FLAG_BIAS_PULL_UP, + }, + { + .name = "pull-down", + .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN, + }, + { + .name = "bias-disabled", + .mask = GPIOLINE_FLAG_BIAS_DISABLE, + }, }; void print_flags(unsigned long flags) From 04c349a96506961b1b31e8d03e784fe3c5413e0b Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 13 Apr 2020 16:24:08 +0300 Subject: [PATCH 222/562] RDMA/mad: Remove snoop interface Snoop interface is not used. Remove it. Link: https://lore.kernel.org/r/20200413132408.931084-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 236 +--------------------------------- include/rdma/ib_mad.h | 49 +------ 2 files changed, 5 insertions(+), 280 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c54db13fa9b0..e02b5c4fdf09 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -85,7 +85,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -/* Client ID 0 is used for snoop-only clients */ static DEFINE_XARRAY_ALLOC1(ib_mad_clients); static u32 ib_mad_client_next; static struct list_head ib_mad_port_list; @@ -483,141 +482,12 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } EXPORT_SYMBOL(ib_register_mad_agent); -static inline int is_snooping_sends(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (/*IB_MAD_SNOOP_POSTED_SENDS | - IB_MAD_SNOOP_RMPP_SENDS |*/ - IB_MAD_SNOOP_SEND_COMPLETIONS /*| - IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); -} - -static inline int is_snooping_recvs(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (IB_MAD_SNOOP_RECVS /*| - IB_MAD_SNOOP_RMPP_RECVS*/)); -} - -static int register_snoop_agent(struct ib_mad_qp_info *qp_info, - struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_snoop_private **new_snoop_table; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - /* Check for empty slot in array. */ - for (i = 0; i < qp_info->snoop_table_size; i++) - if (!qp_info->snoop_table[i]) - break; - - if (i == qp_info->snoop_table_size) { - /* Grow table. */ - new_snoop_table = krealloc(qp_info->snoop_table, - sizeof mad_snoop_priv * - (qp_info->snoop_table_size + 1), - GFP_ATOMIC); - if (!new_snoop_table) { - i = -ENOMEM; - goto out; - } - - qp_info->snoop_table = new_snoop_table; - qp_info->snoop_table_size++; - } - qp_info->snoop_table[i] = mad_snoop_priv; - atomic_inc(&qp_info->snoop_count); -out: - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - return i; -} - -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_agent *ret; - struct ib_mad_snoop_private *mad_snoop_priv; - int qpn; - int err; - - /* Validate parameters */ - if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) || - (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - qpn = get_spl_qp_index(qp_type); - if (qpn == -1) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - port_priv = ib_get_mad_port(device, port_num); - if (!port_priv) { - ret = ERR_PTR(-ENODEV); - goto error1; - } - /* Allocate structures */ - mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); - if (!mad_snoop_priv) { - ret = ERR_PTR(-ENOMEM); - goto error1; - } - - /* Now, fill in the various structures */ - mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; - mad_snoop_priv->agent.device = device; - mad_snoop_priv->agent.recv_handler = recv_handler; - mad_snoop_priv->agent.snoop_handler = snoop_handler; - mad_snoop_priv->agent.context = context; - mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; - mad_snoop_priv->agent.port_num = port_num; - mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; - init_completion(&mad_snoop_priv->comp); - - err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type); - if (err) { - ret = ERR_PTR(err); - goto error2; - } - - mad_snoop_priv->snoop_index = register_snoop_agent( - &port_priv->qp_info[qpn], - mad_snoop_priv); - if (mad_snoop_priv->snoop_index < 0) { - ret = ERR_PTR(mad_snoop_priv->snoop_index); - goto error3; - } - - atomic_set(&mad_snoop_priv->refcount, 1); - return &mad_snoop_priv->agent; -error3: - ib_mad_agent_security_cleanup(&mad_snoop_priv->agent); -error2: - kfree(mad_snoop_priv); -error1: - return ret; -} -EXPORT_SYMBOL(ib_register_mad_snoop); - static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { if (atomic_dec_and_test(&mad_agent_priv->refcount)) complete(&mad_agent_priv->comp); } -static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) -{ - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - complete(&mad_snoop_priv->comp); -} - static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; @@ -650,25 +520,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) kfree_rcu(mad_agent_priv, rcu); } -static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_qp_info *qp_info; - unsigned long flags; - - qp_info = mad_snoop_priv->qp_info; - spin_lock_irqsave(&qp_info->snoop_lock, flags); - qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; - atomic_dec(&qp_info->snoop_count); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - - deref_snoop_agent(mad_snoop_priv); - wait_for_completion(&mad_snoop_priv->comp); - - ib_mad_agent_security_cleanup(&mad_snoop_priv->agent); - - kfree(mad_snoop_priv); -} - /* * ib_unregister_mad_agent - Unregisters a client from using MAD services * @@ -677,20 +528,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_snoop_private *mad_snoop_priv; - /* If the TID is zero, the agent can only snoop. */ - if (mad_agent->hi_tid) { - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); - unregister_mad_agent(mad_agent_priv); - } else { - mad_snoop_priv = container_of(mad_agent, - struct ib_mad_snoop_private, - agent); - unregister_mad_snoop(mad_snoop_priv); - } + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + unregister_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_unregister_mad_agent); @@ -706,57 +548,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) spin_unlock_irqrestore(&mad_queue->lock, flags); } -static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_mad_send_buf *send_buf, - struct ib_mad_send_wc *mad_send_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_buf, mad_send_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - -static void snoop_recv(struct ib_mad_qp_info *qp_info, - struct ib_mad_recv_wc *mad_recv_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL, - mad_recv_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, u16 pkey_index, u8 port_num, struct ib_wc *wc) { @@ -2289,9 +2080,6 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; - if (atomic_read(&qp_info->snoop_count)) - snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); - /* Validate MAD */ if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; @@ -2538,9 +2326,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_send_wc.status = wc->status; mad_send_wc.vendor_err = wc->vendor_err; - if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { @@ -2782,10 +2567,6 @@ static void local_completions(struct work_struct *work) local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = (struct ib_mad *)local->mad_priv->mad; - if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) - snoop_recv(recv_mad_agent->qp_info, - &local->mad_priv->header.recv_wc, - IB_MAD_SNOOP_RECVS); recv_mad_agent->agent.recv_handler( &recv_mad_agent->agent, &local->mad_send_wr->send_buf, @@ -2800,10 +2581,6 @@ static void local_completions(struct work_struct *work) mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &local->mad_send_wr->send_buf; - if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, - &local->mad_send_wr->send_buf, - &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -3119,10 +2896,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv, init_mad_queue(qp_info, &qp_info->send_queue); init_mad_queue(qp_info, &qp_info->recv_queue); INIT_LIST_HEAD(&qp_info->overflow_list); - spin_lock_init(&qp_info->snoop_lock); - qp_info->snoop_table = NULL; - qp_info->snoop_table_size = 0; - atomic_set(&qp_info->snoop_count, 0); } static int create_mad_qp(struct ib_mad_qp_info *qp_info, @@ -3166,7 +2939,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) return; ib_destroy_qp(qp_info->qp); - kfree(qp_info->snoop_table); } /* diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 4e62650e2127..8c093fc1bb9f 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -558,20 +558,6 @@ struct ib_mad_recv_wc; typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc); -/** - * ib_mad_snoop_handler - Callback handler for snooping sent MADs. - * @mad_agent: MAD agent that snooped the MAD. - * @send_buf: send MAD data buffer. - * @mad_send_wc: Work completion information on the sent MAD. Valid - * only for snooping that occurs on a send completion. - * - * Clients snooping MADs should not modify data referenced by the @send_buf - * or @mad_send_wc. - */ -typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, - struct ib_mad_send_wc *mad_send_wc); - /** * ib_mad_recv_handler - callback handler for a received MAD. * @mad_agent: MAD agent requesting the received MAD. @@ -581,8 +567,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, * MADs received in response to a send request operation will be handed to * the user before the send operation completes. All data buffers given * to registered agents through this routine are owned by the receiving - * client, except for snooping agents. Clients snooping MADs should not - * modify the data referenced by @mad_recv_wc. + * client. */ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, @@ -595,7 +580,6 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, * @mr: Memory region for system memory usable for DMA. * @recv_handler: Callback handler for a received MAD. * @send_handler: Callback handler for a sent MAD. - * @snoop_handler: Callback handler for snooped sent MADs. * @context: User-specified context associated with this registration. * @hi_tid: Access layer assigned transaction ID for this client. * Unsolicited MADs sent by this client will have the upper 32-bits @@ -612,7 +596,6 @@ struct ib_mad_agent { struct ib_qp *qp; ib_mad_recv_handler recv_handler; ib_mad_send_handler send_handler; - ib_mad_snoop_handler snoop_handler; void *context; u32 hi_tid; u32 flags; @@ -720,36 +703,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, ib_mad_recv_handler recv_handler, void *context, u32 registration_flags); - -enum ib_mad_snoop_flags { - /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ - /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ - IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), - /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ - IB_MAD_SNOOP_RECVS = (1<<4) - /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ - /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ -}; - -/** - * ib_register_mad_snoop - Register to snoop sent and received MADs. - * @device: The device to register with. - * @port_num: The port on the specified device to use. - * @qp_type: Specifies which QP traffic to snoop. Must be either - * IB_QPT_SMI or IB_QPT_GSI. - * @mad_snoop_flags: Specifies information where snooping occurs. - * @send_handler: The callback routine invoked for a snooped send. - * @recv_handler: The callback routine invoked for a snooped receive. - * @context: User specified context associated with the registration. - */ -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context); - /** * ib_unregister_mad_agent - Unregisters a client from using MAD services. * @mad_agent: Corresponding MAD registration request to deregister. From 11a0ae4c4bff9b2a471b54dbe910fc0f60e58e62 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 21 Apr 2020 20:24:40 +0300 Subject: [PATCH 223/562] RDMA: Allow ib_client's to fail when add() is called When a client is added it isn't allowed to fail, but all the client's have various failure paths within their add routines. This creates the very fringe condition where the client was added, failed during add and didn't set the client_data. The core code will then still call other client_data centric ops like remove(), rename(), get_nl_info(), and get_net_dev_by_params() with NULL client_data - which is confusing and unexpected. If the add() callback fails, then do not call any more client ops for the device, even remove. Remove all the now redundant checks for NULL client_data in ops callbacks. Update all the add() callbacks to return error codes appropriately. EOPNOTSUPP is used for cases where the ULP does not support the ib_device - eg because it only works with IB. Link: https://lore.kernel.org/r/20200421172440.387069-1-leon@kernel.org Signed-off-by: Leon Romanovsky Acked-by: Ursula Braun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 24 ++++++++++-------- drivers/infiniband/core/cma.c | 23 +++++++++-------- drivers/infiniband/core/device.c | 16 ++++++++++-- drivers/infiniband/core/mad.c | 17 ++++++++++--- drivers/infiniband/core/multicast.c | 12 ++++----- drivers/infiniband/core/sa_query.c | 22 ++++++++-------- drivers/infiniband/core/user_mad.c | 22 ++++++++-------- drivers/infiniband/core/uverbs_main.c | 24 +++++++++--------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 15 ++++------- .../infiniband/ulp/opa_vnic/opa_vnic_vema.c | 12 ++++----- drivers/infiniband/ulp/srp/ib_srp.c | 21 ++++++++-------- drivers/infiniband/ulp/srpt/ib_srpt.c | 25 ++++++++----------- include/rdma/ib_verbs.h | 2 +- net/rds/ib.c | 21 ++++++++++------ net/smc/smc_ib.c | 10 +++----- 15 files changed, 142 insertions(+), 124 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4794113ecd59..68e1a9bba027 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -81,7 +81,7 @@ const char *__attribute_const__ ibcm_reject_msg(int reason) EXPORT_SYMBOL(ibcm_reject_msg); struct cm_id_private; -static void cm_add_one(struct ib_device *device); +static int cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param); @@ -4382,7 +4382,7 @@ static void cm_remove_port_fs(struct cm_port *port) } -static void cm_add_one(struct ib_device *ib_device) +static int cm_add_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; @@ -4401,7 +4401,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), GFP_KERNEL); if (!cm_dev) - return; + return -ENOMEM; cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; @@ -4413,8 +4413,10 @@ static void cm_add_one(struct ib_device *ib_device) continue; port = kzalloc(sizeof *port, GFP_KERNEL); - if (!port) + if (!port) { + ret = -ENOMEM; goto error1; + } cm_dev->port[i-1] = port; port->cm_dev = cm_dev; @@ -4435,8 +4437,10 @@ static void cm_add_one(struct ib_device *ib_device) cm_recv_handler, port, 0); - if (IS_ERR(port->mad_agent)) + if (IS_ERR(port->mad_agent)) { + ret = PTR_ERR(port->mad_agent); goto error2; + } ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) @@ -4445,15 +4449,17 @@ static void cm_add_one(struct ib_device *ib_device) count++; } - if (!count) + if (!count) { + ret = -EOPNOTSUPP; goto free; + } ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); list_add_tail(&cm_dev->list, &cm.device_list); write_unlock_irqrestore(&cm.device_lock, flags); - return; + return 0; error3: ib_unregister_mad_agent(port->mad_agent); @@ -4475,6 +4481,7 @@ static void cm_add_one(struct ib_device *ib_device) } free: kfree(cm_dev); + return ret; } static void cm_remove_one(struct ib_device *ib_device, void *client_data) @@ -4489,9 +4496,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) unsigned long flags; int i; - if (!cm_dev) - return; - write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 6406a597dfb6..e8d99b71f44a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -153,7 +153,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res) } EXPORT_SYMBOL(rdma_res_to_id); -static void cma_add_one(struct ib_device *device); +static int cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device, void *client_data); static struct ib_client cma_client = { @@ -4638,29 +4638,34 @@ static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; -static void cma_add_one(struct ib_device *device) +static int cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; struct rdma_id_private *id_priv; unsigned int i; unsigned long supported_gids = 0; + int ret; cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); if (!cma_dev) - return; + return -ENOMEM; cma_dev->device = device; cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, sizeof(*cma_dev->default_gid_type), GFP_KERNEL); - if (!cma_dev->default_gid_type) + if (!cma_dev->default_gid_type) { + ret = -ENOMEM; goto free_cma_dev; + } cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, sizeof(*cma_dev->default_roce_tos), GFP_KERNEL); - if (!cma_dev->default_roce_tos) + if (!cma_dev->default_roce_tos) { + ret = -ENOMEM; goto free_gid_type; + } rdma_for_each_port (device, i) { supported_gids = roce_gid_type_mask_support(device, i); @@ -4686,15 +4691,14 @@ static void cma_add_one(struct ib_device *device) mutex_unlock(&lock); trace_cm_add_one(device); - return; + return 0; free_gid_type: kfree(cma_dev->default_gid_type); free_cma_dev: kfree(cma_dev); - - return; + return ret; } static int cma_remove_id_dev(struct rdma_id_private *id_priv) @@ -4756,9 +4760,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data) trace_cm_remove_one(device); - if (!cma_dev) - return; - mutex_lock(&lock); list_del(&cma_dev->list); mutex_unlock(&lock); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d0b3d35ad3e4..d9f565a779df 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -677,8 +677,20 @@ static int add_client_context(struct ib_device *device, if (ret) goto out; downgrade_write(&device->client_data_rwsem); - if (client->add) - client->add(device); + if (client->add) { + if (client->add(device)) { + /* + * If a client fails to add then the error code is + * ignored, but we won't call any more ops on this + * client. + */ + xa_erase(&device->client_data, client->client_id); + up_read(&device->client_data_rwsem); + ib_device_put(device); + ib_client_put(client); + return 0; + } + } /* Readers shall not see a client until add has been completed */ xa_set_mark(&device->client_data, client->client_id, diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index e02b5c4fdf09..186e0d652e8b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3076,9 +3076,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) return 0; } -static void ib_mad_init_device(struct ib_device *device) +static int ib_mad_init_device(struct ib_device *device) { int start, i; + unsigned int count = 0; + int ret; start = rdma_start_port(device); @@ -3086,17 +3088,23 @@ static void ib_mad_init_device(struct ib_device *device) if (!rdma_cap_ib_mad(device, i)) continue; - if (ib_mad_port_open(device, i)) { + ret = ib_mad_port_open(device, i); + if (ret) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } - if (ib_agent_port_open(device, i)) { + ret = ib_agent_port_open(device, i); + if (ret) { dev_err(&device->dev, "Couldn't open port %d for agents\n", i); goto error_agent; } + count++; } - return; + if (!count) + return -EOPNOTSUPP; + + return 0; error_agent: if (ib_mad_port_close(device, i)) @@ -3113,6 +3121,7 @@ static void ib_mad_init_device(struct ib_device *device) if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); } + return ret; } static void ib_mad_remove_device(struct ib_device *device, void *client_data) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 9c2d8b7f1af9..740f03ecc05d 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -42,7 +42,7 @@ #include #include "sa.h" -static void mcast_add_one(struct ib_device *device); +static int mcast_add_one(struct ib_device *device); static void mcast_remove_one(struct ib_device *device, void *client_data); static struct ib_client mcast_client = { @@ -815,7 +815,7 @@ static void mcast_event_handler(struct ib_event_handler *handler, } } -static void mcast_add_one(struct ib_device *device) +static int mcast_add_one(struct ib_device *device) { struct mcast_device *dev; struct mcast_port *port; @@ -825,7 +825,7 @@ static void mcast_add_one(struct ib_device *device) dev = kmalloc(struct_size(dev, port, device->phys_port_cnt), GFP_KERNEL); if (!dev) - return; + return -ENOMEM; dev->start_port = rdma_start_port(device); dev->end_port = rdma_end_port(device); @@ -845,7 +845,7 @@ static void mcast_add_one(struct ib_device *device) if (!count) { kfree(dev); - return; + return -EOPNOTSUPP; } dev->device = device; @@ -853,6 +853,7 @@ static void mcast_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); ib_register_event_handler(&dev->event_handler); + return 0; } static void mcast_remove_one(struct ib_device *device, void *client_data) @@ -861,9 +862,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data) struct mcast_port *port; int i; - if (!dev) - return; - ib_unregister_event_handler(&dev->event_handler); flush_workqueue(mcast_wq); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 2dd326f2beed..5c878646ff62 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -174,7 +174,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = { }; -static void ib_sa_add_one(struct ib_device *device); +static int ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device, void *client_data); static struct ib_client sa_client = { @@ -2322,18 +2322,19 @@ static void ib_sa_event(struct ib_event_handler *handler, } } -static void ib_sa_add_one(struct ib_device *device) +static int ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; int count = 0; + int ret; s = rdma_start_port(device); e = rdma_end_port(device); sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL); if (!sa_dev) - return; + return -ENOMEM; sa_dev->start_port = s; sa_dev->end_port = e; @@ -2353,8 +2354,10 @@ static void ib_sa_add_one(struct ib_device *device) ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, recv_handler, sa_dev, 0); - if (IS_ERR(sa_dev->port[i].agent)) + if (IS_ERR(sa_dev->port[i].agent)) { + ret = PTR_ERR(sa_dev->port[i].agent); goto err; + } INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work, @@ -2363,8 +2366,10 @@ static void ib_sa_add_one(struct ib_device *device) count++; } - if (!count) + if (!count) { + ret = -EOPNOTSUPP; goto free; + } ib_set_client_data(device, &sa_client, sa_dev); @@ -2383,7 +2388,7 @@ static void ib_sa_add_one(struct ib_device *device) update_sm_ah(&sa_dev->port[i].update_task); } - return; + return 0; err: while (--i >= 0) { @@ -2392,7 +2397,7 @@ static void ib_sa_add_one(struct ib_device *device) } free: kfree(sa_dev); - return; + return ret; } static void ib_sa_remove_one(struct ib_device *device, void *client_data) @@ -2400,9 +2405,6 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data) struct ib_sa_device *sa_dev = client_data; int i; - if (!sa_dev) - return; - ib_unregister_event_handler(&sa_dev->event_handler); flush_workqueue(ib_wq); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index da229eab5903..b0d0b522cc76 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -142,7 +142,7 @@ static dev_t dynamic_issm_dev; static DEFINE_IDA(umad_ida); -static void ib_umad_add_one(struct ib_device *device); +static int ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device, void *client_data); static void ib_umad_dev_free(struct kref *kref) @@ -1352,37 +1352,41 @@ static void ib_umad_kill_port(struct ib_umad_port *port) put_device(&port->dev); } -static void ib_umad_add_one(struct ib_device *device) +static int ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; int count = 0; + int ret; s = rdma_start_port(device); e = rdma_end_port(device); umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL); if (!umad_dev) - return; + return -ENOMEM; kref_init(&umad_dev->kref); for (i = s; i <= e; ++i) { if (!rdma_cap_ib_mad(device, i)) continue; - if (ib_umad_init_port(device, i, umad_dev, - &umad_dev->ports[i - s])) + ret = ib_umad_init_port(device, i, umad_dev, + &umad_dev->ports[i - s]); + if (ret) goto err; count++; } - if (!count) + if (!count) { + ret = -EOPNOTSUPP; goto free; + } ib_set_client_data(device, &umad_client, umad_dev); - return; + return 0; err: while (--i >= s) { @@ -1394,6 +1398,7 @@ static void ib_umad_add_one(struct ib_device *device) free: /* balances kref_init */ ib_umad_dev_put(umad_dev); + return ret; } static void ib_umad_remove_one(struct ib_device *device, void *client_data) @@ -1401,9 +1406,6 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data) struct ib_umad_device *umad_dev = client_data; unsigned int i; - if (!umad_dev) - return; - rdma_for_each_port (device, i) { if (rdma_cap_ib_mad(device, i)) ib_umad_kill_port( diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 8710a3427146..d52eb870533b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -75,7 +75,7 @@ static dev_t dynamic_uverbs_dev; static struct class *uverbs_class; static DEFINE_IDA(uverbs_ida); -static void ib_uverbs_add_one(struct ib_device *device); +static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); /* @@ -1091,7 +1091,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device, return 0; } -static void ib_uverbs_add_one(struct ib_device *device) +static int ib_uverbs_add_one(struct ib_device *device) { int devnum; dev_t base; @@ -1099,16 +1099,16 @@ static void ib_uverbs_add_one(struct ib_device *device) int ret; if (!device->ops.alloc_ucontext) - return; + return -EOPNOTSUPP; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); if (!uverbs_dev) - return; + return -ENOMEM; ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); if (ret) { kfree(uverbs_dev); - return; + return -ENOMEM; } device_initialize(&uverbs_dev->dev); @@ -1128,15 +1128,18 @@ static void ib_uverbs_add_one(struct ib_device *device) devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, GFP_KERNEL); - if (devnum < 0) + if (devnum < 0) { + ret = -ENOMEM; goto err; + } uverbs_dev->devnum = devnum; if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; else base = IB_UVERBS_BASE_DEV + devnum; - if (ib_uverbs_create_uapi(device, uverbs_dev)) + ret = ib_uverbs_create_uapi(device, uverbs_dev); + if (ret) goto err_uapi; uverbs_dev->dev.devt = base; @@ -1151,7 +1154,7 @@ static void ib_uverbs_add_one(struct ib_device *device) goto err_uapi; ib_set_client_data(device, &uverbs_client, uverbs_dev); - return; + return 0; err_uapi: ida_free(&uverbs_ida, devnum); @@ -1160,7 +1163,7 @@ static void ib_uverbs_add_one(struct ib_device *device) ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); put_device(&uverbs_dev->dev); - return; + return ret; } static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, @@ -1203,9 +1206,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) struct ib_uverbs_device *uverbs_dev = client_data; int wait_clients = 1; - if (!uverbs_dev) - return; - cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); ida_free(&uverbs_ida, uverbs_dev->devnum); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 81b8227214f1..d4c6a97ce4c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -86,7 +86,7 @@ struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; -static void ipoib_add_one(struct ib_device *device); +static int ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device, void *client_data); static void ipoib_neigh_reclaim(struct rcu_head *rp); static struct net_device *ipoib_get_net_dev_by_params( @@ -479,9 +479,6 @@ static struct net_device *ipoib_get_net_dev_by_params( if (ret) return NULL; - if (!dev_list) - return NULL; - /* See if we can find a unique device matching the L2 parameters */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, NULL, &net_dev); @@ -2514,7 +2511,7 @@ static struct net_device *ipoib_add_port(const char *format, return ERR_PTR(-ENOMEM); } -static void ipoib_add_one(struct ib_device *device) +static int ipoib_add_one(struct ib_device *device) { struct list_head *dev_list; struct net_device *dev; @@ -2524,7 +2521,7 @@ static void ipoib_add_one(struct ib_device *device) dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); if (!dev_list) - return; + return -ENOMEM; INIT_LIST_HEAD(dev_list); @@ -2541,10 +2538,11 @@ static void ipoib_add_one(struct ib_device *device) if (!count) { kfree(dev_list); - return; + return -EOPNOTSUPP; } ib_set_client_data(device, &ipoib_client, dev_list); + return 0; } static void ipoib_remove_one(struct ib_device *device, void *client_data) @@ -2552,9 +2550,6 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv; struct list_head *dev_list = client_data; - if (!dev_list) - return; - list_for_each_entry_safe(priv, tmp, dev_list, list) { LIST_HEAD(head); ipoib_parent_unregister_pre(priv->dev); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 6e8d650c17c7..874a8eb7638c 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -113,7 +113,7 @@ struct opa_vnic_vema_port { struct mutex lock; }; -static void opa_vnic_vema_add_one(struct ib_device *device); +static int opa_vnic_vema_add_one(struct ib_device *device); static void opa_vnic_vema_rem_one(struct ib_device *device, void *client_data); @@ -989,18 +989,18 @@ static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en) * * Allocate the vnic control port and initialize it. */ -static void opa_vnic_vema_add_one(struct ib_device *device) +static int opa_vnic_vema_add_one(struct ib_device *device) { struct opa_vnic_ctrl_port *cport; int rc, size = sizeof(*cport); if (!rdma_cap_opa_vnic(device)) - return; + return -EOPNOTSUPP; size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port); cport = kzalloc(size, GFP_KERNEL); if (!cport) - return; + return -ENOMEM; cport->num_ports = device->phys_port_cnt; cport->ibdev = device; @@ -1012,6 +1012,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device) ib_set_client_data(device, &opa_vnic_client, cport); opa_vnic_ctrl_config_dev(cport, true); + return 0; } /** @@ -1026,9 +1027,6 @@ static void opa_vnic_vema_rem_one(struct ib_device *device, { struct opa_vnic_ctrl_port *cport = client_data; - if (!cport) - return; - c_info("removing VNIC client\n"); opa_vnic_ctrl_config_dev(cport, false); vema_unregister(cport); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index cd1181c39ed2..00b4f88b113e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -146,7 +146,7 @@ module_param(ch_count, uint, 0444); MODULE_PARM_DESC(ch_count, "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); -static void srp_add_one(struct ib_device *device); +static int srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device, void *client_data); static void srp_rename_dev(struct ib_device *device, void *client_data); static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); @@ -4132,7 +4132,7 @@ static void srp_rename_dev(struct ib_device *device, void *client_data) } } -static void srp_add_one(struct ib_device *device) +static int srp_add_one(struct ib_device *device) { struct srp_device *srp_dev; struct ib_device_attr *attr = &device->attrs; @@ -4144,7 +4144,7 @@ static void srp_add_one(struct ib_device *device) srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); if (!srp_dev) - return; + return -ENOMEM; /* * Use the smallest page size supported by the HCA, down to a @@ -4197,8 +4197,12 @@ static void srp_add_one(struct ib_device *device) srp_dev->dev = device; srp_dev->pd = ib_alloc_pd(device, flags); - if (IS_ERR(srp_dev->pd)) - goto free_dev; + if (IS_ERR(srp_dev->pd)) { + int ret = PTR_ERR(srp_dev->pd); + + kfree(srp_dev); + return ret; + } if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey; @@ -4212,10 +4216,7 @@ static void srp_add_one(struct ib_device *device) } ib_set_client_data(device, &srp_client, srp_dev); - return; - -free_dev: - kfree(srp_dev); + return 0; } static void srp_remove_one(struct ib_device *device, void *client_data) @@ -4225,8 +4226,6 @@ static void srp_remove_one(struct ib_device *device, void *client_data) struct srp_target_port *target; srp_dev = client_data; - if (!srp_dev) - return; list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { device_unregister(&host->dev); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9d02d8088f1c..7ed38d1cb997 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3101,7 +3101,7 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) * srpt_add_one - InfiniBand device addition callback function * @device: Describes a HCA. */ -static void srpt_add_one(struct ib_device *device) +static int srpt_add_one(struct ib_device *device) { struct srpt_device *sdev; struct srpt_port *sport; @@ -3112,14 +3112,16 @@ static void srpt_add_one(struct ib_device *device) sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt), GFP_KERNEL); if (!sdev) - goto err; + return -ENOMEM; sdev->device = device; mutex_init(&sdev->sdev_mutex); sdev->pd = ib_alloc_pd(device, 0); - if (IS_ERR(sdev->pd)) + if (IS_ERR(sdev->pd)) { + ret = PTR_ERR(sdev->pd); goto free_dev; + } sdev->lkey = sdev->pd->local_dma_lkey; @@ -3135,6 +3137,7 @@ static void srpt_add_one(struct ib_device *device) if (IS_ERR(sdev->cm_id)) { pr_info("ib_create_cm_id() failed: %ld\n", PTR_ERR(sdev->cm_id)); + ret = PTR_ERR(sdev->cm_id); sdev->cm_id = NULL; if (!rdma_cm_id) goto err_ring; @@ -3179,7 +3182,8 @@ static void srpt_add_one(struct ib_device *device) mutex_init(&sport->port_gid_id.mutex); INIT_LIST_HEAD(&sport->port_gid_id.tpg_list); - if (srpt_refresh_port(sport)) { + ret = srpt_refresh_port(sport); + if (ret) { pr_err("MAD registration failed for %s-%d.\n", dev_name(&sdev->device->dev), i); goto err_event; @@ -3190,10 +3194,9 @@ static void srpt_add_one(struct ib_device *device) list_add_tail(&sdev->list, &srpt_dev_list); spin_unlock(&srpt_dev_lock); -out: ib_set_client_data(device, &srpt_client, sdev); pr_debug("added %s.\n", dev_name(&device->dev)); - return; + return 0; err_event: ib_unregister_event_handler(&sdev->event_handler); @@ -3205,10 +3208,8 @@ static void srpt_add_one(struct ib_device *device) ib_dealloc_pd(sdev->pd); free_dev: kfree(sdev); -err: - sdev = NULL; pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev)); - goto out; + return ret; } /** @@ -3221,12 +3222,6 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) struct srpt_device *sdev = client_data; int i; - if (!sdev) { - pr_info("%s(%s): nothing to do.\n", __func__, - dev_name(&device->dev)); - return; - } - srpt_unregister_mad_agent(sdev); ib_unregister_event_handler(&sdev->event_handler); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8d29f2f79da8..c3d715e2fc66 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2722,7 +2722,7 @@ struct ib_device { struct ib_client_nl_info; struct ib_client { const char *name; - void (*add) (struct ib_device *); + int (*add)(struct ib_device *ibdev); void (*remove)(struct ib_device *, void *client_data); void (*rename)(struct ib_device *dev, void *client_data); int (*get_nl_info)(struct ib_device *ibdev, void *client_data, diff --git a/net/rds/ib.c b/net/rds/ib.c index a792d8a3872a..90212ed3edf1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -127,19 +127,20 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) queue_work(rds_wq, &rds_ibdev->free_work); } -static void rds_ib_add_one(struct ib_device *device) +static int rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; bool has_fr, has_fmr; + int ret; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) - return; + return -EOPNOTSUPP; rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ibdev_to_node(device)); if (!rds_ibdev) - return; + return -ENOMEM; spin_lock_init(&rds_ibdev->spinlock); refcount_set(&rds_ibdev->refcount, 1); @@ -182,12 +183,14 @@ static void rds_ib_add_one(struct ib_device *device) if (!rds_ibdev->vector_load) { pr_err("RDS/IB: %s failed to allocate vector memory\n", __func__); + ret = -ENOMEM; goto put_dev; } rds_ibdev->dev = device; rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { + ret = PTR_ERR(rds_ibdev->pd); rds_ibdev->pd = NULL; goto put_dev; } @@ -195,12 +198,15 @@ static void rds_ib_add_one(struct ib_device *device) device->dma_device, sizeof(struct rds_header), L1_CACHE_BYTES, 0); - if (!rds_ibdev->rid_hdrs_pool) + if (!rds_ibdev->rid_hdrs_pool) { + ret = -ENOMEM; goto put_dev; + } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); if (IS_ERR(rds_ibdev->mr_1m_pool)) { + ret = PTR_ERR(rds_ibdev->mr_1m_pool); rds_ibdev->mr_1m_pool = NULL; goto put_dev; } @@ -208,6 +214,7 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->mr_8k_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); if (IS_ERR(rds_ibdev->mr_8k_pool)) { + ret = PTR_ERR(rds_ibdev->mr_8k_pool); rds_ibdev->mr_8k_pool = NULL; goto put_dev; } @@ -227,12 +234,13 @@ static void rds_ib_add_one(struct ib_device *device) refcount_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); - refcount_inc(&rds_ibdev->refcount); rds_ib_nodev_connect(); + return 0; put_dev: rds_ib_dev_put(rds_ibdev); + return ret; } /* @@ -274,9 +282,6 @@ static void rds_ib_remove_one(struct ib_device *device, void *client_data) { struct rds_ib_device *rds_ibdev = client_data; - if (!rds_ibdev) - return; - rds_ib_dev_shutdown(rds_ibdev); /* stop connection attempts from getting a reference to this device. */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e7e7c3c6e94a..2fad5f3fe093 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -547,18 +547,18 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) static struct ib_client smc_ib_client; /* callback function for ib_register_client() */ -static void smc_ib_add_dev(struct ib_device *ibdev) +static int smc_ib_add_dev(struct ib_device *ibdev) { struct smc_ib_device *smcibdev; u8 port_cnt; int i; if (ibdev->node_type != RDMA_NODE_IB_CA) - return; + return -EOPNOTSUPP; smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL); if (!smcibdev) - return; + return -ENOMEM; smcibdev->ibdev = ibdev; INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); @@ -583,6 +583,7 @@ static void smc_ib_add_dev(struct ib_device *ibdev) smcibdev->pnetid[i]); } schedule_work(&smcibdev->port_event_work); + return 0; } /* callback function for ib_unregister_client() */ @@ -590,9 +591,6 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { struct smc_ib_device *smcibdev = client_data; - if (!smcibdev || smcibdev->ibdev != ibdev) - return; - ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ spin_unlock(&smc_ib_devices.lock); From d5665a21250efeeb73579a2f8d71ee1820f37952 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 4 May 2020 08:19:31 +0300 Subject: [PATCH 224/562] RDMA/core: Add hash functions to calculate RoCEv2 flowlabel and UDP source port Add two hash functions to distribute RoCE v2 UDP source and Flowlabel symmetrically. These are user visible API and any change in the implementation needs to be tested for inter-operability between old and new variant. Link: https://lore.kernel.org/r/20200504051935.269708-2-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c3d715e2fc66..4c488cade70f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4709,4 +4709,48 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) bool rdma_dev_access_netns(const struct ib_device *device, const struct net *net); + +#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000) +#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF) + +/** + * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based + * on the flow_label + * + * This function will convert the 20 bit flow_label input to a valid RoCE v2 + * UDP src port 14 bit value. All RoCE V2 drivers should use this same + * convention. + */ +static inline u16 rdma_flow_label_to_udp_sport(u32 fl) +{ + u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000; + + fl_low ^= fl_high >> 14; + return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN); +} + +/** + * rdma_calc_flow_label - generate a RDMA symmetric flow label value based on + * local and remote qpn values + * + * This function folded the multiplication results of two qpns, 24 bit each, + * fields, and converts it to a 20 bit results. + * + * This function will create symmetric flow_label value based on the local + * and remote qpn values. this will allow both the requester and responder + * to calculate the same flow_label for a given connection. + * + * This helper function should be used by driver in case the upper layer + * provide a zero flow_label value. This is to improve entropy of RDMA + * traffic in the network. + */ +static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) +{ + u64 v = (u64)lqpn * rqpn; + + v ^= v >> 20; + v ^= v >> 40; + + return (u32)(v & IB_GRH_FLOWLABEL_MASK); +} #endif /* IB_VERBS_H */ From 9611d53aa1600ba94a36cd7bfd6a95dbae76c8e6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 May 2020 08:19:32 +0300 Subject: [PATCH 225/562] RDMA/core: Consider flow label when building skb Use rdma_flow_label_to_udp_sport to calculate the UDP source port of the RoCEV2 packet. Link: https://lore.kernel.org/r/20200504051935.269708-3-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/lag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c index a29533626a7c..7063e41eaf26 100644 --- a/drivers/infiniband/core/lag.c +++ b/drivers/infiniband/core/lag.c @@ -34,7 +34,8 @@ static struct sk_buff *rdma_build_skb(struct ib_device *device, skb_push(skb, sizeof(struct udphdr)); skb_reset_transport_header(skb); uh = udp_hdr(skb); - uh->source = htons(0xC000); + uh->source = + htons(rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label)); uh->dest = htons(ROCE_V2_UDP_DPORT); uh->len = htons(sizeof(struct udphdr)); @@ -114,7 +115,8 @@ struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, struct net_device *master; if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && - ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)) + ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && + ah_attr->grh.flow_label)) return NULL; rcu_read_lock(); From 2b880b2e5e03e790a9b9fd7e3e8fcf7a36230a16 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 4 May 2020 08:19:33 +0300 Subject: [PATCH 226/562] RDMA/mlx5: Define RoCEv2 udp source port when set path Calculate and set UDP source port based on the flow label. If flow label is not defined in GRH then calculate it based on lqpn/rqpn. Link: https://lore.kernel.org/r/20200504051935.269708-4-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 810bbd52daec..e624886bcf85 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3133,6 +3133,21 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, return err; } +static void mlx5_set_path_udp_sport(struct mlx5_qp_path *path, + const struct rdma_ah_attr *ah, + u32 lqpn, u32 rqpn) + +{ + u32 fl = ah->grh.flow_label; + u16 sport; + + if (!fl) + fl = rdma_calc_flow_label(lqpn, rqpn); + + sport = rdma_flow_label_to_udp_sport(fl); + path->udp_sport = cpu_to_be16(sport); +} + static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct rdma_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, @@ -3164,12 +3179,15 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EINVAL; memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); - if (qp->ibqp.qp_type == IB_QPT_RC || - qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_XRC_INI || - qp->ibqp.qp_type == IB_QPT_XRC_TGT) - path->udp_sport = - mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr); + if ((qp->ibqp.qp_type == IB_QPT_RC || + qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) && + (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) && + (attr_mask & IB_QP_DEST_QPN)) + mlx5_set_path_udp_sport(path, ah, + qp->ibqp.qp_num, + attr->dest_qp_num); path->dci_cfi_prio_sl = (sl & 0x7) << 4; gid_type = ah->grh.sgid_attr->gid_type; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) From f66534051936044728e2be9937eb408494ca4007 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 4 May 2020 08:19:34 +0300 Subject: [PATCH 227/562] RDMA/cma: Initialize the flow label of CM's route path record If flow label is not set by the user or it's not IPv4, initialize it with the cma src/dst based on the "Kernighan and Ritchie's hash function". Link: https://lore.kernel.org/r/20200504051935.269708-5-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e8d99b71f44a..432eec472164 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2909,6 +2909,24 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) return 0; } +static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv) +{ + struct sockaddr_in6 *addr6; + u16 dport, sport; + u32 hash, fl; + + addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv); + fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK; + if ((cma_family(id_priv) != AF_INET6) || !fl) { + dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv))); + sport = be16_to_cpu(cma_port(cma_src_addr(id_priv))); + hash = (u32)sport * 31 + dport; + fl = hash & IB_GRH_FLOWLABEL_MASK; + } + + return cpu_to_be32(fl); +} + static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; @@ -2975,6 +2993,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } + if (rdma_protocol_roce_udp_encap(id_priv->id.device, + id_priv->id.port_num)) + route->path_rec->flow_label = + cma_get_roce_udp_flow_label(id_priv); + cma_init_resolve_route_work(work, id_priv); queue_work(cma_wq, &work->work); From 5ac55dfc6d92c12d5ef423cd16165eb0350f8f51 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 4 May 2020 08:19:35 +0300 Subject: [PATCH 228/562] RDMA/mlx5: Set UDP source port based on the grh.flow_label Calculate UDP source port based on the grh.flow_label. If grh.flow_label is not valid, we will use minimal supported UDP source port. Link: https://lore.kernel.org/r/20200504051935.269708-6-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ah.c | 21 +++++++++++++++++++-- drivers/infiniband/hw/mlx5/main.c | 4 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++-- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index cc858f658567..59e5ec39b447 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -32,6 +32,24 @@ #include "mlx5_ib.h" +static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev, + const struct rdma_ah_attr *ah_attr) +{ + enum ib_gid_type gid_type = ah_attr->grh.sgid_attr->gid_type; + __be16 sport; + + if ((gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) && + (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) && + (ah_attr->grh.flow_label & IB_GRH_FLOWLABEL_MASK)) + sport = cpu_to_be16( + rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label)); + else + sport = mlx5_get_roce_udp_sport_min(dev, + ah_attr->grh.sgid_attr); + + return sport; +} + static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct rdma_ah_init_attr *init_attr) { @@ -60,8 +78,7 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); - ah->av.udp_sport = - mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr); + ah->av.udp_sport = mlx5_ah_get_udp_sport(dev, ah_attr); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) #define MLX5_ECN_ENABLED BIT(1) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e7fb290c9d8d..0b8cc219e085 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -629,8 +629,8 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, attr->index, NULL, NULL); } -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, - const struct ib_gid_attr *attr) +__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr) { if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f250753319d0..3041808773e6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1356,8 +1356,8 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, - const struct ib_gid_attr *attr); +__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); From 9b2cf76c9f052987ae5c4ad450ebebdc7c5d7b87 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 28 Apr 2020 19:03:39 +0800 Subject: [PATCH 229/562] RDMA/hns: Optimize PBL buffer allocation process PBL table has its own implementation for multi-hop addressing currently, but for the hardware, all table's addressing use the same logic, there is no need to implement repeatedly. So optimize the PBL buffer allocation process by using the mtr's interfaces. Link: https://lore.kernel.org/r/1588071823-40200-2-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Reported-by: kbuild test robot Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 19 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 45 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 76 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 724 ++++---------------- 4 files changed, 196 insertions(+), 668 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e1032cec2b12..1089db8ad0b5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -403,30 +403,17 @@ struct hns_roce_mw { struct hns_roce_mr { struct ib_mr ibmr; - struct ib_umem *umem; u64 iova; /* MR's virtual orignal addr */ u64 size; /* Address range of MR */ u32 key; /* Key of MR */ u32 pd; /* PD num of MR */ u32 access; /* Access permission of MR */ - u32 npages; int enabled; /* MR's active status */ int type; /* MR's register type */ - u64 *pbl_buf; /* MR's PBL space */ - dma_addr_t pbl_dma_addr; /* MR's PBL space PA */ - u32 pbl_size; /* PA number in the PBL */ - u64 pbl_ba; /* page table address */ - u32 l0_chunk_last_num; /* L0 last number */ - u32 l1_chunk_last_num; /* L1 last number */ - u64 **pbl_bt_l2; /* PBL BT L2 */ - u64 **pbl_bt_l1; /* PBL BT L1 */ - u64 *pbl_bt_l0; /* PBL BT L0 */ - dma_addr_t *pbl_l2_dma_addr; /* PBL BT L2 dma addr */ - dma_addr_t *pbl_l1_dma_addr; /* PBL BT L1 dma addr */ - dma_addr_t pbl_l0_dma_addr; /* PBL BT L0 dma addr */ - u32 pbl_ba_pg_sz; /* BT chunk page size */ - u32 pbl_buf_pg_sz; /* buf chunk page size */ u32 pbl_hop_num; /* multi-hop number */ + struct hns_roce_mtr pbl_mtr; + u32 npages; + dma_addr_t *page_list; }; struct hns_roce_mr_table { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 49775cda83dc..b4b98e818328 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1099,7 +1099,6 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct completion comp; long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; unsigned long start = jiffies; - int npages; int ret = 0; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -1146,17 +1145,9 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n", mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start)); - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - dma_free_coherent(dev, npages * 8, mr->pbl_buf, - mr->pbl_dma_addr); - } - hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, key_to_hw_index(mr->key), 0); - - ib_umem_release(mr->umem); - + hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); kfree(mr); return ret; @@ -1826,9 +1817,12 @@ static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx) { + struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); + u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 }; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v1_mpt_entry *mpt_entry; - struct sg_dma_page_iter sg_iter; - u64 *pages; + dma_addr_t pbl_ba; + int count; int i; /* MPT filled into mailbox buf */ @@ -1878,22 +1872,15 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, if (mr->type == MR_TYPE_DMA) return 0; - pages = (u64 *) __get_free_page(GFP_KERNEL); - if (!pages) - return -ENOMEM; - - i = 0; - for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { - pages[i] = ((u64)sg_page_iter_dma_address(&sg_iter)) >> 12; - - /* Directly record to MTPT table firstly 7 entry */ - if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM) - break; - i++; + count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, + ARRAY_SIZE(pages), &pbl_ba); + if (count < 1) { + ibdev_err(ibdev, "failed to find PBL mtr, count = %d.", count); + return -ENOBUFS; } /* Register user mr */ - for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) { + for (i = 0; i < count; i++) { switch (i) { case 0: mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i])); @@ -1959,13 +1946,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, } } - free_page((unsigned long) pages); - - mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr)); - + mpt_entry->pbl_addr_l = cpu_to_le32(pbl_ba); roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, - MPT_BYTE_12_PBL_ADDR_H_S, - ((u32)(mr->pbl_dma_addr >> 32))); + MPT_BYTE_12_PBL_ADDR_H_S, upper_32_bits(pbl_ba)); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2a8c3893bccd..e699932e6926 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -95,6 +95,7 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, { struct hns_roce_mr *mr = to_hr_mr(wr->mr); struct hns_roce_wqe_frmr_seg *fseg = wqe; + u64 pbl_ba; /* use ib_access_flags */ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, @@ -109,19 +110,20 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0); /* Data structure reuse may lead to confusion */ - rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff); - rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32); + pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; + rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba)); + rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba)); rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff); rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32); rc_sq_wqe->rkey = cpu_to_le32(wr->key); rc_sq_wqe->va = cpu_to_le64(wr->mr->iova); - fseg->pbl_size = cpu_to_le32(mr->pbl_size); + fseg->pbl_size = cpu_to_le32(mr->npages); roce_set_field(fseg->mode_buf_pg_sz, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S, - mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); roce_set_bit(fseg->mode_buf_pg_sz, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } @@ -2439,32 +2441,30 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, struct hns_roce_mr *mr) { - struct sg_dma_page_iter sg_iter; - u64 page_addr; - u64 *pages; - int i; + struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); + u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 }; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t pbl_ba; + int i, count; - mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); - mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); + count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, + ARRAY_SIZE(pages), &pbl_ba); + if (count < 1) { + ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n", + count); + return -ENOBUFS; + } + + /* Aligned to the hardware address access unit */ + for (i = 0; i < count; i++) + pages[i] >>= 6; + + mpt_entry->pbl_size = cpu_to_le32(mr->npages); + mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(mr->pbl_ba >> 3)); + upper_32_bits(pbl_ba >> 3)); - pages = (u64 *)__get_free_page(GFP_KERNEL); - if (!pages) - return -ENOMEM; - - i = 0; - for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - pages[i] = page_addr >> 6; - - /* Record the first 2 entry directly to MTPT table */ - if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) - goto found; - i++; - } -found: mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0])); @@ -2475,9 +2475,7 @@ static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); - - free_page((unsigned long)pages); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); return 0; } @@ -2499,7 +2497,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, mr->pd); @@ -2585,11 +2583,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) { + struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v2_mpt_entry *mpt_entry; + dma_addr_t pbl_ba = 0; mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); + if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) { + ibdev_err(ibdev, "failed to find frmr mtr.\n"); + return -ENOBUFS; + } + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, @@ -2597,7 +2603,7 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, mr->pd); @@ -2610,17 +2616,17 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0); roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); - mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); + mpt_entry->pbl_size = cpu_to_le32(mr->npages); - mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); + mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(mr->pbl_ba >> 3)); + upper_32_bits(pbl_ba >> 3)); roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 99e3876e712c..c65f1f682819 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -293,418 +293,89 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) } } -static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int err_loop_index, - int loop_i, int loop_j) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + u32 pd, u64 iova, u64 size, u32 access) { - struct device *dev = hr_dev->dev; - u32 mhop_num; - u32 pbl_bt_sz; - u64 bt_idx; - int i, j; - - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - mhop_num = hr_dev->caps.pbl_hop_num; - - i = loop_i; - if (mhop_num == 3 && err_loop_index == 2) { - for (; i >= 0; i--) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - if (i == loop_i && j >= loop_j) - break; - - bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } else if (mhop_num == 3 && err_loop_index == 1) { - for (i -= 1; i >= 0; i--) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } else if (mhop_num == 2 && err_loop_index == 1) { - for (i -= 1; i >= 0; i--) - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - } else { - dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.", - mhop_num, err_loop_index); - return; - } - - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr); - mr->pbl_bt_l0 = NULL; - mr->pbl_l0_dma_addr = 0; -} -static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - - if (npages > pbl_bt_sz / 8) { - dev_err(dev, "npages %d is larger than buf_pg_sz!", - npages); - return -EINVAL; - } - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) - return -ENOMEM; - - mr->pbl_size = npages; - mr->pbl_ba = mr->pbl_dma_addr; - mr->pbl_hop_num = 1; - mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; - mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; - return 0; - -} - - -static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - int npages_allocated; - u64 pbl_last_bt_num; - u64 pbl_bt_cnt = 0; - u64 size; - int i; - - pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); - - /* alloc L1 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { - if (pbl_bt_cnt + 1 < pbl_last_bt_num) { - size = pbl_bt_sz; - } else { - npages_allocated = i * (pbl_bt_sz / 8); - size = (npages - npages_allocated) * 8; - } - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); - return -ENOMEM; - } - - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; - - pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) - break; - } - - mr->l0_chunk_last_num = i + 1; - - return 0; -} - -static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - int mr_alloc_done = 0; - int npages_allocated; - u64 pbl_last_bt_num; - u64 pbl_bt_cnt = 0; - u64 bt_idx; - u64 size; - int i; - int j = 0; - - pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); - - mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_l2_dma_addr), - GFP_KERNEL); - if (!mr->pbl_l2_dma_addr) - return -ENOMEM; - - mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_bt_l2), - GFP_KERNEL); - if (!mr->pbl_bt_l2) - goto err_kcalloc_bt_l2; - - /* alloc L1, L2 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); - goto err_dma_alloc_l0; - } - - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; - - for (j = 0; j < pbl_bt_sz / 8; j++) { - bt_idx = i * pbl_bt_sz / 8 + j; - - if (pbl_bt_cnt + 1 < pbl_last_bt_num) { - size = pbl_bt_sz; - } else { - npages_allocated = bt_idx * - (pbl_bt_sz / 8); - size = (npages - npages_allocated) * 8; - } - mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( - dev, size, - &(mr->pbl_l2_dma_addr[bt_idx]), - GFP_KERNEL); - if (!mr->pbl_bt_l2[bt_idx]) { - hns_roce_loop_free(hr_dev, mr, 2, i, j); - goto err_dma_alloc_l0; - } - - *(mr->pbl_bt_l1[i] + j) = - mr->pbl_l2_dma_addr[bt_idx]; - - pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) { - mr_alloc_done = 1; - break; - } - } - - if (mr_alloc_done) - break; - } - - mr->l0_chunk_last_num = i + 1; - mr->l1_chunk_last_num = j + 1; - - - return 0; - -err_dma_alloc_l0: - kfree(mr->pbl_bt_l2); - mr->pbl_bt_l2 = NULL; - -err_kcalloc_bt_l2: - kfree(mr->pbl_l2_dma_addr); - mr->pbl_l2_dma_addr = NULL; - - return -ENOMEM; -} - - -/* PBL multi hop addressing */ -static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr) -{ - struct device *dev = hr_dev->dev; - u32 pbl_bt_sz; - u32 mhop_num; - - mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num); - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - - if (mhop_num == HNS_ROCE_HOP_NUM_0) - return 0; - - if (mhop_num == 1) - return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz); - - mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, - sizeof(*mr->pbl_l1_dma_addr), - GFP_KERNEL); - if (!mr->pbl_l1_dma_addr) - return -ENOMEM; - - mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), - GFP_KERNEL); - if (!mr->pbl_bt_l1) - goto err_kcalloc_bt_l1; - - /* alloc L0 BT */ - mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l0_dma_addr), - GFP_KERNEL); - if (!mr->pbl_bt_l0) - goto err_kcalloc_l2_dma; - - if (mhop_num == 2) { - if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) - goto err_kcalloc_l2_dma; - } - - if (mhop_num == 3) { - if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) - goto err_kcalloc_l2_dma; - } - - - mr->pbl_size = npages; - mr->pbl_ba = mr->pbl_l0_dma_addr; - mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; - mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; - mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; - - return 0; - -err_kcalloc_l2_dma: - kfree(mr->pbl_bt_l1); - mr->pbl_bt_l1 = NULL; - -err_kcalloc_bt_l1: - kfree(mr->pbl_l1_dma_addr); - mr->pbl_l1_dma_addr = NULL; - - return -ENOMEM; -} - -static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, - u64 size, u32 access, int npages, - struct hns_roce_mr *mr) -{ - struct device *dev = hr_dev->dev; - unsigned long index = 0; - int ret; + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned long obj = 0; + int err; /* Allocate a key for mr from mr_table */ - ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); - if (ret) + err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj); + if (err) { + ibdev_err(ibdev, + "failed to alloc bitmap for MR key, ret = %d.\n", + err); return -ENOMEM; + } mr->iova = iova; /* MR va starting addr */ mr->size = size; /* MR addr range */ mr->pd = pd; /* MR num */ mr->access = access; /* MR access permit */ mr->enabled = 0; /* MR active status */ - mr->key = hw_index_to_key(index); /* MR key */ + mr->key = hw_index_to_key(obj); /* MR key */ - if (size == ~0ull) { - mr->pbl_buf = NULL; - mr->pbl_dma_addr = 0; - /* PBL multi-hop addressing parameters */ - mr->pbl_bt_l2 = NULL; - mr->pbl_bt_l1 = NULL; - mr->pbl_bt_l0 = NULL; - mr->pbl_l2_dma_addr = NULL; - mr->pbl_l1_dma_addr = NULL; - mr->pbl_l0_dma_addr = 0; - } else { - if (!hr_dev->caps.pbl_hop_num) { - mr->pbl_buf = dma_alloc_coherent(dev, - npages * BA_BYTE_LEN, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) - return -ENOMEM; - } else { - ret = hns_roce_mhop_alloc(hr_dev, npages, mr); - } + err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); + if (err) { + ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err); + goto err_free_bitmap; } - return ret; + return 0; +err_free_bitmap: + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR); + return err; } -static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr) +static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - int npages_allocated; - int npages; - int i, j; - u32 pbl_bt_sz; - u32 mhop_num; - u64 bt_idx; + unsigned long obj = key_to_hw_index(mr->key); - npages = mr->pbl_size; - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num; + hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj); + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR); +} - if (mhop_num == HNS_ROCE_HOP_NUM_0) - return; +static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + size_t length, struct ib_udata *udata, u64 start, + int access) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + bool is_fast = mr->type == MR_TYPE_FRMR; + struct hns_roce_buf_attr buf_attr = {}; + int err; - if (mhop_num == 1) { - dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN), - mr->pbl_buf, mr->pbl_dma_addr); - return; - } + mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; + buf_attr.page_shift = is_fast ? PAGE_SHIFT : + hr_dev->caps.pbl_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = length; + buf_attr.region[0].hopnum = mr->pbl_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + buf_attr.user_access = access; + /* fast MR's buffer is alloced before mapping, not at creation */ + buf_attr.mtt_only = is_fast; - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, - mr->pbl_l0_dma_addr); + err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr, + hr_dev->caps.pbl_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, start); + if (err) + ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); + else + mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count; - if (mhop_num == 2) { - for (i = 0; i < mr->l0_chunk_last_num; i++) { - if (i == mr->l0_chunk_last_num - 1) { - npages_allocated = - i * (pbl_bt_sz / BA_BYTE_LEN); + return err; +} - dma_free_coherent(dev, - (npages - npages_allocated) * BA_BYTE_LEN, - mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - break; - } - - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - } - } else if (mhop_num == 3) { - for (i = 0; i < mr->l0_chunk_last_num; i++) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j; - - if ((i == mr->l0_chunk_last_num - 1) - && j == mr->l1_chunk_last_num - 1) { - npages_allocated = bt_idx * - (pbl_bt_sz / BA_BYTE_LEN); - - dma_free_coherent(dev, - (npages - npages_allocated) * - BA_BYTE_LEN, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - - break; - } - - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } - - kfree(mr->pbl_bt_l1); - kfree(mr->pbl_l1_dma_addr); - mr->pbl_bt_l1 = NULL; - mr->pbl_l1_dma_addr = NULL; - if (mhop_num == 3) { - kfree(mr->pbl_bt_l2); - kfree(mr->pbl_l2_dma_addr); - mr->pbl_bt_l2 = NULL; - mr->pbl_l2_dma_addr = NULL; - } +static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) +{ + hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); } static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - int npages = 0; + struct ib_device *ibdev = &hr_dev->ib_dev; int ret; if (mr->enabled) { @@ -712,27 +383,12 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); + ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n", + ret); } - if (mr->size != ~0ULL) { - if (mr->type == MR_TYPE_MR) - npages = ib_umem_page_count(mr->umem); - - if (!hr_dev->caps.pbl_hop_num) - dma_free_coherent(dev, - (unsigned int)(npages * BA_BYTE_LEN), - mr->pbl_buf, mr->pbl_dma_addr); - else - hns_roce_mhop_free(hr_dev, mr); - } - - if (mr->enabled) - hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, - key_to_hw_index(mr->key)); - - hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, - key_to_hw_index(mr->key), BITMAP_NO_RR); + free_mr_pbl(hr_dev, mr); + free_mr_key(hr_dev, mr); } static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, @@ -742,18 +398,12 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, unsigned long mtpt_idx = key_to_hw_index(mr->key); struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - - /* Prepare HEM entry memory */ - ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx); - if (ret) - return ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) { ret = PTR_ERR(mailbox); - goto err_table; + return ret; } if (mr->type != MR_TYPE_FRMR) @@ -780,8 +430,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); -err_table: - hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx); return ret; } @@ -982,18 +630,19 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) { + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_mr *mr; int ret; - mr = kmalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (mr == NULL) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; /* Allocate memory region key */ - ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, - ~0ULL, acc, 0, mr); + hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); if (ret) goto err_free; @@ -1002,12 +651,10 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->umem = NULL; return &mr->ibmr; - err_mr: - hns_roce_mr_free(to_hr_dev(pd->device), mr); + free_mr_key(hr_dev, mr); err_free: kfree(mr); @@ -1085,120 +732,41 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, - struct ib_umem *umem) -{ - struct sg_dma_page_iter sg_iter; - int i = 0, j = 0; - u64 page_addr; - u32 pbl_bt_sz; - - if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0) - return 0; - - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - if (!hr_dev->caps.pbl_hop_num) { - /* for hip06, page addr is aligned to 4K */ - mr->pbl_buf[i++] = page_addr >> 12; - } else if (hr_dev->caps.pbl_hop_num == 1) { - mr->pbl_buf[i++] = page_addr; - } else { - if (hr_dev->caps.pbl_hop_num == 2) - mr->pbl_bt_l1[i][j] = page_addr; - else if (hr_dev->caps.pbl_hop_num == 3) - mr->pbl_bt_l2[i][j] = page_addr; - - j++; - if (j >= (pbl_bt_sz / BA_BYTE_LEN)) { - i++; - j = 0; - } - } - } - - /* Memory barrier */ - mb(); - - return 0; -} - struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); - struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - int bt_size; int ret; - int n; - int i; - mr = kmalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->device, start, length, access_flags); - if (IS_ERR(mr->umem)) { - ret = PTR_ERR(mr->umem); - goto err_free; - } - - n = ib_umem_page_count(mr->umem); - - if (!hr_dev->caps.pbl_hop_num) { - if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { - dev_err(dev, - " MR len %lld err. MR is limited to 4G at most!\n", - length); - ret = -EINVAL; - goto err_umem; - } - } else { - u64 pbl_size = 1; - - bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / - BA_BYTE_LEN; - for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) - pbl_size *= bt_size; - if (n > pbl_size) { - dev_err(dev, - " MR len %lld err. MR page num is limited to %lld!\n", - length, pbl_size); - ret = -EINVAL; - goto err_umem; - } - } - mr->type = MR_TYPE_MR; - - ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags, n, mr); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, + access_flags); if (ret) - goto err_umem; + goto err_alloc_mr; - ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); + ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); if (ret) - goto err_mr; + goto err_alloc_key; ret = hns_roce_mr_enable(hr_dev, mr); if (ret) - goto err_mr; + goto err_alloc_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; return &mr->ibmr; -err_mr: - hns_roce_mr_free(hr_dev, mr); - -err_umem: - ib_umem_release(mr->umem); - -err_free: +err_alloc_pbl: + free_mr_pbl(hr_dev, mr); +err_alloc_key: + free_mr_key(hr_dev, mr); +err_alloc_mr: kfree(mr); return ERR_PTR(ret); } @@ -1210,84 +778,36 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, u32 pdn, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); - struct device *dev = hr_dev->dev; - int npages; int ret; - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, - mr->pbl_buf, mr->pbl_dma_addr); - } - ib_umem_release(mr->umem); - - mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags); - if (IS_ERR(mr->umem)) { - ret = PTR_ERR(mr->umem); - mr->umem = NULL; - return -ENOMEM; - } - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) { - ret = hns_roce_mhop_alloc(hr_dev, npages, mr); - if (ret) - goto release_umem; - } else { - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) { - ret = -ENOMEM; - goto release_umem; - } + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); + if (ret) { + ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); + return ret; } ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, mr_access_flags, virt_addr, length, mailbox->buf); - if (ret) - goto release_umem; - - - ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); if (ret) { - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, - mr->pbl_buf, - mr->pbl_dma_addr); - } - - goto release_umem; + ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); + free_mr_pbl(hr_dev, mr); } - return 0; - -release_umem: - ib_umem_release(mr->umem); return ret; - } - int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ib_dev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; - struct device *dev = hr_dev->dev; unsigned long mtpt_idx; u32 pdn = 0; int ret; @@ -1308,7 +828,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) - dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); + ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); mr->enabled = 0; @@ -1332,8 +852,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); - ib_umem_release(mr->umem); + ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret); goto free_cmd_mbox; } @@ -1361,8 +880,6 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata); } else { hns_roce_mr_free(hr_dev, mr); - - ib_umem_release(mr->umem); kfree(mr); } @@ -1376,12 +893,8 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; u64 length; - u32 page_size; int ret; - page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT); - length = max_num_sg * page_size; - if (mr_type != IB_MR_TYPE_MEM_REG) return ERR_PTR(-EINVAL); @@ -1398,23 +911,27 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->type = MR_TYPE_FRMR; /* Allocate memory region key */ - ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length, - 0, max_num_sg, mr); + length = max_num_sg * (1 << PAGE_SHIFT); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); if (ret) goto err_free; + ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + if (ret) + goto err_key; + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) - goto err_mr; + goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->umem = NULL; return &mr->ibmr; -err_mr: - hns_roce_mr_free(to_hr_dev(pd->device), mr); - +err_key: + free_mr_key(hr_dev, mr); +err_pbl: + free_mr_pbl(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); @@ -1424,19 +941,54 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) { struct hns_roce_mr *mr = to_hr_mr(ibmr); - mr->pbl_buf[mr->npages++] = addr; + if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) { + mr->page_list[mr->npages++] = addr; + return 0; + } - return 0; + return -ENOBUFS; } int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); + struct hns_roce_buf_region region = {}; + int ret = 0; mr->npages = 0; + mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count, + sizeof(dma_addr_t), GFP_KERNEL); + if (!mr->page_list) + return ret; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); + if (ret < 1) { + ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); + goto err_page_list; + } + + region.offset = 0; + region.count = mr->npages; + region.hopnum = mr->pbl_hop_num; + ret = hns_roce_mtr_map(hr_dev, &mr->pbl_mtr, ®ion, 1, mr->page_list, + mr->npages); + if (ret) { + ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); + ret = 0; + } else { + mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size); + ret = mr->npages; + } + +err_page_list: + kvfree(mr->page_list); + mr->page_list = NULL; + + return ret; } static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, From 2929c40f08a9654c4c8e35ad0a36d611deb61394 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 28 Apr 2020 19:03:40 +0800 Subject: [PATCH 230/562] RDMA/hns: Remove unused MTT functions The MTT (Memory Translate Table) interface is no longer used to configure the buffer address to BT (Base Address Table) that requires driver mapping. Because the MTT is not compatible with multi-hop addressing of the hip08, it is replaced by MTR (Memory Translate Region) interface, and all the MTT functions should be removed. Link: https://lore.kernel.org/r/1588071823-40200-3-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 43 -- drivers/infiniband/hw/hns/hns_roce_device.h | 64 --- drivers/infiniband/hw/hns/hns_roce_hem.c | 105 ---- drivers/infiniband/hw/hns/hns_roce_hem.h | 6 - drivers/infiniband/hw/hns/hns_roce_main.c | 70 +-- drivers/infiniband/hw/hns/hns_roce_mr.c | 521 -------------------- 6 files changed, 2 insertions(+), 807 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index e04e7596d979..365e7db6c498 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -283,49 +283,6 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, return total; } -void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt) -{ - if (hopnum == HNS_ROCE_HOP_NUM_0) - region->hopnum = 0; - else - region->hopnum = hopnum; - - region->offset = offset; - region->count = buf_cnt; -} - -void hns_roce_free_buf_list(dma_addr_t **bufs, int region_cnt) -{ - int i; - - for (i = 0; i < region_cnt; i++) { - kfree(bufs[i]); - bufs[i] = NULL; - } -} - -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int region_cnt) -{ - struct hns_roce_buf_region *r; - int i; - - for (i = 0; i < region_cnt; i++) { - r = ®ions[i]; - bufs[i] = kcalloc(r->count, sizeof(dma_addr_t), GFP_KERNEL); - if (!bufs[i]) - goto err_alloc; - } - - return 0; - -err_alloc: - hns_roce_free_buf_list(bufs, i); - - return -ENOMEM; -} - void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1089db8ad0b5..da3850aeeb14 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -222,13 +222,6 @@ enum { HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), }; -enum hns_roce_mtt_type { - MTT_TYPE_WQE, - MTT_TYPE_CQE, - MTT_TYPE_SRQWQE, - MTT_TYPE_IDX -}; - #define HNS_ROCE_DB_TYPE_COUNT 2 #define HNS_ROCE_DB_UNIT_SIZE 4 @@ -267,8 +260,6 @@ enum { #define HNS_ROCE_PORT_DOWN 0 #define HNS_ROCE_PORT_UP 1 -#define HNS_ROCE_MTT_ENTRY_PER_SEG 8 - #define PAGE_ADDR_SHIFT 12 /* The minimum page count for hardware access page directly. */ @@ -303,22 +294,6 @@ struct hns_roce_bitmap { unsigned long *table; }; -/* Order bitmap length -- bit num compute formula: 1 << (max_order - order) */ -/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */ -/* Every bit repesent to a partner free/used status in bitmap */ -/* - * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 - * Bit = 1 represent to idle and available; bit = 0: not available - */ -struct hns_roce_buddy { - /* Members point to every order level bitmap */ - unsigned long **bits; - /* Represent to avail bits of the order level bitmap */ - u32 *num_free; - int max_order; - spinlock_t lock; -}; - /* For Hardware Entry Memory */ struct hns_roce_hem_table { /* HEM type: 0 = qpc, 1 = mtt, 2 = cqc, 3 = srq, 4 = other */ @@ -339,13 +314,6 @@ struct hns_roce_hem_table { dma_addr_t *bt_l0_dma_addr; }; -struct hns_roce_mtt { - unsigned long first_seg; - int order; - int page_shift; - enum hns_roce_mtt_type mtt_type; -}; - struct hns_roce_buf_region { int offset; /* page offset */ u32 count; /* page count */ @@ -418,15 +386,7 @@ struct hns_roce_mr { struct hns_roce_mr_table { struct hns_roce_bitmap mtpt_bitmap; - struct hns_roce_buddy mtt_buddy; - struct hns_roce_hem_table mtt_table; struct hns_roce_hem_table mtpt_table; - struct hns_roce_buddy mtt_cqe_buddy; - struct hns_roce_hem_table mtt_cqe_table; - struct hns_roce_buddy mtt_srqwqe_buddy; - struct hns_roce_hem_table mtt_srqwqe_table; - struct hns_roce_buddy mtt_idx_buddy; - struct hns_roce_hem_table mtt_idx_table; }; struct hns_roce_wq { @@ -1141,21 +1101,6 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev); void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev); -int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, - struct hns_roce_mtt *mtt); -void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt); -int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct hns_roce_buf *buf); - -void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift); -int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt); -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr); - /* hns roce hw need current block and next block addr from mtt */ #define MTT_MIN_COUNT 2 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -1228,15 +1173,6 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift); -int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct ib_umem *umem); - -void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt); -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int count); -void hns_roce_free_buf_list(dma_addr_t **bufs, int count); - int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index a245e753afe9..37d101eec181 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -75,18 +75,6 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) case HEM_TYPE_CQC_TIMER: hop_num = hr_dev->caps.cqc_timer_hop_num; break; - case HEM_TYPE_CQE: - hop_num = hr_dev->caps.cqe_hop_num; - break; - case HEM_TYPE_MTT: - hop_num = hr_dev->caps.mtt_hop_num; - break; - case HEM_TYPE_SRQWQE: - hop_num = hr_dev->caps.srqwqe_hop_num; - break; - case HEM_TYPE_IDX: - hop_num = hr_dev->caps.idx_hop_num; - break; default: return false; } @@ -195,38 +183,6 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; - case HEM_TYPE_MTT: - mhop->buf_chunk_size = 1 << (hr_dev->caps.mtt_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.mtt_hop_num; - break; - case HEM_TYPE_CQE: - mhop->buf_chunk_size = 1 << (hr_dev->caps.cqe_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.cqe_hop_num; - break; - case HEM_TYPE_SRQWQE: - mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.srqwqe_hop_num; - break; - case HEM_TYPE_IDX: - mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.idx_hop_num; - break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", type); @@ -899,57 +855,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, return addr; } -int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end) -{ - struct hns_roce_hem_mhop mhop; - unsigned long inc = table->table_chunk_size / table->obj_size; - unsigned long i = 0; - int ret; - - if (hns_roce_check_whether_mhop(hr_dev, table->type)) { - ret = hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); - if (ret) - goto fail; - inc = mhop.bt_chunk_size / table->obj_size; - } - - /* Allocate MTT entry memory according to chunk(128K) */ - for (i = start; i <= end; i += inc) { - ret = hns_roce_table_get(hr_dev, table, i); - if (ret) - goto fail; - } - - return 0; - -fail: - while (i > start) { - i -= inc; - hns_roce_table_put(hr_dev, table, i); - } - return ret; -} - -void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end) -{ - struct hns_roce_hem_mhop mhop; - unsigned long inc = table->table_chunk_size / table->obj_size; - unsigned long i; - - if (hns_roce_check_whether_mhop(hr_dev, table->type)) { - if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) - return; - inc = mhop.bt_chunk_size / table->obj_size; - } - - for (i = start; i <= end; i += inc) - hns_roce_table_put(hr_dev, table, i); -} - int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, unsigned long obj_size, unsigned long nobj, @@ -1112,12 +1017,6 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { - if ((hr_dev->caps.num_idx_segs)) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_idx_table); - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table); if (hr_dev->caps.srqc_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); @@ -1137,10 +1036,6 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_cqe_table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); } struct roce_hem_item { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index a00b6c27735a..1fa0bdcb1989 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -115,12 +115,6 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj, dma_addr_t *dma_handle); -int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end); -void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end); int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, unsigned long obj_size, unsigned long nobj, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d0031d559213..fd3581efe9a8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -579,33 +579,12 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) int ret; struct device *dev = hr_dev->dev; - ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table, - HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz, - hr_dev->caps.num_mtt_segs, 1); - if (ret) { - dev_err(dev, "Failed to init MTT context memory, aborting.\n"); - return ret; - } - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->mr_table.mtt_cqe_table, - HEM_TYPE_CQE, - hr_dev->caps.mtt_entry_sz, - hr_dev->caps.num_cqe_segs, 1); - if (ret) { - dev_err(dev, - "Failed to init CQE context memory, aborting.\n"); - goto err_unmap_cqe; - } - } - ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, hr_dev->caps.num_mtpts, 1); if (ret) { dev_err(dev, "Failed to init MTPT context memory, aborting.\n"); - goto err_unmap_mtt; + return ret; } ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, @@ -660,32 +639,6 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.num_srqwqe_segs) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table, - HEM_TYPE_SRQWQE, - hr_dev->caps.mtt_entry_sz, - hr_dev->caps.num_srqwqe_segs, 1); - if (ret) { - dev_err(dev, - "Failed to init MTT srqwqe memory, aborting.\n"); - goto err_unmap_srq; - } - } - - if (hr_dev->caps.num_idx_segs) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->mr_table.mtt_idx_table, - HEM_TYPE_IDX, - hr_dev->caps.idx_entry_sz, - hr_dev->caps.num_idx_segs, 1); - if (ret) { - dev_err(dev, - "Failed to init MTT idx memory, aborting.\n"); - goto err_unmap_srqwqe; - } - } - if (hr_dev->caps.sccc_entry_sz) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, @@ -695,7 +648,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) if (ret) { dev_err(dev, "Failed to init SCC context memory, aborting.\n"); - goto err_unmap_idx; + goto err_unmap_srq; } } @@ -733,17 +686,6 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.sccc_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); - -err_unmap_idx: - if (hr_dev->caps.num_idx_segs) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_idx_table); - -err_unmap_srqwqe: - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table); - err_unmap_srq: if (hr_dev->caps.srqc_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); @@ -765,14 +707,6 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) err_unmap_dmpt: hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); -err_unmap_mtt: - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_cqe_table); - -err_unmap_cqe: - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); - return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index c65f1f682819..ecd76759d47a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,233 +66,6 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, - unsigned long *seg) -{ - int o; - u32 m; - - spin_lock(&buddy->lock); - - for (o = order; o <= buddy->max_order; ++o) { - if (buddy->num_free[o]) { - m = 1 << (buddy->max_order - o); - *seg = find_first_bit(buddy->bits[o], m); - if (*seg < m) - goto found; - } - } - spin_unlock(&buddy->lock); - return -EINVAL; - - found: - clear_bit(*seg, buddy->bits[o]); - --buddy->num_free[o]; - - while (o > order) { - --o; - *seg <<= 1; - set_bit(*seg ^ 1, buddy->bits[o]); - ++buddy->num_free[o]; - } - - spin_unlock(&buddy->lock); - - *seg <<= order; - return 0; -} - -static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg, - int order) -{ - seg >>= order; - - spin_lock(&buddy->lock); - - while (test_bit(seg ^ 1, buddy->bits[order])) { - clear_bit(seg ^ 1, buddy->bits[order]); - --buddy->num_free[order]; - seg >>= 1; - ++order; - } - - set_bit(seg, buddy->bits[order]); - ++buddy->num_free[order]; - - spin_unlock(&buddy->lock); -} - -static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) -{ - int i, s; - - buddy->max_order = max_order; - spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, - sizeof(*buddy->bits), - GFP_KERNEL); - buddy->num_free = kcalloc(buddy->max_order + 1, - sizeof(*buddy->num_free), - GFP_KERNEL); - if (!buddy->bits || !buddy->num_free) - goto err_out; - - for (i = 0; i <= buddy->max_order; ++i) { - s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL | - __GFP_NOWARN); - if (!buddy->bits[i]) { - buddy->bits[i] = vzalloc(array_size(s, sizeof(long))); - if (!buddy->bits[i]) - goto err_out_free; - } - } - - set_bit(0, buddy->bits[buddy->max_order]); - buddy->num_free[buddy->max_order] = 1; - - return 0; - -err_out_free: - for (i = 0; i <= buddy->max_order; ++i) - kvfree(buddy->bits[i]); - -err_out: - kfree(buddy->bits); - kfree(buddy->num_free); - return -ENOMEM; -} - -static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) -{ - int i; - - for (i = 0; i <= buddy->max_order; ++i) - kvfree(buddy->bits[i]); - - kfree(buddy->bits); - kfree(buddy->num_free); -} - -static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, - unsigned long *seg, u32 mtt_type) -{ - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - struct hns_roce_hem_table *table; - struct hns_roce_buddy *buddy; - int ret; - - switch (mtt_type) { - case MTT_TYPE_WQE: - buddy = &mr_table->mtt_buddy; - table = &mr_table->mtt_table; - break; - case MTT_TYPE_CQE: - buddy = &mr_table->mtt_cqe_buddy; - table = &mr_table->mtt_cqe_table; - break; - case MTT_TYPE_SRQWQE: - buddy = &mr_table->mtt_srqwqe_buddy; - table = &mr_table->mtt_srqwqe_table; - break; - case MTT_TYPE_IDX: - buddy = &mr_table->mtt_idx_buddy; - table = &mr_table->mtt_idx_table; - break; - default: - dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n", - mtt_type); - return -EINVAL; - } - - ret = hns_roce_buddy_alloc(buddy, order, seg); - if (ret) - return ret; - - ret = hns_roce_table_get_range(hr_dev, table, *seg, - *seg + (1 << order) - 1); - if (ret) { - hns_roce_buddy_free(buddy, *seg, order); - return ret; - } - - return 0; -} - -int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, - struct hns_roce_mtt *mtt) -{ - int ret; - int i; - - /* Page num is zero, correspond to DMA memory register */ - if (!npages) { - mtt->order = -1; - mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT; - return 0; - } - - /* Note: if page_shift is zero, FAST memory register */ - mtt->page_shift = page_shift; - - /* Compute MTT entry necessary */ - for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages; - i <<= 1) - ++mtt->order; - - /* Allocate MTT entry */ - ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg, - mtt->mtt_type); - if (ret) - return -ENOMEM; - - return 0; -} - -void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) -{ - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - - if (mtt->order < 0) - return; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_CQE: - hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_SRQWQE: - hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_IDX: - hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - default: - dev_err(hr_dev->dev, - "Unsupport mtt type %d, clean mtt failed\n", - mtt->mtt_type); - break; - } -} - static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, u32 pd, u64 iova, u64 size, u32 access) { @@ -433,131 +206,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, u32 start_index, - u32 npages, u64 *page_list) -{ - struct hns_roce_hem_table *table; - dma_addr_t dma_handle; - __le64 *mtts; - u32 bt_page_size; - u32 i; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - table = &hr_dev->mr_table.mtt_table; - bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_CQE: - table = &hr_dev->mr_table.mtt_cqe_table; - bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_SRQWQE: - table = &hr_dev->mr_table.mtt_srqwqe_table; - bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_IDX: - table = &hr_dev->mr_table.mtt_idx_table; - bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); - break; - default: - return -EINVAL; - } - - /* All MTTs must fit in the same page */ - if (start_index / (bt_page_size / sizeof(u64)) != - (start_index + npages - 1) / (bt_page_size / sizeof(u64))) - return -EINVAL; - - if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) - return -EINVAL; - - mtts = hns_roce_table_find(hr_dev, table, - mtt->first_seg + - start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, - &dma_handle); - if (!mtts) - return -ENOMEM; - - /* Save page addr, low 12 bits : 0 */ - for (i = 0; i < npages; ++i) { - if (!hr_dev->caps.mtt_hop_num) - mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT); - else - mtts[i] = cpu_to_le64(page_list[i]); - } - - return 0; -} - -static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, u32 start_index, - u32 npages, u64 *page_list) -{ - int chunk; - int ret; - u32 bt_page_size; - - if (mtt->order < 0) - return -EINVAL; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_CQE: - bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_SRQWQE: - bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_IDX: - bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); - break; - default: - dev_err(hr_dev->dev, - "Unsupport mtt type %d, write mtt failed\n", - mtt->mtt_type); - return -EINVAL; - } - - while (npages > 0) { - chunk = min_t(int, bt_page_size / sizeof(u64), npages); - - ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk, - page_list); - if (ret) - return ret; - - npages -= chunk; - start_index += chunk; - page_list += chunk; - } - - return 0; -} - -int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct hns_roce_buf *buf) -{ - u64 *page_list; - int ret; - u32 i; - - page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL); - if (!page_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; ++i) - page_list[i] = hns_roce_buf_page(buf, i); - - ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); - - kfree(page_list); - - return ret; -} - int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; @@ -567,50 +215,6 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) hr_dev->caps.num_mtpts, hr_dev->caps.num_mtpts - 1, hr_dev->caps.reserved_mrws, 0); - if (ret) - return ret; - - ret = hns_roce_buddy_init(&mr_table->mtt_buddy, - ilog2(hr_dev->caps.num_mtt_segs)); - if (ret) - goto err_buddy; - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { - ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy, - ilog2(hr_dev->caps.num_cqe_segs)); - if (ret) - goto err_buddy_cqe; - } - - if (hr_dev->caps.num_srqwqe_segs) { - ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy, - ilog2(hr_dev->caps.num_srqwqe_segs)); - if (ret) - goto err_buddy_srqwqe; - } - - if (hr_dev->caps.num_idx_segs) { - ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy, - ilog2(hr_dev->caps.num_idx_segs)); - if (ret) - goto err_buddy_idx; - } - - return 0; - -err_buddy_idx: - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); - -err_buddy_srqwqe: - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); - -err_buddy_cqe: - hns_roce_buddy_cleanup(&mr_table->mtt_buddy); - -err_buddy: - hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); return ret; } @@ -618,13 +222,6 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - if (hr_dev->caps.num_idx_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy); - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); - hns_roce_buddy_cleanup(&mr_table->mtt_buddy); - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); } @@ -661,77 +258,6 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) return ERR_PTR(ret); } -int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct ib_umem *umem) -{ - struct device *dev = hr_dev->dev; - struct sg_dma_page_iter sg_iter; - unsigned int order; - int npage = 0; - int ret = 0; - int i; - u64 page_addr; - u64 *pages; - u32 bt_page_size; - u32 n; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - order = hr_dev->caps.mtt_ba_pg_sz; - break; - case MTT_TYPE_CQE: - order = hr_dev->caps.cqe_ba_pg_sz; - break; - case MTT_TYPE_SRQWQE: - order = hr_dev->caps.srqwqe_ba_pg_sz; - break; - case MTT_TYPE_IDX: - order = hr_dev->caps.idx_ba_pg_sz; - break; - default: - dev_err(dev, "Unsupport mtt type %d, write mtt failed\n", - mtt->mtt_type); - return -EINVAL; - } - - bt_page_size = 1 << (order + PAGE_SHIFT); - - pages = (u64 *) __get_free_pages(GFP_KERNEL, order); - if (!pages) - return -ENOMEM; - - i = n = 0; - - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { - if (page_addr & ((1 << mtt->page_shift) - 1)) { - dev_err(dev, - "page_addr is not page_shift %d alignment!\n", - mtt->page_shift); - ret = -EINVAL; - goto out; - } - pages[i++] = page_addr; - } - npage++; - if (i == bt_page_size / sizeof(u64)) { - ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); - if (ret) - goto out; - n += i; - i = 0; - } - } - - if (i) - ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); - -out: - free_pages((unsigned long) pages, order); - return ret; -} - struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -1112,20 +638,6 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) return 0; } -void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift) -{ - hns_roce_hem_list_init(&mtr->hem_list); - mtr->hem_cfg.buf_pg_shift = buf_pg_shift; - mtr->hem_cfg.ba_pg_shift = bt_pg_shift; -} - -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr) -{ - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -} - static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, dma_addr_t *pages, struct hns_roce_buf_region *region) { @@ -1165,39 +677,6 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return 0; } -int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt) -{ - struct hns_roce_buf_region *r; - int ret; - int i; - - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, - region_cnt, mtr->hem_cfg.ba_pg_shift); - if (ret) - return ret; - - mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; - for (i = 0; i < region_cnt; i++) { - r = ®ions[i]; - ret = mtr_map_region(hr_dev, mtr, bufs[i], r); - if (ret) { - dev_err(hr_dev->dev, - "write mtr[%d/%d] err %d,offset=%d.\n", - i, region_cnt, ret, r->offset); - goto err_write; - } - } - - return 0; - -err_write: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); - - return ret; -} - static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) { int i; From 54d6638765b0ede9f3889af47d9d5412bef8f47d Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 28 Apr 2020 19:03:41 +0800 Subject: [PATCH 231/562] RDMA/hns: Optimize WQE buffer size calculating process Optimize the QP's WQE buffer parameters calculating process to make the codes more readable mainly by merging calculation of extended sge space of kernel and userspace. In addition, add some inline functions to simply codes about multi-hop addressing. Link: https://lore.kernel.org/r/1588071823-40200-4-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 25 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 111 +++---- drivers/infiniband/hw/hns/hns_roce_qp.c | 345 ++++++++------------ 3 files changed, 199 insertions(+), 282 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index da3850aeeb14..100224358a49 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1079,6 +1079,8 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) return buf->page_list[idx].map; } +#define hr_hw_page_align(x) ALIGN(x, 1 << PAGE_ADDR_SHIFT) + static inline u64 to_hr_hw_page_addr(u64 addr) { return addr >> PAGE_ADDR_SHIFT; @@ -1089,6 +1091,29 @@ static inline u32 to_hr_hw_page_shift(u32 page_shift) return page_shift - PAGE_ADDR_SHIFT; } +static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count) +{ + if (count > 0) + return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum; + + return 0; +} + +static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift); +} + +static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift) >> buf_shift; +} + +static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) +{ + return ilog2(to_hr_hem_entries_count(count, buf_shift)); +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e699932e6926..158b8317f604 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -154,47 +154,24 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind, int valid_num_sge) { struct hns_roce_v2_wqe_data_seg *dseg; - struct ib_sge *sg; - int num_in_wqe = 0; - int extend_sge_num; - int fi_sge_num; - int se_sge_num; - int shift; - int i; + struct ib_sge *sge = wr->sg_list; + unsigned int idx = *sge_ind; + int cnt = valid_num_sge; - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) - num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; - extend_sge_num = valid_num_sge - num_in_wqe; - sg = wr->sg_list + num_in_wqe; - shift = qp->mtr.hem_cfg.buf_pg_shift; - - /* - * Check whether wr->num_sge sges are in the same page. If not, we - * should calculate how many sges in the first page and the second - * page. - */ - dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); - fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) - - (uintptr_t)dseg) / - sizeof(struct hns_roce_v2_wqe_data_seg); - if (extend_sge_num > fi_sge_num) { - se_sge_num = extend_sge_num - fi_sge_num; - for (i = 0; i < fi_sge_num; i++) { - set_data_seg_v2(dseg++, sg + i); - (*sge_ind)++; - } - dseg = hns_roce_get_extend_sge(qp, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < se_sge_num; i++) { - set_data_seg_v2(dseg++, sg + fi_sge_num + i); - (*sge_ind)++; - } - } else { - for (i = 0; i < extend_sge_num; i++) { - set_data_seg_v2(dseg++, sg + i); - (*sge_ind)++; - } + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + cnt -= HNS_ROCE_SGE_IN_WQE; + sge += HNS_ROCE_SGE_IN_WQE; } + + while (cnt > 0) { + dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); + set_data_seg_v2(dseg, sge); + idx++; + sge++; + cnt--; + } + + *sge_ind = idx; } static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -232,7 +209,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); } else { - if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { + if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); @@ -245,8 +222,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < wr->num_sge && - j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { + for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; + i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); dseg++; @@ -675,7 +652,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, } /* rq support inline data */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if (hr_qp->rq_inl_buf.wqe_cnt) { sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; @@ -3491,29 +3468,18 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + roce_set_field(context->byte_4_sqpn_tst, + V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S, + to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift)); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + ilog2(hr_qp->sq.wqe_cnt)); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, - (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || - hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || - hr_qp->ibqp.srq) ? 0 : - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + ilog2(hr_qp->rq.wqe_cnt)); } static void modify_qp_reset_to_init(struct ib_qp *ibqp, @@ -3781,17 +3747,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, - hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_sq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num, + hr_qp->sq.wqe_cnt)); roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, - ((ibqp->qp_type == IB_QPT_GSI) || - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - hr_dev->caps.wqe_sge_hop_num : 0); + to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num, + hr_qp->sge.sge_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); @@ -3799,8 +3764,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, - hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_rq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num, + hr_qp->rq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); @@ -3977,7 +3943,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return -EINVAL; } - if (hr_qp->sge.offset) { + if (hr_qp->sge.sge_cnt > 0) { page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sge.offset / page_size, @@ -4011,15 +3977,12 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); - context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)) : 0; + context->sq_cur_sge_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)); roce_set_field(context->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, - ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)) : 0); + upper_32_bits(to_hr_hw_page_addr(sge_cur_blk))); qpc_mask->sq_cur_sge_blk_addr = 0; roce_set_field(qpc_mask->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d05d3cb7de39..b5707596148d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -355,16 +355,16 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); } -static int set_rq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, bool is_user, int has_rq, - struct hns_roce_qp *hr_qp) +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp, int has_rq) { - u32 max_cnt; + u32 cnt; /* If srq exist, set zero for relative number of rq */ if (!has_rq) { hr_qp->rq.wqe_cnt = 0; hr_qp->rq.max_gs = 0; + hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = 0; cap->max_recv_sge = 0; @@ -379,17 +379,14 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, return -EINVAL; } - max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); - - hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { + cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes)); + if (cnt > hr_dev->caps.max_wqes) { ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n", cap->max_recv_wr); return -EINVAL; } - max_cnt = max(1U, cap->max_recv_sge); - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -397,12 +394,61 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * hr_qp->rq.max_gs); - cap->max_recv_wr = hr_qp->rq.wqe_cnt; + hr_qp->rq.wqe_cnt = cnt; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) + hr_qp->rq_inl_buf.wqe_cnt = cnt; + else + hr_qp->rq_inl_buf.wqe_cnt = 0; + + cap->max_recv_wr = cnt; cap->max_recv_sge = hr_qp->rq.max_gs; return 0; } +static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, + struct ib_qp_cap *cap) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 cnt; + + cnt = max(1U, cap->max_send_sge); + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = roundup_pow_of_two(cnt); + hr_qp->sge.sge_cnt = 0; + + return 0; + } + + hr_qp->sq.max_gs = cnt; + + /* UD sqwqe's sge use extend sge */ + if (hr_qp->ibqp.qp_type == IB_QPT_GSI || + hr_qp->ibqp.qp_type == IB_QPT_UD) { + cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); + } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { + cnt = roundup_pow_of_two(sq_wqe_cnt * + (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { + if (cnt > hr_dev->caps.max_extend_sg) { + ibdev_err(ibdev, + "failed to check exSGE num, exSGE num = %d.\n", + cnt); + return -EINVAL; + } + } + } else { + cnt = 0; + } + + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + hr_qp->sge.sge_cnt = cnt; + + return 0; +} + static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_ib_create_qp *ucmd) @@ -430,82 +476,27 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { - u32 ex_sge_num; - u32 page_size; - u32 max_cnt; + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 cnt = 0; int ret; - if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || - hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) || + cnt > hr_dev->caps.max_wqes) return -EINVAL; ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Failed to check user SQ size limit\n"); + ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n", + ret); return ret; } + ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); + if (ret) + return ret; + hr_qp->sq.wqe_shift = ucmd->log_sq_stride; - - max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); - else - hr_qp->sq.max_gs = max_cnt; - - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - 2)); - - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE && - hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - ibdev_err(&hr_dev->ib_dev, - "Failed to check extended SGE size limit %d\n", - hr_qp->sge.sge_cnt); - return -EINVAL; - } - } - - hr_qp->sge.sge_shift = 4; - ex_sge_num = hr_qp->sge.sge_cnt; - - /* Get buf size, SQ and RQ are aligned to page_szie */ - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), PAGE_SIZE) + - round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - - hr_qp->sq.offset = 0; - hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - } else { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - hr_qp->sge.sge_cnt = ex_sge_num ? - max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), page_size) + - round_up((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), page_size) + - round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), page_size); - - hr_qp->sq.offset = 0; - if (ex_sge_num) { - hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - hr_qp->rq.offset = hr_qp->sge.offset + - round_up((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), - page_size); - } else { - hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - } - } + hr_qp->sq.wqe_cnt = cnt; return 0; } @@ -514,147 +505,86 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_buf_attr *buf_attr) { - bool is_extend_sge; int buf_size; int idx = 0; + hr_qp->buff_size = 0; + + /* SQ WQE */ + hr_qp->sq.offset = 0; + buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt, + hr_qp->sq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* extend SGE WQE in SQ */ + hr_qp->sge.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* RQ WQE */ + hr_qp->rq.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt, + hr_qp->rq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + if (hr_qp->buff_size < 1) return -EINVAL; buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz; buf_attr->fixed_page = true; - buf_attr->region_count = 0; - - if (hr_qp->sge.sge_cnt > 0) - is_extend_sge = true; - else - is_extend_sge = false; - - /* SQ WQE */ - if (is_extend_sge) - buf_size = hr_qp->sge.offset - hr_qp->sq.offset; - else - buf_size = hr_qp->rq.offset - hr_qp->sq.offset; - - if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { - buf_attr->region[idx].size = buf_size; - buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; - idx++; - } - - /* extend SGE in SQ WQE */ - buf_size = hr_qp->rq.offset - hr_qp->sge.offset; - if (buf_size > 0 && is_extend_sge && - idx < ARRAY_SIZE(buf_attr->region)) { - buf_attr->region[idx].size = buf_size; - buf_attr->region[idx].hopnum = - hr_dev->caps.wqe_sge_hop_num; - idx++; - } - - /* RQ WQE */ - buf_size = hr_qp->buff_size - hr_qp->rq.offset; - if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { - buf_attr->region[idx].size = buf_size; - buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; - idx++; - } - buf_attr->region_count = idx; return 0; } -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) -{ - struct device *dev = hr_dev->dev; - - if (hr_qp->sq.max_gs > 2) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - 2)); - hr_qp->sge.sge_shift = 4; - } - - /* ud sqwqe's sge use extend sge */ - if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && - hr_qp->ibqp.qp_type == IB_QPT_GSI) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - hr_qp->sq.max_gs); - hr_qp->sge.sge_shift = 4; - } - - if (hr_qp->sq.max_gs > 2 && - hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n", - hr_qp->sge.sge_cnt); - return -EINVAL; - } - } - - return 0; -} - static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) { - u32 page_size; - u32 max_cnt; - int size; + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 cnt; int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg || cap->max_inline_data > hr_dev->caps.max_sq_inline) { - ibdev_err(&hr_dev->ib_dev, - "SQ WR or sge or inline data error!\n"); + ibdev_err(ibdev, + "failed to check SQ WR, SGE or inline num, ret = %d.\n", + -EINVAL); + return -EINVAL; + } + + cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); + if (cnt > hr_dev->caps.max_wqes) { + ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", + cnt); return -EINVAL; } hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); + hr_qp->sq.wqe_cnt = cnt; - max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); - - hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - ibdev_err(&hr_dev->ib_dev, - "while setting kernel sq size, sq.wqe_cnt too large\n"); - return -EINVAL; - } - - /* Get data_seg numbers */ - max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); - else - hr_qp->sq.max_gs = max_cnt; - - ret = set_extend_sge_param(hr_dev, hr_qp); - if (ret) { - ibdev_err(&hr_dev->ib_dev, "set extend sge parameters fail\n"); + ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); + if (ret) return ret; - } - /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - hr_qp->sq.offset = 0; - size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); - - if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && hr_qp->sge.sge_cnt) { - hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), - (u32)hr_qp->sge.sge_cnt); - hr_qp->sge.offset = size; - size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, - page_size); - } - - hr_qp->rq.offset = size; - size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); - hr_qp->buff_size = size; - - /* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.wqe_cnt; + /* sync the parameters of kernel QP to user's configuration */ + cap->max_send_wr = cnt; cap->max_send_sge = hr_qp->sq.max_gs; /* We don't support inline sends for kernel QPs (yet) */ @@ -685,8 +615,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr) { u32 max_recv_sge = init_attr->cap.max_recv_sge; + u32 wqe_cnt = hr_qp->rq_inl_buf.wqe_cnt; struct hns_roce_rinl_wqe *wqe_list; - u32 wqe_cnt = hr_qp->rq.wqe_cnt; int i; /* allocate recv inline buf */ @@ -708,7 +638,6 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge]; hr_qp->rq_inl_buf.wqe_list = wqe_list; - hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt; return 0; @@ -721,7 +650,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) { - kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); + if (hr_qp->rq_inl_buf.wqe_list) + kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); kfree(hr_qp->rq_inl_buf.wqe_list); } @@ -731,36 +661,36 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - bool is_rq_buf_inline; int ret; - is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hns_roce_qp_has_rq(init_attr); - if (is_rq_buf_inline) { + if (!udata && hr_qp->rq_inl_buf.wqe_cnt) { ret = alloc_rq_inline_buf(hr_qp, init_attr); if (ret) { - ibdev_err(ibdev, "Failed to alloc inline RQ buffer\n"); + ibdev_err(ibdev, + "failed to alloc inline buf, ret = %d.\n", + ret); return ret; } + } else { + hr_qp->rq_inl_buf.wqe_list = NULL; } ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr); if (ret) { - ibdev_err(ibdev, "Failed to split WQE buf, ret %d\n", ret); + ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); goto err_inline; } ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz, udata, addr); if (ret) { - ibdev_err(ibdev, "Failed to create WQE mtr, ret %d\n", ret); + ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); goto err_inline; } return 0; err_inline: - if (is_rq_buf_inline) - free_rq_inline_buf(hr_qp); + free_rq_inline_buf(hr_qp); return ret; } @@ -768,9 +698,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hr_qp->rq.wqe_cnt) - free_rq_inline_buf(hr_qp); + free_rq_inline_buf(hr_qp); } static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev, @@ -935,10 +863,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, else hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; - ret = set_rq_size(hr_dev, &init_attr->cap, udata, - hns_roce_qp_has_rq(init_attr), hr_qp); + ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, + hns_roce_qp_has_rq(init_attr)); if (ret) { - ibdev_err(ibdev, "Failed to set user RQ size\n"); + ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", + ret); return ret; } From ffb1308b88b6023205249f11332ebff83610899a Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 28 Apr 2020 19:03:42 +0800 Subject: [PATCH 232/562] RDMA/hns: Move SRQ code to the reasonable place Just move the SRQ related code to more reasonable place, and unify format of some prints. Link: https://lore.kernel.org/r/1588071823-40200-5-git-send-email-liweihang@huawei.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 609 ++++++++++----------- 1 file changed, 303 insertions(+), 306 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 158b8317f604..0b79dafe919e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -694,6 +694,129 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, return ret; } +static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +{ + return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); +} + +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +{ + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); + srq->tail++; + + spin_unlock(&srq->lock); +} + +static int find_empty_entry(struct hns_roce_idx_que *idx_que, + unsigned long size) +{ + int wqe_idx; + + if (unlikely(bitmap_full(idx_que->bitmap, size))) + return -ENOSPC; + + wqe_idx = find_first_zero_bit(idx_que->bitmap, size); + + bitmap_set(idx_que->bitmap, wqe_idx, 1); + + return wqe_idx; +} + +static void fill_idx_queue(struct hns_roce_idx_que *idx_que, + int cur_idx, int wqe_idx) +{ + unsigned int *addr; + + addr = (unsigned int *)hns_roce_buf_offset(idx_que->mtr.kmem, + cur_idx * idx_que->entry_sz); + *addr = wqe_idx; +} + +static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_v2_db srq_db; + unsigned long flags; + int ret = 0; + int wqe_idx; + void *wqe; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + ind = srq->head & (srq->wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(wr->num_sge >= srq->max_gs)) { + ret = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); + if (wqe_idx < 0) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + fill_idx_queue(&srq->idx_que, ind, wqe_idx); + wqe = get_srq_wqe(srq, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + + for (i = 0; i < wr->num_sge; ++i) { + dseg[i].len = cpu_to_le32(wr->sg_list[i].length); + dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); + dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); + } + + if (i < srq->max_gs) { + dseg[i].len = 0; + dseg[i].lkey = cpu_to_le32(0x100); + dseg[i].addr = 0; + } + + srq->wrid[wqe_idx] = wr->wr_id; + ind = (ind + 1) & (srq->wqe_cnt - 1); + } + + if (likely(nreq)) { + srq->head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + srq_db.byte_4 = + cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | + (srq->srqn & V2_DB_BYTE_4_TAG_M)); + srq_db.parameter = cpu_to_le32(srq->head); + + hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return ret; +} + static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev, unsigned long instance_stage, unsigned long reset_stage) @@ -2667,22 +2790,6 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); } -static void *get_srq_wqe(struct hns_roce_srq *srq, int n) -{ - return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); -} - -static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) -{ - /* always called with interrupts disabled. */ - spin_lock(&srq->lock); - - bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); - srq->tail++; - - spin_unlock(&srq->lock); -} - static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { *hr_cq->set_ci_db = cons_index & V2_CQ_DB_PARAMETER_CONS_IDX_M; @@ -4777,6 +4884,186 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, return ret; } +static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, + u32 cqn, void *mb_buf, u64 *mtts_wqe, + u64 *mtts_idx, dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx) +{ + struct hns_roce_srq_context *srq_context; + + srq_context = mb_buf; + memset(srq_context, 0, sizeof(*srq_context)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, + SRQC_BYTE_4_SRQ_ST_S, 1); + + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, + (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.srqwqe_hop_num)); + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, + ilog2(srq->wqe_cnt)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, + SRQC_BYTE_4_SRQN_S, srq->srqn); + + roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, + SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + + srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); + + roce_set_field(srq_context->byte_24_wqe_bt_ba, + SRQC_BYTE_24_SRQ_WQE_BT_BA_M, + SRQC_BYTE_24_SRQ_WQE_BT_BA_S, + dma_handle_wqe >> 35); + + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, + SRQC_BYTE_28_PD_S, pdn); + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, + SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : + fls(srq->max_gs - 1)); + + srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); + roce_set_field(srq_context->rsv_idx_bt_ba, + SRQC_BYTE_36_SRQ_IDX_BT_BA_M, + SRQC_BYTE_36_SRQ_IDX_BT_BA_S, + dma_handle_idx >> 35); + + srq_context->idx_cur_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, + hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.idx_hop_num); + + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); + + srq_context->idx_nxt_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); + roce_set_field(srq_context->rsv_idxnxtblkaddr, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, + cqn); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); + + roce_set_bit(srq_context->db_record_addr_record_en, + SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); +} + +static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_srq_context *srqc_mask; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + if (srq_attr_mask & IB_SRQ_LIMIT) { + if (srq_attr->srq_limit >= srq->wqe_cnt) + return -EINVAL; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; + + memset(srqc_mask, 0xff, sizeof(*srqc_mask)); + + roce_set_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); + roce_set_field(srqc_mask->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, + HNS_ROCE_CMD_MODIFY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to handle cmd of modifying SRQ, ret = %d.\n", + ret); + return ret; + } + } + + return 0; +} + +static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_cmd_mailbox *mailbox; + int limit_wl; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, + HNS_ROCE_CMD_QUERY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd of querying SRQ, ret = %d.\n", + ret); + goto out; + } + + limit_wl = roce_get_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S); + + attr->srq_limit = limit_wl; + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs; + + memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { struct hns_roce_dev *hr_dev = to_hr_dev(cq->device); @@ -5588,296 +5875,6 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) destroy_workqueue(hr_dev->irq_workq); } -static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, - u32 cqn, void *mb_buf, u64 *mtts_wqe, - u64 *mtts_idx, dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx) -{ - struct hns_roce_srq_context *srq_context; - - srq_context = mb_buf; - memset(srq_context, 0, sizeof(*srq_context)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, - SRQC_BYTE_4_SRQ_ST_S, 1); - - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, - (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - hr_dev->caps.srqwqe_hop_num)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->wqe_cnt)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, - SRQC_BYTE_4_SRQN_S, srq->srqn); - - roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); - - roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, - SRQC_BYTE_12_SRQ_XRCD_S, xrcd); - - srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); - - roce_set_field(srq_context->byte_24_wqe_bt_ba, - SRQC_BYTE_24_SRQ_WQE_BT_BA_M, - SRQC_BYTE_24_SRQ_WQE_BT_BA_S, - dma_handle_wqe >> 35); - - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, - SRQC_BYTE_28_PD_S, pdn); - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, - SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : - fls(srq->max_gs - 1)); - - srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); - roce_set_field(srq_context->rsv_idx_bt_ba, - SRQC_BYTE_36_SRQ_IDX_BT_BA_M, - SRQC_BYTE_36_SRQ_IDX_BT_BA_S, - dma_handle_idx >> 35); - - srq_context->idx_cur_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, - hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - hr_dev->caps.idx_hop_num); - - roce_set_field( - srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); - roce_set_field( - srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); - - srq_context->idx_nxt_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); - roce_set_field(srq_context->rsv_idxnxtblkaddr, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, - cqn); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); - - roce_set_bit(srq_context->db_record_addr_record_en, - SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); -} - -static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, - struct ib_srq_attr *srq_attr, - enum ib_srq_attr_mask srq_attr_mask, - struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); - struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_srq_context *srq_context; - struct hns_roce_srq_context *srqc_mask; - struct hns_roce_cmd_mailbox *mailbox; - int ret; - - if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->wqe_cnt) - return -EINVAL; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - srq_context = mailbox->buf; - srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; - - memset(srqc_mask, 0xff, sizeof(*srqc_mask)); - - roce_set_field(srq_context->byte_8_limit_wl, - SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); - roce_set_field(srqc_mask->byte_8_limit_wl, - SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); - - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, - HNS_ROCE_CMD_MODIFY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying SRQ, ret = %d\n", - ret); - return ret; - } - } - - return 0; -} - -static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); - struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_srq_context *srq_context; - struct hns_roce_cmd_mailbox *mailbox; - int limit_wl; - int ret; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - srq_context = mailbox->buf; - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, - HNS_ROCE_CMD_QUERY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); - if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when querying SRQ, ret = %d\n", - ret); - goto out; - } - - limit_wl = roce_get_field(srq_context->byte_8_limit_wl, - SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S); - - attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; - - memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); - -out: - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; -} - -static int find_empty_entry(struct hns_roce_idx_que *idx_que, - unsigned long size) -{ - int wqe_idx; - - if (unlikely(bitmap_full(idx_que->bitmap, size))) - return -ENOSPC; - - wqe_idx = find_first_zero_bit(idx_que->bitmap, size); - - bitmap_set(idx_que->bitmap, wqe_idx, 1); - - return wqe_idx; -} - -static void fill_idx_queue(struct hns_roce_idx_que *idx_que, - int cur_idx, int wqe_idx) -{ - unsigned int *addr; - - addr = (unsigned int *)hns_roce_buf_offset(idx_que->mtr.kmem, - cur_idx * idx_que->entry_sz); - *addr = wqe_idx; -} - -static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, - const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); - struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_v2_db srq_db; - unsigned long flags; - int ret = 0; - int wqe_idx; - void *wqe; - int nreq; - int ind; - int i; - - spin_lock_irqsave(&srq->lock, flags); - - ind = srq->head & (srq->wqe_cnt - 1); - - for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge > srq->max_gs)) { - ret = -EINVAL; - *bad_wr = wr; - break; - } - - if (unlikely(srq->head == srq->tail)) { - ret = -ENOMEM; - *bad_wr = wr; - break; - } - - wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); - if (wqe_idx < 0) { - ret = -ENOMEM; - *bad_wr = wr; - break; - } - - fill_idx_queue(&srq->idx_que, ind, wqe_idx); - wqe = get_srq_wqe(srq, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - - for (i = 0; i < wr->num_sge; ++i) { - dseg[i].len = cpu_to_le32(wr->sg_list[i].length); - dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); - dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); - } - - if (i < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); - dseg[i].addr = 0; - } - - srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->wqe_cnt - 1); - } - - if (likely(nreq)) { - srq->head += nreq; - - /* - * Make sure that descriptors are written before - * doorbell record. - */ - wmb(); - - srq_db.byte_4 = - cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | - (srq->srqn & V2_DB_BYTE_4_TAG_M)); - srq_db.parameter = cpu_to_le32(srq->head); - - hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); - - } - - spin_unlock_irqrestore(&srq->lock, flags); - - return ret; -} - static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = { .query_cqc_info = hns_roce_v2_query_cqc_info, }; From 67954a6e379b4678536ef14a1d49ea78fcdc4a1f Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 28 Apr 2020 19:03:43 +0800 Subject: [PATCH 233/562] RDMA/hns: Optimize SRQ buffer size calculating process Optimize the SRQ's WQE buffer parameters calculating process to make the codes more readable by using new functions about multi-hop addressing to calculating capabilities of SRQ. Link: https://lore.kernel.org/r/1588071823-40200-6-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 29 ++++++++++----------- drivers/infiniband/hw/hns/hns_roce_srq.c | 16 ++++++------ 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 100224358a49..5cac14d7be90 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -472,7 +472,7 @@ struct hns_roce_cq { struct hns_roce_idx_que { struct hns_roce_mtr mtr; - int entry_sz; + int entry_shift; unsigned long *bitmap; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0b79dafe919e..f70370d24512 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -699,6 +699,12 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n) return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) +{ + return hns_roce_buf_offset(idx_que->mtr.kmem, + n << idx_que->entry_shift); +} + static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) { /* always called with interrupts disabled. */ @@ -725,16 +731,6 @@ static int find_empty_entry(struct hns_roce_idx_que *idx_que, return wqe_idx; } -static void fill_idx_queue(struct hns_roce_idx_que *idx_que, - int cur_idx, int wqe_idx) -{ - unsigned int *addr; - - addr = (unsigned int *)hns_roce_buf_offset(idx_que->mtr.kmem, - cur_idx * idx_que->entry_sz); - *addr = wqe_idx; -} - static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) @@ -744,6 +740,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; unsigned long flags; + __le32 *srq_idx; int ret = 0; int wqe_idx; void *wqe; @@ -775,7 +772,6 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - fill_idx_queue(&srq->idx_que, ind, wqe_idx); wqe = get_srq_wqe(srq, wqe_idx); dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; @@ -791,6 +787,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, dseg[i].addr = 0; } + srq_idx = get_idx_buf(&srq->idx_que, ind); + *srq_idx = cpu_to_le32(wqe_idx); + srq->wrid[wqe_idx] = wr->wr_id; ind = (ind + 1) & (srq->wqe_cnt - 1); } @@ -4901,8 +4900,8 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, - (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - hr_dev->caps.srqwqe_hop_num)); + to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, + srq->wqe_cnt)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, ilog2(srq->wqe_cnt)); @@ -4944,8 +4943,8 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, - hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - hr_dev->caps.idx_hop_num); + to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, + srq->wqe_cnt)); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index e413a9737ae6..6e5a2adc2ab2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -181,16 +181,15 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int sge_size; int err; - sge_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - HNS_ROCE_SGE_SIZE * srq->max_gs)); - - srq->wqe_shift = ilog2(sge_size); + srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * + srq->max_gs))); buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_ADDR_SHIFT; - buf_attr.region[0].size = srq->wqe_cnt * sge_size; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->wqe_shift); buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; buf_attr.region_count = 1; buf_attr.fixed_page = true; @@ -217,10 +216,11 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, struct hns_roce_buf_attr buf_attr = {}; int err; - srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; + srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_ADDR_SHIFT; - buf_attr.region[0].size = srq->wqe_cnt * HNS_ROCE_IDX_QUE_ENTRY_SZ; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; buf_attr.fixed_page = true; From 23190b8f47de2d2983d6052f6eb7781779db8eb3 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Thu, 30 Apr 2020 18:31:29 +0800 Subject: [PATCH 234/562] RDMA/hns: Fix comments with non-English symbols There is a comments with some chinese semicolons that cause encoding issues each time hns_roc_hw_v2.h was modified from a IDE. So fix this by using correct symbols. Link: https://lore.kernel.org/r/1588242691-12913-2-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 82dd9f6f4845..05bfe078d537 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1241,10 +1241,9 @@ struct hns_roce_func_clear { }; #define FUNC_CLEAR_RST_FUN_DONE_S 0 -/* Each physical function manages up to 248 virtual functionsï¼› - * it takes up to 100ms for each function to execute clearï¼› - * if an abnormal reset occurs, it is executed twice at most; - * so it takes up to 249 * 2 * 100ms. +/* Each physical function manages up to 248 virtual functions, it takes up to + * 100ms for each function to execute clear. If an abnormal reset occurs, it is + * executed twice at most, so it takes up to 249 * 2 * 100ms. */ #define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100) #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40 From b713128de7a17c7173f869323f8a5f0593e52538 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Thu, 30 Apr 2020 18:31:30 +0800 Subject: [PATCH 235/562] RDMA/hns: Adjust lp_pktn_ini dynamically lp_pktn_ini means the number of loopback slice packets for long messages, it should depend on MTU(fixed to 4096B currently) and max size of SQ inline. Link: https://lore.kernel.org/r/1588242691-12913-3-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f70370d24512..7643b066deea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3977,7 +3977,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */ roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); + V2_QPC_BYTE_56_LP_PKTN_INI_S, + ilog2(hr_dev->caps.max_sq_inline / IB_MTU_4096)); roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); From e4faa478c6b8af5fc4afe20de693417354a57e4d Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Thu, 30 Apr 2020 18:31:31 +0800 Subject: [PATCH 236/562] RDMA/hns: Remove redundant assignment of caps These caps are assigned in query_pf_caps() or set_default_caps(), and should not be assigned out of these two functions. Link: https://lore.kernel.org/r/1588242691-12913-4-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7643b066deea..ad9a11a2cd0d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2079,11 +2079,6 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) hr_dev->vendor_part_id = hr_dev->pci_dev->device; hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); - caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; - caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; - caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; - caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; - caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K; caps->pbl_buf_pg_sz = 0; caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; From 31578defe4eb816439d5e3351923e90f6321b3c8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 May 2020 09:55:11 +0300 Subject: [PATCH 237/562] RDMA/mlx5: Update mlx5_ib to use new cmd interface Reuse newly introduced mlx5_cmd_exec_in() and mlx5_cmd_exec_inout() to reduce code duplication in mlx5_ib module. Link: https://lore.kernel.org/r/20200506065513.4668-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 114 ++++++++------------------ drivers/infiniband/hw/mlx5/cmd.h | 4 +- drivers/infiniband/hw/mlx5/cong.c | 4 +- drivers/infiniband/hw/mlx5/main.c | 5 +- drivers/infiniband/hw/mlx5/odp.c | 5 +- drivers/infiniband/hw/mlx5/srq_cmd.c | 115 ++++++++++++--------------- 6 files changed, 91 insertions(+), 156 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index a2fcbc49131e..cc24c711e92a 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -1,46 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2017, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2017-2020, Mellanox Technologies inc. All rights reserved. */ #include "cmd.h" int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey) { - u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; int err; MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); if (!err) *mkey = MLX5_GET(query_special_contexts_out, out, dump_fill_mkey); @@ -50,12 +23,12 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey) int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) { u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; int err; MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); if (!err) *null_mkey = MLX5_GET(query_special_contexts_out, out, null_mkey); @@ -63,23 +36,15 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) } int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, - void *out, int out_size) + void *out) { - u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { }; + u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = {}; MLX5_SET(query_cong_params_in, in, opcode, MLX5_CMD_OP_QUERY_CONG_PARAMS); MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point); - return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); -} - -int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, - void *in, int in_size) -{ - u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { }; - - return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); + return mlx5_cmd_exec_inout(dev, query_cong_params, in, out); } int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, @@ -133,7 +98,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, MLX5_SET64(alloc_memic_in, in, range_start_addr, hw_start_addr + (page_idx * PAGE_SIZE)); - ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + ret = mlx5_cmd_exec_inout(dev, alloc_memic, in, out); if (ret) { spin_lock(&dm->lock); bitmap_clear(dm->memic_alloc_pages, @@ -162,8 +127,7 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) struct mlx5_core_dev *dev = dm->dev; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); - u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {}; u64 start_page_idx; int err; @@ -174,7 +138,7 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr); MLX5_SET(dealloc_memic_in, in, memic_size, length); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_in(dev, dealloc_memic, in); if (err) return; @@ -198,49 +162,46 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); MLX5_SET(destroy_tir_in, in, tirn, tirn); MLX5_SET(destroy_tir_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, destroy_tir, in); } void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); MLX5_SET(destroy_tis_in, in, tisn, tisn); MLX5_SET(destroy_tis_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, destroy_tis, in); } void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); MLX5_SET(destroy_rqt_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, destroy_rqt, in); } int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {}; + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {}; int err; MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); MLX5_SET(alloc_transport_domain_in, in, uid, uid); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out); if (!err) *tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain); @@ -251,32 +212,29 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {}; MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); MLX5_SET(dealloc_transport_domain_in, in, uid, uid); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, dealloc_transport_domain, in); } void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); MLX5_SET(dealloc_pd_in, in, pd, pdn); MLX5_SET(dealloc_pd_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, dealloc_pd, in); } int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {}; - u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {}; + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {}; void *gid; MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); @@ -284,14 +242,13 @@ int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, MLX5_SET(attach_to_mcg_in, in, uid, uid); gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); memcpy(gid, mgid, sizeof(*mgid)); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev, attach_to_mcg, in); } int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {}; - u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {}; + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {}; void *gid; MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); @@ -299,18 +256,18 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, MLX5_SET(detach_from_mcg_in, in, uid, uid); gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); memcpy(gid, mgid, sizeof(*mgid)); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev, detach_from_mcg, in); } int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid) { u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {}; int err; MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); MLX5_SET(alloc_xrcd_in, in, uid, uid); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, alloc_xrcd, in, out); if (!err) *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); return err; @@ -318,13 +275,12 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid) int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {}; MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); MLX5_SET(dealloc_xrcd_in, in, uid, uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev, dealloc_xrcd, in); } int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, @@ -350,7 +306,7 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, data = MLX5_ADDR_OF(mad_ifc_in, in, mad); memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_exec_inout(dev, mad_ifc, in, out); if (err) goto out; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 43079b18d9b4..f4d8558db434 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -40,10 +40,8 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, - void *out, int out_size); + void *out); int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); -int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, - void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index de4da92b81a6..b9291e482428 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -290,7 +290,7 @@ static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num, node = mlx5_ib_param_to_node(offset); - err = mlx5_cmd_query_cong_params(mdev, node, out, outlen); + err = mlx5_cmd_query_cong_params(mdev, node, out); if (err) goto free; @@ -339,7 +339,7 @@ static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num, MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, attr_mask); - err = mlx5_cmd_modify_cong_params(mdev, in, inlen); + err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in); kvfree(in); alloc_err: mlx5_ib_put_native_port_mdev(dev, port_num + 1); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0b8cc219e085..0c0fbf4160e0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2562,7 +2562,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct mlx5_ib_alloc_pd_resp resp; int err; u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); @@ -2570,8 +2570,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) uid = context ? context->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); - err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), - out, sizeof(out)); + err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out); if (err) return err; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 70577d546567..7d2ec9ee5097 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -447,8 +447,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, { int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ? pfault->wqe.wq_num : pfault->token; - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { }; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { }; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {}; int err; MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); @@ -457,7 +456,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); MLX5_SET(page_fault_resume_in, in, error, !!error); - err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_in(dev->mdev, page_fault_resume, in); if (err) mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n", wq_num, err); diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index c851570791af..bc50a712bf2e 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -132,38 +132,33 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_srq_in)] = {}; - MLX5_SET(destroy_srq_in, srq_in, opcode, - MLX5_CMD_OP_DESTROY_SRQ); - MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); + MLX5_SET(destroy_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, - sizeof(srq_out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_srq, in); } static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {}; - MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); - MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, srq_in, lwm, lwm); - MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, - sizeof(srq_out)); + return mlx5_cmd_exec_in(dev->mdev, arm_rq, in); } static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { - u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_srq_in)] = {}; u32 *srq_out; void *srqc; int err; @@ -172,20 +167,18 @@ static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, if (!srq_out) return -ENOMEM; - MLX5_SET(query_srq_in, srq_in, opcode, - MLX5_CMD_OP_QUERY_SRQ); - MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, - MLX5_ST_SZ_BYTES(query_srq_out)); + MLX5_SET(query_srq_in, in, opcode, MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, in, srqn, srq->srqn); + err = mlx5_cmd_exec_inout(dev->mdev, query_srq, in, out); if (err) goto out; - srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); get_srqc(srqc, out); if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) out->flags |= MLX5_SRQ_FLAG_ERR; out: - kvfree(srq_out); + kvfree(out); return err; } @@ -234,39 +227,35 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {}; - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); + MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_xrc_srq, in); } static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {}; - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); + MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, op_mod, + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec_in(dev->mdev, arm_xrc_srq, in); } static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {}; u32 *xrcsrq_out; void *xrc_srqc; int err; @@ -274,14 +263,11 @@ static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); if (!xrcsrq_out) return -ENOMEM; - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, in, xrc_srqn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + err = mlx5_cmd_exec_inout(dev->mdev, query_xrc_srq, in, xrcsrq_out); if (err) goto out; @@ -341,13 +327,12 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_rmp, in); } static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, @@ -384,7 +369,7 @@ static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); + err = mlx5_cmd_exec_inout(dev->mdev, modify_rmp, in, out); out: kvfree(in); @@ -414,7 +399,7 @@ static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP); MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen); + err = mlx5_cmd_exec_inout(dev->mdev, query_rmp, rmp_in, rmp_out); if (err) goto out; @@ -477,36 +462,34 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {}; MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); - MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_xrq, in); } static int arm_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { - u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {}; - MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, lwm, lwm); MLX5_SET(arm_rq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev->mdev, arm_rq, in); } static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { - u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {}; u32 *xrq_out; int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); void *xrqc; @@ -519,7 +502,7 @@ static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen); + err = mlx5_cmd_exec_inout(dev->mdev, query_xrq, in, xrq_out); if (err) goto out; From 6671cde83ddb9a65fd0a69e0896d089ace0e195c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 6 May 2020 09:55:12 +0300 Subject: [PATCH 238/562] RDMA/mlx5: Refactor mlx5_post_send() to improve readability Add small helpers in order to avoid code duplication and improve code readability. Decrease the amount of code in the gigantic post_send function and divide it to readable methods that will help in code maintenance in the future. Link: https://lore.kernel.org/r/20200506065513.4668-3-leon@kernel.org Signed-off-by: Max Gurtovoy Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 490 ++++++++++++++++++-------------- 1 file changed, 276 insertions(+), 214 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e624886bcf85..1e3dcfd1b230 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5234,18 +5234,279 @@ static void finish_wqe(struct mlx5_ib_qp *qp, cur_edge; } +static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size) +{ + set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); + *seg += sizeof(struct mlx5_wqe_raddr_seg); + *size += sizeof(struct mlx5_wqe_raddr_seg) / 16; +} + +static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, unsigned int idx) +{ + qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; + (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey); + set_linv_wr(qp, seg, size, cur_edge); +} + +static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int idx) +{ + qp->sq.wr_data[idx] = IB_WR_REG_MR; + (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key); + return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true); +} + +static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int *idx, int nreq, + struct ib_sig_domain *domain, u32 psv_index, + u8 next_fence) +{ + int err; + + /* + * SET_PSV WQEs are not signaled and solicited on error. + */ + err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, + false, true); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + goto out; + } + err = set_psv_wr(domain, psv_index, seg, size); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + goto out; + } + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, + next_fence, MLX5_OPCODE_SET_PSV); + +out: + return err; +} + +static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int *idx, int nreq, u8 fence, + u8 next_fence) +{ + struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr pa_pi_mr; + struct ib_sig_attrs *sig_attrs; + struct ib_reg_wr reg_pi_wr; + int err; + + qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY; + + mr = to_mmr(reg_wr(wr)->mr); + pi_mr = mr->pi_mr; + + if (pi_mr) { + memset(®_pi_wr, 0, + sizeof(struct ib_reg_wr)); + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + prot registration */ + err = set_reg_wr(qp, ®_pi_wr, seg, size, cur_edge, false); + if (unlikely(err)) + goto out; + + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, + nreq, fence, MLX5_OPCODE_UMR); + + err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + goto out; + } + } else { + memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr)); + /* No UMR, use local_dma_lkey */ + pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey; + pa_pi_mr.ndescs = mr->ndescs; + pa_pi_mr.data_length = mr->data_length; + pa_pi_mr.data_iova = mr->data_iova; + if (mr->meta_ndescs) { + pa_pi_mr.meta_ndescs = mr->meta_ndescs; + pa_pi_mr.meta_length = mr->meta_length; + pa_pi_mr.pi_iova = mr->pi_iova; + } + + pa_pi_mr.ibmr.length = mr->ibmr.length; + mr->pi_mr = &pa_pi_mr; + } + (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey); + /* UMR for sig MR */ + err = set_pi_umr_wr(wr, qp, seg, size, cur_edge); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + goto out; + } + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); + + sig_attrs = mr->ibmr.sig_attrs; + err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, + &sig_attrs->mem, mr->sig->psv_memory.psv_idx, + next_fence); + if (unlikely(err)) + goto out; + + err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, + &sig_attrs->wire, mr->sig->psv_wire.psv_idx, + next_fence); + if (unlikely(err)) + goto out; + + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + +out: + return err; +} + +static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int *idx, int nreq, u8 fence, + u8 next_fence, int *num_sge) +{ + int err = 0; + + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + handle_rdma_op(wr, seg, size); + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); + err = -EOPNOTSUPP; + goto out; + + case IB_WR_LOCAL_INV: + handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx); + *num_sge = 0; + break; + + case IB_WR_REG_MR: + err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx); + if (unlikely(err)) + goto out; + *num_sge = 0; + break; + + case IB_WR_REG_MR_INTEGRITY: + err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size, + cur_edge, idx, nreq, fence, + next_fence); + if (unlikely(err)) + goto out; + *num_sge = 0; + break; + + default: + break; + } + +out: + return err; +} + +static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size) +{ + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + handle_rdma_op(wr, seg, size); + break; + default: + break; + } +} + +static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, void **seg, + int *size, void **cur_edge) +{ + set_datagram_seg(*seg, wr); + *seg += sizeof(struct mlx5_wqe_datagram_seg); + *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +} + +static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + void **seg, int *size, void **cur_edge) +{ + set_datagram_seg(*seg, wr); + *seg += sizeof(struct mlx5_wqe_datagram_seg); + *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + /* handle qp that supports ud offload */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { + struct mlx5_wqe_eth_pad *pad; + + pad = *seg; + memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); + *seg += sizeof(struct mlx5_wqe_eth_pad); + *size += sizeof(struct mlx5_wqe_eth_pad) / 16; + set_eth_seg(wr, qp, seg, size, cur_edge); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + } +} + +static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, unsigned int idx) +{ + int err = 0; + + if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { + err = -EINVAL; + mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); + goto out; + } + + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); + err = set_reg_umr_segment(dev, *seg, wr, + !!(MLX5_CAP_GEN(dev->mdev, atomic))); + if (unlikely(err)) + goto out; + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + set_reg_mkey_segment(*seg, wr); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +out: + return err; +} + static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr, bool drain) { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; - struct ib_reg_wr reg_pi_wr; struct mlx5_ib_qp *qp; - struct mlx5_ib_mr *mr; - struct mlx5_ib_mr *pi_mr; - struct mlx5_ib_mr pa_pi_mr; - struct ib_sig_attrs *sig_attrs; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; void *cur_edge; @@ -5321,186 +5582,20 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, size += sizeof(*xrc) / 16; /* fall through */ case IB_QPT_RC: - switch (wr->opcode) { - case IB_WR_RDMA_READ: - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, rdma_wr(wr)->remote_addr, - rdma_wr(wr)->rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - size += sizeof(struct mlx5_wqe_raddr_seg) / 16; - break; - - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); - err = -ENOSYS; + err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size, + &cur_edge, &idx, nreq, fence, + next_fence, &num_sge); + if (unlikely(err)) { *bad_wr = wr; goto out; - - case IB_WR_LOCAL_INV: - qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; - ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - set_linv_wr(qp, &seg, &size, &cur_edge); - num_sge = 0; - break; - - case IB_WR_REG_MR: - qp->sq.wr_data[idx] = IB_WR_REG_MR; - ctrl->imm = cpu_to_be32(reg_wr(wr)->key); - err = set_reg_wr(qp, reg_wr(wr), &seg, &size, - &cur_edge, true); - if (err) { - *bad_wr = wr; - goto out; - } - num_sge = 0; - break; - - case IB_WR_REG_MR_INTEGRITY: - qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY; - - mr = to_mmr(reg_wr(wr)->mr); - pi_mr = mr->pi_mr; - - if (pi_mr) { - memset(®_pi_wr, 0, - sizeof(struct ib_reg_wr)); - - reg_pi_wr.mr = &pi_mr->ibmr; - reg_pi_wr.access = reg_wr(wr)->access; - reg_pi_wr.key = pi_mr->ibmr.rkey; - - ctrl->imm = cpu_to_be32(reg_pi_wr.key); - /* UMR for data + prot registration */ - err = set_reg_wr(qp, ®_pi_wr, &seg, - &size, &cur_edge, - false); - if (err) { - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, - cur_edge, idx, wr->wr_id, - nreq, fence, - MLX5_OPCODE_UMR); - - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, &cur_edge, - nreq); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - } else { - memset(&pa_pi_mr, 0, - sizeof(struct mlx5_ib_mr)); - /* No UMR, use local_dma_lkey */ - pa_pi_mr.ibmr.lkey = - mr->ibmr.pd->local_dma_lkey; - - pa_pi_mr.ndescs = mr->ndescs; - pa_pi_mr.data_length = mr->data_length; - pa_pi_mr.data_iova = mr->data_iova; - if (mr->meta_ndescs) { - pa_pi_mr.meta_ndescs = - mr->meta_ndescs; - pa_pi_mr.meta_length = - mr->meta_length; - pa_pi_mr.pi_iova = mr->pi_iova; - } - - pa_pi_mr.ibmr.length = mr->ibmr.length; - mr->pi_mr = &pa_pi_mr; - } - ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - /* UMR for sig MR */ - err = set_pi_umr_wr(wr, qp, &seg, &size, - &cur_edge); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, - MLX5_OPCODE_UMR); - - /* - * SET_PSV WQEs are not signaled and solicited - * on error - */ - sig_attrs = mr->ibmr.sig_attrs; - err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, &cur_edge, nreq, false, - true); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - err = set_psv_wr(&sig_attrs->mem, - mr->sig->psv_memory.psv_idx, - &seg, &size); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, next_fence, - MLX5_OPCODE_SET_PSV); - - err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, &cur_edge, nreq, false, - true); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - err = set_psv_wr(&sig_attrs->wire, - mr->sig->psv_wire.psv_idx, - &seg, &size); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, next_fence, - MLX5_OPCODE_SET_PSV); - - qp->next_fence = - MLX5_FENCE_MODE_INITIATOR_SMALL; - num_sge = 0; + } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) { goto skip_psv; - - default: - break; } break; case IB_QPT_UC: - switch (wr->opcode) { - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, rdma_wr(wr)->remote_addr, - rdma_wr(wr)->rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - size += sizeof(struct mlx5_wqe_raddr_seg) / 16; - break; - - default: - break; - } + handle_qpt_uc(wr, &seg, &size); break; - case IB_QPT_SMI: if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); @@ -5510,49 +5605,16 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } /* fall through */ case MLX5_IB_QPT_HW_GSI: - set_datagram_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_datagram_seg); - size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); - + handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge); break; case IB_QPT_UD: - set_datagram_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_datagram_seg); - size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); - - /* handle qp that supports ud offload */ - if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { - struct mlx5_wqe_eth_pad *pad; - - pad = seg; - memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); - seg += sizeof(struct mlx5_wqe_eth_pad); - size += sizeof(struct mlx5_wqe_eth_pad) / 16; - set_eth_seg(wr, qp, &seg, &size, &cur_edge); - handle_post_send_edge(&qp->sq, &seg, size, - &cur_edge); - } + handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); break; case MLX5_IB_QPT_REG_UMR: - if (wr->opcode != MLX5_IB_WR_UMR) { - err = -EINVAL; - mlx5_ib_warn(dev, "bad opcode\n"); - goto out; - } - qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); + err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, + &size, &cur_edge, idx); if (unlikely(err)) goto out; - seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); - set_reg_mkey_segment(seg, wr); - seg += sizeof(struct mlx5_mkey_seg); - size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); break; default: From 029e88fd1e6142ded73f07e2baef3e8a2a87e0ed Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 May 2020 09:55:13 +0300 Subject: [PATCH 239/562] RDMA/mlx5: Move all WR logic from qp.c to separate file Split qp.c by removing all WR logic to separate file. Link: https://lore.kernel.org/r/20200506065513.4668-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/Makefile | 3 +- drivers/infiniband/hw/mlx5/main.c | 5 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 - drivers/infiniband/hw/mlx5/qp.c | 1550 +------------------------- drivers/infiniband/hw/mlx5/wr.c | 1504 +++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/wr.h | 76 ++ 6 files changed, 1589 insertions(+), 1553 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/wr.c create mode 100644 drivers/infiniband/hw/mlx5/wr.h diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 228be05fbaf8..8cca61c671f8 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -16,7 +16,8 @@ mlx5_ib-y := ah.o \ qpc.o \ restrack.o \ srq.o \ - srq_cmd.o + srq_cmd.o \ + wr.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0c0fbf4160e0..40bf71efaeb0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -61,6 +61,7 @@ #include "cmd.h" #include "srq.h" #include "qp.h" +#include "wr.h" #include #include #include @@ -6657,8 +6658,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .modify_qp = mlx5_ib_modify_qp, .modify_srq = mlx5_ib_modify_srq, .poll_cq = mlx5_ib_poll_cq, - .post_recv = mlx5_ib_post_recv, - .post_send = mlx5_ib_post_send, + .post_recv = mlx5_ib_post_recv_nodrain, + .post_send = mlx5_ib_post_send_nodrain, .post_srq_recv = mlx5_ib_post_srq_recv, .process_mad = mlx5_ib_process_mad, .query_ah = mlx5_ib_query_ah, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 3041808773e6..482b54eb9764 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1178,10 +1178,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); void mlx5_ib_drain_sq(struct ib_qp *qp); void mlx5_ib_drain_rq(struct ib_qp *qp); -int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, size_t buflen, size_t *bc); int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1e3dcfd1b230..fb2ea3bf9be4 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -40,6 +40,7 @@ #include "ib_rep.h" #include "cmd.h" #include "qp.h" +#include "wr.h" enum { MLX5_IB_ACK_REQ_FREQ = 8, @@ -52,32 +53,6 @@ enum { MLX5_IB_LINK_TYPE_ETH = 1 }; -enum { - MLX5_IB_SQ_STRIDE = 6, - MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64, -}; - -static const u32 mlx5_ib_opcode[] = { - [IB_WR_SEND] = MLX5_OPCODE_SEND, - [IB_WR_LSO] = MLX5_OPCODE_LSO, - [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, - [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, - [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, - [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, - [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, - [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, - [IB_WR_REG_MR] = MLX5_OPCODE_UMR, - [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, - [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, - [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, -}; - -struct mlx5_wqe_eth_pad { - u8 rsvd0[16]; -}; - enum raw_qp_set_mask_map { MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, MLX5_RAW_QP_RATE_LIMIT = 1UL << 1, @@ -1061,30 +1036,6 @@ static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, mlx5_frag_buf_free(dev->mdev, &qp->buf); } -/* get_sq_edge - Get the next nearby edge. - * - * An 'edge' is defined as the first following address after the end - * of the fragment or the SQ. Accordingly, during the WQE construction - * which repetitively increases the pointer to write the next data, it - * simply should check if it gets to an edge. - * - * @sq - SQ buffer. - * @idx - Stride index in the SQ buffer. - * - * Return: - * The new edge. - */ -static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) -{ - void *fragment_end; - - fragment_end = mlx5_frag_buf_get_wqe - (&sq->fbc, - mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); - - return fragment_end + MLX5_SEND_WQE_BB; -} - static int _create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, u32 **in, int *inlen, @@ -2335,11 +2286,6 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re } } -static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) -{ - return to_mpd(qp->ibqp.pd); -} - static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) @@ -3758,7 +3704,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!context) return -ENOMEM; - pd = get_pd(qp); + pd = to_mpd(qp->ibqp.pd); context->flags = cpu_to_be32(mlx5_st << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { @@ -4286,1494 +4232,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return err; } -static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, - u32 wqe_sz, void **cur_edge) -{ - u32 idx; - - idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); - *cur_edge = get_sq_edge(sq, idx); - - *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); -} - -/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the - * next nearby edge and get new address translation for current WQE position. - * @sq - SQ buffer. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @cur_edge: Updated current edge. - */ -static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, - u32 wqe_sz, void **cur_edge) -{ - if (likely(*seg != *cur_edge)) - return; - - _handle_post_send_edge(sq, seg, wqe_sz, cur_edge); -} - -/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's - * pointers. At the end @seg is aligned to 16B regardless the copied size. - * @sq - SQ buffer. - * @cur_edge: Updated current edge. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @src: Pointer to copy from. - * @n: Number of bytes to copy. - */ -static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, - void **seg, u32 *wqe_sz, const void *src, - size_t n) -{ - while (likely(n)) { - size_t leftlen = *cur_edge - *seg; - size_t copysz = min_t(size_t, leftlen, n); - size_t stride; - - memcpy(*seg, src, copysz); - - n -= copysz; - src += copysz; - stride = !n ? ALIGN(copysz, 16) : copysz; - *seg += stride; - *wqe_sz += stride >> 4; - handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); - } -} - -static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) -{ - struct mlx5_ib_cq *cq; - unsigned cur; - - cur = wq->head - wq->tail; - if (likely(cur + nreq < wq->max_post)) - return 0; - - cq = to_mcq(ib_cq); - spin_lock(&cq->lock); - cur = wq->head - wq->tail; - spin_unlock(&cq->lock); - - return cur + nreq >= wq->max_post; -} - -static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, - u64 remote_addr, u32 rkey) -{ - rseg->raddr = cpu_to_be64(remote_addr); - rseg->rkey = cpu_to_be32(rkey); - rseg->reserved = 0; -} - -static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, - void **seg, int *size, void **cur_edge) -{ - struct mlx5_wqe_eth_seg *eseg = *seg; - - memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); - - if (wr->send_flags & IB_SEND_IP_CSUM) - eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - - if (wr->opcode == IB_WR_LSO) { - struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); - size_t left, copysz; - void *pdata = ud_wr->header; - size_t stride; - - left = ud_wr->hlen; - eseg->mss = cpu_to_be16(ud_wr->mss); - eseg->inline_hdr.sz = cpu_to_be16(left); - - /* memcpy_send_wqe should get a 16B align address. Hence, we - * first copy up to the current edge and then, if needed, - * fall-through to memcpy_send_wqe. - */ - copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, - left); - memcpy(eseg->inline_hdr.start, pdata, copysz); - stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - - sizeof(eseg->inline_hdr.start) + copysz, 16); - *size += stride / 16; - *seg += stride; - - if (copysz < left) { - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - left -= copysz; - pdata += copysz; - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, - left); - } - - return; - } - - *seg += sizeof(struct mlx5_wqe_eth_seg); - *size += sizeof(struct mlx5_wqe_eth_seg) / 16; -} - -static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, - const struct ib_send_wr *wr) -{ - memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); - dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); - dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); -} - -static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) -{ - dseg->byte_count = cpu_to_be32(sg->length); - dseg->lkey = cpu_to_be32(sg->lkey); - dseg->addr = cpu_to_be64(sg->addr); -} - -static u64 get_xlt_octo(u64 bytes) -{ - return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / - MLX5_IB_UMR_OCTOWORD; -} - -static __be64 frwr_mkey_mask(bool atomic) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR | - MLX5_MKEY_MASK_EN_RINVAL | - MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_SMALL_FENCE | - MLX5_MKEY_MASK_FREE; - - if (atomic) - result |= MLX5_MKEY_MASK_A; - - return cpu_to_be64(result); -} - -static __be64 sig_mkey_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR | - MLX5_MKEY_MASK_EN_SIGERR | - MLX5_MKEY_MASK_EN_RINVAL | - MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_SMALL_FENCE | - MLX5_MKEY_MASK_FREE | - MLX5_MKEY_MASK_BSF_EN; - - return cpu_to_be64(result); -} - -static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr, u8 flags, bool atomic) -{ - int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; - - memset(umr, 0, sizeof(*umr)); - - umr->flags = flags; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); - umr->mkey_mask = frwr_mkey_mask(atomic); -} - -static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) -{ - memset(umr, 0, sizeof(*umr)); - umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = MLX5_UMR_INLINE; -} - -static __be64 get_umr_enable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_disable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_translation_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_access_mask(int atomic) -{ - u64 result; - - result = MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW; - - if (atomic) - result |= MLX5_MKEY_MASK_A; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_pd_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_PD; - - return cpu_to_be64(result); -} - -static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) -{ - if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || - (mask & MLX5_MKEY_MASK_A && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) - return -EPERM; - return 0; -} - -static int set_reg_umr_segment(struct mlx5_ib_dev *dev, - struct mlx5_wqe_umr_ctrl_seg *umr, - const struct ib_send_wr *wr, int atomic) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(umr, 0, sizeof(*umr)); - - if (!umrwr->ignore_free_state) { - if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) - /* fail if free */ - umr->flags = MLX5_UMR_CHECK_FREE; - else - /* fail if not free */ - umr->flags = MLX5_UMR_CHECK_NOT_FREE; - } - - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { - u64 offset = get_xlt_octo(umrwr->offset); - - umr->xlt_offset = cpu_to_be16(offset & 0xffff); - umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); - umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; - } - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) - umr->mkey_mask |= get_umr_update_translation_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { - umr->mkey_mask |= get_umr_update_access_mask(atomic); - umr->mkey_mask |= get_umr_update_pd_mask(); - } - if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) - umr->mkey_mask |= get_umr_enable_mr_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - umr->mkey_mask |= get_umr_disable_mr_mask(); - - if (!wr->num_sge) - umr->flags |= MLX5_UMR_INLINE; - - return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); -} - -static u8 get_umr_flags(int acc) -{ - return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; -} - -static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, - struct mlx5_ib_mr *mr, - u32 key, int access) -{ - int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; - - memset(seg, 0, sizeof(*seg)); - - if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) - seg->log2_page_size = ilog2(mr->ibmr.page_size); - else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) - /* KLMs take twice the size of MTTs */ - ndescs *= 2; - - seg->flags = get_umr_flags(access) | mr->access_mode; - seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - seg->start_addr = cpu_to_be64(mr->ibmr.iova); - seg->len = cpu_to_be64(mr->ibmr.length); - seg->xlt_oct_size = cpu_to_be32(ndescs); -} - -static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) -{ - memset(seg, 0, sizeof(*seg)); - seg->status = MLX5_MKEY_STATUS_FREE; -} - -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, - const struct ib_send_wr *wr) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(seg, 0, sizeof(*seg)); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - seg->status = MLX5_MKEY_STATUS_FREE; - - seg->flags = convert_access(umrwr->access_flags); - if (umrwr->pd) - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && - !umrwr->length) - seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); - - seg->start_addr = cpu_to_be64(umrwr->virt_addr); - seg->len = cpu_to_be64(umrwr->length); - seg->log2_page_size = umrwr->page_shift; - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | - mlx5_mkey_variant(umrwr->mkey)); -} - -static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, - struct mlx5_ib_mr *mr, - struct mlx5_ib_pd *pd) -{ - int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); - - dseg->addr = cpu_to_be64(mr->desc_map); - dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); - dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); -} - -static __be32 send_ieth(const struct ib_send_wr *wr) -{ - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - return wr->ex.imm_data; - - case IB_WR_SEND_WITH_INV: - return cpu_to_be32(wr->ex.invalidate_rkey); - - default: - return 0; - } -} - -static u8 calc_sig(void *wqe, int size) -{ - u8 *p = wqe; - u8 res = 0; - int i; - - for (i = 0; i < size; i++) - res ^= p[i]; - - return ~res; -} - -static u8 wq_sig(void *wqe) -{ - return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); -} - -static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - void **wqe, int *wqe_sz, void **cur_edge) -{ - struct mlx5_wqe_inline_seg *seg; - size_t offset; - int inl = 0; - int i; - - seg = *wqe; - *wqe += sizeof(*seg); - offset = sizeof(*seg); - - for (i = 0; i < wr->num_sge; i++) { - size_t len = wr->sg_list[i].length; - void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); - - inl += len; - - if (unlikely(inl > qp->max_inline_data)) - return -ENOMEM; - - while (likely(len)) { - size_t leftlen; - size_t copysz; - - handle_post_send_edge(&qp->sq, wqe, - *wqe_sz + (offset >> 4), - cur_edge); - - leftlen = *cur_edge - *wqe; - copysz = min_t(size_t, leftlen, len); - - memcpy(*wqe, addr, copysz); - len -= copysz; - addr += copysz; - *wqe += copysz; - offset += copysz; - } - } - - seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - - *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; - - return 0; -} - -static u16 prot_field_size(enum ib_signature_type type) -{ - switch (type) { - case IB_SIG_TYPE_T10_DIF: - return MLX5_DIF_SIZE; - default: - return 0; - } -} - -static u8 bs_selector(int block_size) -{ - switch (block_size) { - case 512: return 0x1; - case 520: return 0x2; - case 4096: return 0x3; - case 4160: return 0x4; - case 1073741824: return 0x5; - default: return 0; - } -} - -static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, - struct mlx5_bsf_inl *inl) -{ - /* Valid inline section and allow BSF refresh */ - inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | - MLX5_BSF_REFRESH_DIF); - inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); - inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); - /* repeating block */ - inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; - inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? - MLX5_DIF_CRC : MLX5_DIF_IPCS; - - if (domain->sig.dif.ref_remap) - inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; - - if (domain->sig.dif.app_escape) { - if (domain->sig.dif.ref_escape) - inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; - else - inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; - } - - inl->dif_app_bitmask_check = - cpu_to_be16(domain->sig.dif.apptag_check_mask); -} - -static int mlx5_set_bsf(struct ib_mr *sig_mr, - struct ib_sig_attrs *sig_attrs, - struct mlx5_bsf *bsf, u32 data_size) -{ - struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; - struct mlx5_bsf_basic *basic = &bsf->basic; - struct ib_sig_domain *mem = &sig_attrs->mem; - struct ib_sig_domain *wire = &sig_attrs->wire; - - memset(bsf, 0, sizeof(*bsf)); - - /* Basic + Extended + Inline */ - basic->bsf_size_sbs = 1 << 7; - /* Input domain check byte mask */ - basic->check_byte_mask = sig_attrs->check_mask; - basic->raw_data_size = cpu_to_be32(data_size); - - /* Memory domain */ - switch (sig_attrs->mem.sig_type) { - case IB_SIG_TYPE_NONE: - break; - case IB_SIG_TYPE_T10_DIF: - basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); - basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); - mlx5_fill_inl_bsf(mem, &bsf->m_inl); - break; - default: - return -EINVAL; - } - - /* Wire domain */ - switch (sig_attrs->wire.sig_type) { - case IB_SIG_TYPE_NONE: - break; - case IB_SIG_TYPE_T10_DIF: - if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && - mem->sig_type == wire->sig_type) { - /* Same block structure */ - basic->bsf_size_sbs |= 1 << 4; - if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) - basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; - if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) - basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; - if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) - basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; - } else - basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); - - basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); - mlx5_fill_inl_bsf(wire, &bsf->w_inl); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int set_sig_data_segment(const struct ib_send_wr *send_wr, - struct ib_mr *sig_mr, - struct ib_sig_attrs *sig_attrs, - struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) -{ - struct mlx5_bsf *bsf; - u32 data_len; - u32 data_key; - u64 data_va; - u32 prot_len = 0; - u32 prot_key = 0; - u64 prot_va = 0; - bool prot = false; - int ret; - int wqe_size; - struct mlx5_ib_mr *mr = to_mmr(sig_mr); - struct mlx5_ib_mr *pi_mr = mr->pi_mr; - - data_len = pi_mr->data_length; - data_key = pi_mr->ibmr.lkey; - data_va = pi_mr->data_iova; - if (pi_mr->meta_ndescs) { - prot_len = pi_mr->meta_length; - prot_key = pi_mr->ibmr.lkey; - prot_va = pi_mr->pi_iova; - prot = true; - } - - if (!prot || (data_key == prot_key && data_va == prot_va && - data_len == prot_len)) { - /** - * Source domain doesn't contain signature information - * or data and protection are interleaved in memory. - * So need construct: - * ------------------ - * | data_klm | - * ------------------ - * | BSF | - * ------------------ - **/ - struct mlx5_klm *data_klm = *seg; - - data_klm->bcount = cpu_to_be32(data_len); - data_klm->key = cpu_to_be32(data_key); - data_klm->va = cpu_to_be64(data_va); - wqe_size = ALIGN(sizeof(*data_klm), 64); - } else { - /** - * Source domain contains signature information - * So need construct a strided block format: - * --------------------------- - * | stride_block_ctrl | - * --------------------------- - * | data_klm | - * --------------------------- - * | prot_klm | - * --------------------------- - * | BSF | - * --------------------------- - **/ - struct mlx5_stride_block_ctrl_seg *sblock_ctrl; - struct mlx5_stride_block_entry *data_sentry; - struct mlx5_stride_block_entry *prot_sentry; - u16 block_size = sig_attrs->mem.sig.dif.pi_interval; - int prot_size; - - sblock_ctrl = *seg; - data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); - prot_sentry = (void *)data_sentry + sizeof(*data_sentry); - - prot_size = prot_field_size(sig_attrs->mem.sig_type); - if (!prot_size) { - pr_err("Bad block size given: %u\n", block_size); - return -EINVAL; - } - sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + - prot_size); - sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); - sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); - sblock_ctrl->num_entries = cpu_to_be16(2); - - data_sentry->bcount = cpu_to_be16(block_size); - data_sentry->key = cpu_to_be32(data_key); - data_sentry->va = cpu_to_be64(data_va); - data_sentry->stride = cpu_to_be16(block_size); - - prot_sentry->bcount = cpu_to_be16(prot_size); - prot_sentry->key = cpu_to_be32(prot_key); - prot_sentry->va = cpu_to_be64(prot_va); - prot_sentry->stride = cpu_to_be16(prot_size); - - wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + - sizeof(*prot_sentry), 64); - } - - *seg += wqe_size; - *size += wqe_size / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - bsf = *seg; - ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); - if (ret) - return -EINVAL; - - *seg += sizeof(*bsf); - *size += sizeof(*bsf) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - return 0; -} - -static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_mr *sig_mr, int access_flags, - u32 size, u32 length, u32 pdn) -{ - u32 sig_key = sig_mr->rkey; - u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; - - memset(seg, 0, sizeof(*seg)); - - seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; - seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | - MLX5_MKEY_BSF_EN | pdn); - seg->len = cpu_to_be64(length); - seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); - seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); -} - -static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - u32 size) -{ - memset(umr, 0, sizeof(*umr)); - - umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); - umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); - umr->mkey_mask = sig_mkey_mask(); -} - -static int set_pi_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) -{ - const struct ib_reg_wr *wr = reg_wr(send_wr); - struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); - struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; - struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; - u32 pdn = get_pd(qp)->pdn; - u32 xlt_size; - int region_len, ret; - - if (unlikely(send_wr->num_sge != 0) || - unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || - unlikely(!sig_mr->sig->sig_status_checked)) - return -EINVAL; - - /* length of the protected region, data + protection */ - region_len = pi_mr->ibmr.length; - - /** - * KLM octoword size - if protection was provided - * then we use strided block format (3 octowords), - * else we use single KLM (1 octoword) - **/ - if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) - xlt_size = 0x30; - else - xlt_size = sizeof(struct mlx5_klm); - - set_sig_umr_segment(*seg, xlt_size); - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, - pdn); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, - cur_edge); - if (ret) - return ret; - - sig_mr->sig->sig_status_checked = false; - return 0; -} - -static int set_psv_wr(struct ib_sig_domain *domain, - u32 psv_idx, void **seg, int *size) -{ - struct mlx5_seg_set_psv *psv_seg = *seg; - - memset(psv_seg, 0, sizeof(*psv_seg)); - psv_seg->psv_num = cpu_to_be32(psv_idx); - switch (domain->sig_type) { - case IB_SIG_TYPE_NONE: - break; - case IB_SIG_TYPE_T10_DIF: - psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | - domain->sig.dif.app_tag); - psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); - break; - default: - pr_err("Bad signature type (%d) is given.\n", - domain->sig_type); - return -EINVAL; - } - - *seg += sizeof(*psv_seg); - *size += sizeof(*psv_seg) / 16; - - return 0; -} - -static int set_reg_wr(struct mlx5_ib_qp *qp, - const struct ib_reg_wr *wr, - void **seg, int *size, void **cur_edge, - bool check_not_free) -{ - struct mlx5_ib_mr *mr = to_mmr(wr->mr); - struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); - int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; - bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; - bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; - u8 flags = 0; - - if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { - mlx5_ib_warn(to_mdev(qp->ibqp.device), - "Fast update of %s for MR is disabled\n", - (MLX5_CAP_GEN(dev->mdev, - umr_modify_entity_size_disabled)) ? - "entity size" : - "atomic access"); - return -EINVAL; - } - - if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { - mlx5_ib_warn(to_mdev(qp->ibqp.device), - "Invalid IB_SEND_INLINE send flag\n"); - return -EINVAL; - } - - if (check_not_free) - flags |= MLX5_UMR_CHECK_NOT_FREE; - if (umr_inline) - flags |= MLX5_UMR_INLINE; - - set_reg_umr_seg(*seg, mr, flags, atomic); - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - set_reg_mkey_seg(*seg, mr, wr->key, wr->access); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - if (umr_inline) { - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, - mr_list_size); - *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); - } else { - set_reg_data_seg(*seg, mr, pd); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - } - return 0; -} - -static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) -{ - set_linv_umr_seg(*seg); - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_linv_mkey_seg(*seg); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); -} - -static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) -{ - __be32 *p = NULL; - int i, j; - - pr_debug("dump WQE index %u:\n", idx); - for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { - if ((i & 0xf) == 0) { - p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); - pr_debug("WQBB at %p:\n", (void *)p); - j = 0; - idx = (idx + 1) & (qp->sq.wqe_cnt - 1); - } - pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), - be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), - be32_to_cpu(p[j + 3])); - } -} - -static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned int *idx, - int *size, void **cur_edge, int nreq, - bool send_signaled, bool solicited) -{ - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) - return -ENOMEM; - - *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); - *ctrl = *seg; - *(uint32_t *)(*seg + 8) = 0; - (*ctrl)->imm = send_ieth(wr); - (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); - - *seg += sizeof(**ctrl); - *size = sizeof(**ctrl) / 16; - *cur_edge = qp->sq.cur_edge; - - return 0; -} - -static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, void **cur_edge, int nreq) -{ - return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, - wr->send_flags & IB_SEND_SIGNALED, - wr->send_flags & IB_SEND_SOLICITED); -} - -static void finish_wqe(struct mlx5_ib_qp *qp, - struct mlx5_wqe_ctrl_seg *ctrl, - void *seg, u8 size, void *cur_edge, - unsigned int idx, u64 wr_id, int nreq, u8 fence, - u32 mlx5_opcode) -{ - u8 opmod = 0; - - ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | - mlx5_opcode | ((u32)opmod << 24)); - ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); - ctrl->fm_ce_se |= fence; - if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE)) - ctrl->signature = wq_sig(ctrl); - - qp->sq.wrid[idx] = wr_id; - qp->sq.w_list[idx].opcode = mlx5_opcode; - qp->sq.wqe_head[idx] = qp->sq.head + nreq; - qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - - /* We save the edge which was possibly updated during the WQE - * construction, into SQ's cache. - */ - seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); - qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? - get_sq_edge(&qp->sq, qp->sq.cur_post & - (qp->sq.wqe_cnt - 1)) : - cur_edge; -} - -static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size) -{ - set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); - *seg += sizeof(struct mlx5_wqe_raddr_seg); - *size += sizeof(struct mlx5_wqe_raddr_seg) / 16; -} - -static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, - int *size, void **cur_edge, unsigned int idx) -{ - qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; - (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey); - set_linv_wr(qp, seg, size, cur_edge); -} - -static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, - void **cur_edge, unsigned int idx) -{ - qp->sq.wr_data[idx] = IB_WR_REG_MR; - (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key); - return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true); -} - -static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, - void **cur_edge, unsigned int *idx, int nreq, - struct ib_sig_domain *domain, u32 psv_index, - u8 next_fence) -{ - int err; - - /* - * SET_PSV WQEs are not signaled and solicited on error. - */ - err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, - false, true); - if (unlikely(err)) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - goto out; - } - err = set_psv_wr(domain, psv_index, seg, size); - if (unlikely(err)) { - mlx5_ib_warn(dev, "\n"); - goto out; - } - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, - next_fence, MLX5_OPCODE_SET_PSV); - -out: - return err; -} - -static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, - struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, - void **cur_edge, unsigned int *idx, int nreq, u8 fence, - u8 next_fence) -{ - struct mlx5_ib_mr *mr; - struct mlx5_ib_mr *pi_mr; - struct mlx5_ib_mr pa_pi_mr; - struct ib_sig_attrs *sig_attrs; - struct ib_reg_wr reg_pi_wr; - int err; - - qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY; - - mr = to_mmr(reg_wr(wr)->mr); - pi_mr = mr->pi_mr; - - if (pi_mr) { - memset(®_pi_wr, 0, - sizeof(struct ib_reg_wr)); - - reg_pi_wr.mr = &pi_mr->ibmr; - reg_pi_wr.access = reg_wr(wr)->access; - reg_pi_wr.key = pi_mr->ibmr.rkey; - - (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key); - /* UMR for data + prot registration */ - err = set_reg_wr(qp, ®_pi_wr, seg, size, cur_edge, false); - if (unlikely(err)) - goto out; - - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, - nreq, fence, MLX5_OPCODE_UMR); - - err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); - if (unlikely(err)) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - goto out; - } - } else { - memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr)); - /* No UMR, use local_dma_lkey */ - pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey; - pa_pi_mr.ndescs = mr->ndescs; - pa_pi_mr.data_length = mr->data_length; - pa_pi_mr.data_iova = mr->data_iova; - if (mr->meta_ndescs) { - pa_pi_mr.meta_ndescs = mr->meta_ndescs; - pa_pi_mr.meta_length = mr->meta_length; - pa_pi_mr.pi_iova = mr->pi_iova; - } - - pa_pi_mr.ibmr.length = mr->ibmr.length; - mr->pi_mr = &pa_pi_mr; - } - (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey); - /* UMR for sig MR */ - err = set_pi_umr_wr(wr, qp, seg, size, cur_edge); - if (unlikely(err)) { - mlx5_ib_warn(dev, "\n"); - goto out; - } - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_UMR); - - sig_attrs = mr->ibmr.sig_attrs; - err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, - &sig_attrs->mem, mr->sig->psv_memory.psv_idx, - next_fence); - if (unlikely(err)) - goto out; - - err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, - &sig_attrs->wire, mr->sig->psv_wire.psv_idx, - next_fence); - if (unlikely(err)) - goto out; - - qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - -out: - return err; -} - -static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, - void **cur_edge, unsigned int *idx, int nreq, u8 fence, - u8 next_fence, int *num_sge) -{ - int err = 0; - - switch (wr->opcode) { - case IB_WR_RDMA_READ: - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - handle_rdma_op(wr, seg, size); - break; - - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); - err = -EOPNOTSUPP; - goto out; - - case IB_WR_LOCAL_INV: - handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx); - *num_sge = 0; - break; - - case IB_WR_REG_MR: - err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx); - if (unlikely(err)) - goto out; - *num_sge = 0; - break; - - case IB_WR_REG_MR_INTEGRITY: - err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size, - cur_edge, idx, nreq, fence, - next_fence); - if (unlikely(err)) - goto out; - *num_sge = 0; - break; - - default: - break; - } - -out: - return err; -} - -static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size) -{ - switch (wr->opcode) { - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - handle_rdma_op(wr, seg, size); - break; - default: - break; - } -} - -static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp, - const struct ib_send_wr *wr, void **seg, - int *size, void **cur_edge) -{ - set_datagram_seg(*seg, wr); - *seg += sizeof(struct mlx5_wqe_datagram_seg); - *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); -} - -static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - void **seg, int *size, void **cur_edge) -{ - set_datagram_seg(*seg, wr); - *seg += sizeof(struct mlx5_wqe_datagram_seg); - *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - /* handle qp that supports ud offload */ - if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { - struct mlx5_wqe_eth_pad *pad; - - pad = *seg; - memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); - *seg += sizeof(struct mlx5_wqe_eth_pad); - *size += sizeof(struct mlx5_wqe_eth_pad) / 16; - set_eth_seg(wr, qp, seg, size, cur_edge); - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - } -} - -static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, - int *size, void **cur_edge, unsigned int idx) -{ - int err = 0; - - if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { - err = -EINVAL; - mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); - goto out; - } - - qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, *seg, wr, - !!(MLX5_CAP_GEN(dev->mdev, atomic))); - if (unlikely(err)) - goto out; - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_reg_mkey_segment(*seg, wr); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); -out: - return err; -} - -static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr, bool drain) -{ - struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ - struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_qp *qp; - struct mlx5_wqe_xrc_seg *xrc; - struct mlx5_bf *bf; - void *cur_edge; - int uninitialized_var(size); - unsigned long flags; - unsigned idx; - int err = 0; - int num_sge; - void *seg; - int nreq; - int i; - u8 next_fence = 0; - u8 fence; - - if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && - !drain)) { - *bad_wr = wr; - return -EIO; - } - - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) - return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); - - qp = to_mqp(ibqp); - bf = &qp->bf; - - spin_lock_irqsave(&qp->sq.lock, flags); - - for (nreq = 0; wr; nreq++, wr = wr->next) { - if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { - mlx5_ib_warn(dev, "\n"); - err = -EINVAL; - *bad_wr = wr; - goto out; - } - - num_sge = wr->num_sge; - if (unlikely(num_sge > qp->sq.max_gs)) { - mlx5_ib_warn(dev, "\n"); - err = -EINVAL; - *bad_wr = wr; - goto out; - } - - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, - nreq); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - - if (wr->opcode == IB_WR_REG_MR || - wr->opcode == IB_WR_REG_MR_INTEGRITY) { - fence = dev->umr_fence; - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - } else { - if (wr->send_flags & IB_SEND_FENCE) { - if (qp->next_fence) - fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - fence = MLX5_FENCE_MODE_FENCE; - } else { - fence = qp->next_fence; - } - } - - switch (ibqp->qp_type) { - case IB_QPT_XRC_INI: - xrc = seg; - seg += sizeof(*xrc); - size += sizeof(*xrc) / 16; - /* fall through */ - case IB_QPT_RC: - err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size, - &cur_edge, &idx, nreq, fence, - next_fence, &num_sge); - if (unlikely(err)) { - *bad_wr = wr; - goto out; - } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) { - goto skip_psv; - } - break; - - case IB_QPT_UC: - handle_qpt_uc(wr, &seg, &size); - break; - case IB_QPT_SMI: - if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { - mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); - err = -EPERM; - *bad_wr = wr; - goto out; - } - /* fall through */ - case MLX5_IB_QPT_HW_GSI: - handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge); - break; - case IB_QPT_UD: - handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); - break; - case MLX5_IB_QPT_REG_UMR: - err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, - &size, &cur_edge, idx); - if (unlikely(err)) - goto out; - break; - - default: - break; - } - - if (wr->send_flags & IB_SEND_INLINE && num_sge) { - err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); - if (unlikely(err)) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - } else { - for (i = 0; i < num_sge; i++) { - handle_post_send_edge(&qp->sq, &seg, size, - &cur_edge); - if (likely(wr->sg_list[i].length)) { - set_data_ptr_seg - ((struct mlx5_wqe_data_seg *)seg, - wr->sg_list + i); - size += sizeof(struct mlx5_wqe_data_seg) / 16; - seg += sizeof(struct mlx5_wqe_data_seg); - } - } - } - - qp->next_fence = next_fence; - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, - fence, mlx5_ib_opcode[wr->opcode]); -skip_psv: - if (0) - dump_wqe(qp, idx, size); - } - -out: - if (likely(nreq)) { - qp->sq.head += nreq; - - /* Make sure that descriptors are written before - * updating doorbell record and ringing the doorbell - */ - wmb(); - - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); - - /* Make sure doorbell record is visible to the HCA before - * we hit doorbell */ - wmb(); - - mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); - /* Make sure doorbells don't leak out of SQ spinlock - * and reach the HCA out of order. - */ - bf->offset ^= bf->buf_size; - } - - spin_unlock_irqrestore(&qp->sq.lock, flags); - - return err; -} - -int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - return _mlx5_ib_post_send(ibqp, wr, bad_wr, false); -} - -static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) -{ - sig->signature = calc_sig(sig, size); -} - -static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr, bool drain) -{ - struct mlx5_ib_qp *qp = to_mqp(ibqp); - struct mlx5_wqe_data_seg *scat; - struct mlx5_rwqe_sig *sig; - struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = dev->mdev; - unsigned long flags; - int err = 0; - int nreq; - int ind; - int i; - - if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && - !drain)) { - *bad_wr = wr; - return -EIO; - } - - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) - return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); - - spin_lock_irqsave(&qp->rq.lock, flags); - - ind = qp->rq.head & (qp->rq.wqe_cnt - 1); - - for (nreq = 0; wr; nreq++, wr = wr->next) { - if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - - if (unlikely(wr->num_sge > qp->rq.max_gs)) { - err = -EINVAL; - *bad_wr = wr; - goto out; - } - - scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); - if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) - scat++; - - for (i = 0; i < wr->num_sge; i++) - set_data_ptr_seg(scat + i, wr->sg_list + i); - - if (i < qp->rq.max_gs) { - scat[i].byte_count = 0; - scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); - scat[i].addr = 0; - } - - if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) { - sig = (struct mlx5_rwqe_sig *)scat; - set_sig_seg(sig, (qp->rq.max_gs + 1) << 2); - } - - qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (qp->rq.wqe_cnt - 1); - } - -out: - if (likely(nreq)) { - qp->rq.head += nreq; - - /* Make sure that descriptors are written before - * doorbell record. - */ - wmb(); - - *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); - } - - spin_unlock_irqrestore(&qp->rq.lock, flags); - - return err; -} - -int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false); -} - static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) { switch (mlx5_state) { @@ -6808,7 +5266,7 @@ void mlx5_ib_drain_sq(struct ib_qp *qp) sdrain.cqe.done = mlx5_ib_drain_qp_done; init_completion(&sdrain.done); - ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true); + ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; @@ -6838,7 +5296,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) rdrain.cqe.done = mlx5_ib_drain_qp_done; init_completion(&rdrain.done); - ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true); + ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c new file mode 100644 index 000000000000..2c6df1c43b55 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -0,0 +1,1504 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include "wr.h" + +static const u32 mlx5_ib_opcode[] = { + [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_LSO] = MLX5_OPCODE_LSO, + [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, + [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, + [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, + [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, + [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, + [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, + [IB_WR_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, + [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, +}; + +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq - SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. + */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + if (likely(*seg != *cur_edge)) + return; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's + * pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq - SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. + */ +static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, const void *src, + size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + +static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, + struct ib_cq *ib_cq) +{ + struct mlx5_ib_cq *cq; + unsigned int cur; + + cur = wq->head - wq->tail; + if (likely(cur + nreq < wq->max_post)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size, void **cur_edge) +{ + struct mlx5_wqe_eth_seg *eseg = *seg; + + memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); + + if (wr->send_flags & IB_SEND_IP_CSUM) + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | + MLX5_ETH_WQE_L4_CSUM; + + if (wr->opcode == IB_WR_LSO) { + struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); + size_t left, copysz; + void *pdata = ud_wr->header; + size_t stride; + + left = ud_wr->hlen; + eseg->mss = cpu_to_be16(ud_wr->mss); + eseg->inline_hdr.sz = cpu_to_be16(left); + + /* memcpy_send_wqe should get a 16B align address. Hence, we + * first copy up to the current edge and then, if needed, + * continue to memcpy_send_wqe. + */ + copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, + left); + memcpy(eseg->inline_hdr.start, pdata, copysz); + stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - + sizeof(eseg->inline_hdr.start) + copysz, 16); + *size += stride / 16; + *seg += stride; + + if (copysz < left) { + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + left -= copysz; + pdata += copysz; + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, + left); + } + + return; + } + + *seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; +} + +static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, + const struct ib_send_wr *wr) +{ + memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = + cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); +} + +static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static u64 get_xlt_octo(u64 bytes) +{ + return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / + MLX5_IB_UMR_OCTOWORD; +} + +static __be64 frwr_mkey_mask(bool atomic) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE; + + if (atomic) + result |= MLX5_MKEY_MASK_A; + + return cpu_to_be64(result); +} + +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + +static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, + struct mlx5_ib_mr *mr, u8 flags, bool atomic) +{ + int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; + + memset(umr, 0, sizeof(*umr)); + + umr->flags = flags; + umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->mkey_mask = frwr_mkey_mask(atomic); +} + +static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) +{ + memset(umr, 0, sizeof(*umr)); + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = MLX5_UMR_INLINE; +} + +static __be64 get_umr_enable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_disable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_translation_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_access_mask(int atomic) +{ + u64 result; + + result = MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW; + + if (atomic) + result |= MLX5_MKEY_MASK_A; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_pd_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_PD; + + return cpu_to_be64(result); +} + +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ + if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || + (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + return -EPERM; + return 0; +} + +static int set_reg_umr_segment(struct mlx5_ib_dev *dev, + struct mlx5_wqe_umr_ctrl_seg *umr, + const struct ib_send_wr *wr, int atomic) +{ + const struct mlx5_umr_wr *umrwr = umr_wr(wr); + + memset(umr, 0, sizeof(*umr)); + + if (!umrwr->ignore_free_state) { + if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) + /* fail if free */ + umr->flags = MLX5_UMR_CHECK_FREE; + else + /* fail if not free */ + umr->flags = MLX5_UMR_CHECK_NOT_FREE; + } + + umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { + u64 offset = get_xlt_octo(umrwr->offset); + + umr->xlt_offset = cpu_to_be16(offset & 0xffff); + umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); + umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; + } + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) + umr->mkey_mask |= get_umr_update_translation_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { + umr->mkey_mask |= get_umr_update_access_mask(atomic); + umr->mkey_mask |= get_umr_update_pd_mask(); + } + if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) + umr->mkey_mask |= get_umr_enable_mr_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) + umr->mkey_mask |= get_umr_disable_mr_mask(); + + if (!wr->num_sge) + umr->flags |= MLX5_UMR_INLINE; + + return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); +} + +static u8 get_umr_flags(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; +} + +static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, + struct mlx5_ib_mr *mr, + u32 key, int access) +{ + int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; + + memset(seg, 0, sizeof(*seg)); + + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) + seg->log2_page_size = ilog2(mr->ibmr.page_size); + else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + + seg->flags = get_umr_flags(access) | mr->access_mode; + seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + seg->start_addr = cpu_to_be64(mr->ibmr.iova); + seg->len = cpu_to_be64(mr->ibmr.length); + seg->xlt_oct_size = cpu_to_be32(ndescs); +} + +static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) +{ + memset(seg, 0, sizeof(*seg)); + seg->status = MLX5_MKEY_STATUS_FREE; +} + +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, + const struct ib_send_wr *wr) +{ + const struct mlx5_umr_wr *umrwr = umr_wr(wr); + + memset(seg, 0, sizeof(*seg)); + if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) + seg->status = MLX5_MKEY_STATUS_FREE; + + seg->flags = convert_access(umrwr->access_flags); + if (umrwr->pd) + seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && + !umrwr->length) + seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); + + seg->start_addr = cpu_to_be64(umrwr->virt_addr); + seg->len = cpu_to_be64(umrwr->length); + seg->log2_page_size = umrwr->page_shift; + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | + mlx5_mkey_variant(umrwr->mkey)); +} + +static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, + struct mlx5_ib_mr *mr, + struct mlx5_ib_pd *pd) +{ + int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); + + dseg->addr = cpu_to_be64(mr->desc_map); + dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); + dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); +} + +static __be32 send_ieth(const struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + +static u8 calc_sig(void *wqe, int size) +{ + u8 *p = wqe; + u8 res = 0; + int i; + + for (i = 0; i < size; i++) + res ^= p[i]; + + return ~res; +} + +static u8 wq_sig(void *wqe) +{ + return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); +} + +static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + void **wqe, int *wqe_sz, void **cur_edge) +{ + struct mlx5_wqe_inline_seg *seg; + size_t offset; + int inl = 0; + int i; + + seg = *wqe; + *wqe += sizeof(*seg); + offset = sizeof(*seg); + + for (i = 0; i < wr->num_sge; i++) { + size_t len = wr->sg_list[i].length; + void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); + + inl += len; + + if (unlikely(inl > qp->max_inline_data)) + return -ENOMEM; + + while (likely(len)) { + size_t leftlen; + size_t copysz; + + handle_post_send_edge(&qp->sq, wqe, + *wqe_sz + (offset >> 4), + cur_edge); + + leftlen = *cur_edge - *wqe; + copysz = min_t(size_t, leftlen, len); + + memcpy(*wqe, addr, copysz); + len -= copysz; + addr += copysz; + *wqe += copysz; + offset += copysz; + } + } + + seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); + + *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + + return 0; +} + +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, + struct mlx5_bsf_inl *inl) +{ + /* Valid inline section and allow BSF refresh */ + inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | + MLX5_BSF_REFRESH_DIF); + inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); + inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); + /* repeating block */ + inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; + inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? + MLX5_DIF_CRC : MLX5_DIF_IPCS; + + if (domain->sig.dif.ref_remap) + inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; + + if (domain->sig.dif.app_escape) { + if (domain->sig.dif.ref_escape) + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; + else + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; + } + + inl->dif_app_bitmask_check = + cpu_to_be16(domain->sig.dif.apptag_check_mask); +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + + memset(bsf, 0, sizeof(*bsf)); + + /* Basic + Extended + Inline */ + basic->bsf_size_sbs = 1 << 7; + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + basic->raw_data_size = cpu_to_be32(data_size); + + /* Memory domain */ + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); + mlx5_fill_inl_bsf(mem, &bsf->m_inl); + break; + default: + return -EINVAL; + } + + /* Wire domain */ + switch (sig_attrs->wire.sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig_type == wire->sig_type) { + /* Same block structure */ + basic->bsf_size_sbs |= 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; + if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) + basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; + if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) + basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; + } else + basic->wire.bs_selector = + bs_selector(wire->sig.dif.pi_interval); + + basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); + mlx5_fill_inl_bsf(wire, &bsf->w_inl); + break; + default: + return -EINVAL; + } + + return 0; +} + + +static int set_sig_data_segment(const struct ib_send_wr *send_wr, + struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + struct mlx5_bsf *bsf; + u32 data_len; + u32 data_key; + u64 data_va; + u32 prot_len = 0; + u32 prot_key = 0; + u64 prot_va = 0; + bool prot = false; + int ret; + int wqe_size; + struct mlx5_ib_mr *mr = to_mmr(sig_mr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; + + data_len = pi_mr->data_length; + data_key = pi_mr->ibmr.lkey; + data_va = pi_mr->data_iova; + if (pi_mr->meta_ndescs) { + prot_len = pi_mr->meta_length; + prot_key = pi_mr->ibmr.lkey; + prot_va = pi_mr->pi_iova; + prot = true; + } + + if (!prot || (data_key == prot_key && data_va == prot_va && + data_len == prot_len)) { + /** + * Source domain doesn't contain signature information + * or data and protection are interleaved in memory. + * So need construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information + * So need construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + data_sentry->stride = cpu_to_be16(block_size); + + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + prot_sentry->va = cpu_to_be64(prot_va); + prot_sentry->stride = cpu_to_be16(prot_size); + + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_mr *sig_mr, int access_flags, + u32 size, u32 length, u32 pdn) +{ + u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + u32 size) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + +static int set_pi_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + const struct ib_reg_wr *wr = reg_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); + struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; + struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; + u32 pdn = to_mpd(qp->ibqp.pd)->pdn; + u32 xlt_size; + int region_len, ret; + + if (unlikely(send_wr->num_sge != 0) || + unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || + unlikely(!sig_mr->sig->sig_status_checked)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = pi_mr->ibmr.length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) + xlt_size = 0x30; + else + xlt_size = sizeof(struct mlx5_klm); + + set_sig_umr_segment(*seg, xlt_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, + pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, + cur_edge); + if (ret) + return ret; + + sig_mr->sig->sig_status_checked = false; + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + break; + default: + pr_err("Bad signature type (%d) is given.\n", + domain->sig_type); + return -EINVAL; + } + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + + return 0; +} + +static int set_reg_wr(struct mlx5_ib_qp *qp, + const struct ib_reg_wr *wr, + void **seg, int *size, void **cur_edge, + bool check_not_free) +{ + struct mlx5_ib_mr *mr = to_mmr(wr->mr); + struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); + int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; + bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; + u8 flags = 0; + + if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Fast update of %s for MR is disabled\n", + (MLX5_CAP_GEN(dev->mdev, + umr_modify_entity_size_disabled)) ? + "entity size" : + "atomic access"); + return -EINVAL; + } + + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Invalid IB_SEND_INLINE send flag\n"); + return -EINVAL; + } + + if (check_not_free) + flags |= MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + flags |= MLX5_UMR_INLINE; + + set_reg_umr_seg(*seg, mr, flags, atomic); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + set_reg_mkey_seg(*seg, mr, wr->key, wr->access); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + if (umr_inline) { + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); + *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); + } else { + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } + return 0; +} + +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + set_linv_umr_seg(*seg); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + set_linv_mkey_seg(*seg); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +} + +static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) +{ + __be32 *p = NULL; + int i, j; + + pr_debug("dump WQE index %u:\n", idx); + for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { + if ((i & 0xf) == 0) { + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); + pr_debug("WQBB at %p:\n", (void *)p); + j = 0; + idx = (idx + 1) & (qp->sq.wqe_cnt - 1); + } + pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), + be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), + be32_to_cpu(p[j + 3])); + } +} + +static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, + int *size, void **cur_edge, int nreq, + bool send_signaled, bool solicited) +{ + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + return -ENOMEM; + + *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); + *ctrl = *seg; + *(uint32_t *)(*seg + 8) = 0; + (*ctrl)->imm = send_ieth(wr); + (*ctrl)->fm_ce_se = qp->sq_signal_bits | + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); + + *seg += sizeof(**ctrl); + *size = sizeof(**ctrl) / 16; + *cur_edge = qp->sq.cur_edge; + + return 0; +} + +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, int *size, + void **cur_edge, int nreq) +{ + return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, + wr->send_flags & IB_SEND_SIGNALED, + wr->send_flags & IB_SEND_SOLICITED); +} + +static void finish_wqe(struct mlx5_ib_qp *qp, + struct mlx5_wqe_ctrl_seg *ctrl, + void *seg, u8 size, void *cur_edge, + unsigned int idx, u64 wr_id, int nreq, u8 fence, + u32 mlx5_opcode) +{ + u8 opmod = 0; + + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); + ctrl->fm_ce_se |= fence; + if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + + /* We save the edge which was possibly updated during the WQE + * construction, into SQ's cache. + */ + seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); + qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? + get_sq_edge(&qp->sq, qp->sq.cur_post & + (qp->sq.wqe_cnt - 1)) : + cur_edge; +} + +static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size) +{ + set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); + *seg += sizeof(struct mlx5_wqe_raddr_seg); + *size += sizeof(struct mlx5_wqe_raddr_seg) / 16; +} + +static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, unsigned int idx) +{ + qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; + (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey); + set_linv_wr(qp, seg, size, cur_edge); +} + +static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int idx) +{ + qp->sq.wr_data[idx] = IB_WR_REG_MR; + (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key); + return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true); +} + +static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int *idx, int nreq, + struct ib_sig_domain *domain, u32 psv_index, + u8 next_fence) +{ + int err; + + /* + * SET_PSV WQEs are not signaled and solicited on error. + */ + err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, + false, true); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + goto out; + } + err = set_psv_wr(domain, psv_index, seg, size); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + goto out; + } + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, + next_fence, MLX5_OPCODE_SET_PSV); + +out: + return err; +} + +static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, + unsigned int *idx, int nreq, u8 fence, + u8 next_fence) +{ + struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr pa_pi_mr; + struct ib_sig_attrs *sig_attrs; + struct ib_reg_wr reg_pi_wr; + int err; + + qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY; + + mr = to_mmr(reg_wr(wr)->mr); + pi_mr = mr->pi_mr; + + if (pi_mr) { + memset(®_pi_wr, 0, + sizeof(struct ib_reg_wr)); + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + prot registration */ + err = set_reg_wr(qp, ®_pi_wr, seg, size, cur_edge, false); + if (unlikely(err)) + goto out; + + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, + nreq, fence, MLX5_OPCODE_UMR); + + err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + goto out; + } + } else { + memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr)); + /* No UMR, use local_dma_lkey */ + pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey; + pa_pi_mr.ndescs = mr->ndescs; + pa_pi_mr.data_length = mr->data_length; + pa_pi_mr.data_iova = mr->data_iova; + if (mr->meta_ndescs) { + pa_pi_mr.meta_ndescs = mr->meta_ndescs; + pa_pi_mr.meta_length = mr->meta_length; + pa_pi_mr.pi_iova = mr->pi_iova; + } + + pa_pi_mr.ibmr.length = mr->ibmr.length; + mr->pi_mr = &pa_pi_mr; + } + (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey); + /* UMR for sig MR */ + err = set_pi_umr_wr(wr, qp, seg, size, cur_edge); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + goto out; + } + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); + + sig_attrs = mr->ibmr.sig_attrs; + err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, + &sig_attrs->mem, mr->sig->psv_memory.psv_idx, + next_fence); + if (unlikely(err)) + goto out; + + err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, + &sig_attrs->wire, mr->sig->psv_wire.psv_idx, + next_fence); + if (unlikely(err)) + goto out; + + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + +out: + return err; +} + +static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int *idx, int nreq, u8 fence, + u8 next_fence, int *num_sge) +{ + int err = 0; + + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + handle_rdma_op(wr, seg, size); + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); + err = -EOPNOTSUPP; + goto out; + + case IB_WR_LOCAL_INV: + handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx); + *num_sge = 0; + break; + + case IB_WR_REG_MR: + err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx); + if (unlikely(err)) + goto out; + *num_sge = 0; + break; + + case IB_WR_REG_MR_INTEGRITY: + err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size, + cur_edge, idx, nreq, fence, + next_fence); + if (unlikely(err)) + goto out; + *num_sge = 0; + break; + + default: + break; + } + +out: + return err; +} + +static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size) +{ + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + handle_rdma_op(wr, seg, size); + break; + default: + break; + } +} + +static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, void **seg, + int *size, void **cur_edge) +{ + set_datagram_seg(*seg, wr); + *seg += sizeof(struct mlx5_wqe_datagram_seg); + *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +} + +static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + void **seg, int *size, void **cur_edge) +{ + set_datagram_seg(*seg, wr); + *seg += sizeof(struct mlx5_wqe_datagram_seg); + *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + /* handle qp that supports ud offload */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { + struct mlx5_wqe_eth_pad *pad; + + pad = *seg; + memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); + *seg += sizeof(struct mlx5_wqe_eth_pad); + *size += sizeof(struct mlx5_wqe_eth_pad) / 16; + set_eth_seg(wr, qp, seg, size, cur_edge); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + } +} + +static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, unsigned int idx) +{ + int err = 0; + + if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { + err = -EINVAL; + mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); + goto out; + } + + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); + err = set_reg_umr_segment(dev, *seg, wr, + !!(MLX5_CAP_GEN(dev->mdev, atomic))); + if (unlikely(err)) + goto out; + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + set_reg_mkey_segment(*seg, wr); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +out: + return err; +} + +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain) +{ + struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_qp *qp; + struct mlx5_wqe_xrc_seg *xrc; + struct mlx5_bf *bf; + void *cur_edge; + int uninitialized_var(size); + unsigned long flags; + unsigned int idx; + int err = 0; + int num_sge; + void *seg; + int nreq; + int i; + u8 next_fence = 0; + u8 fence; + + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); + + qp = to_mqp(ibqp); + bf = &qp->bf; + + spin_lock_irqsave(&qp->sq.lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { + mlx5_ib_warn(dev, "\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { + mlx5_ib_warn(dev, "\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, + nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (wr->opcode == IB_WR_REG_MR || + wr->opcode == IB_WR_REG_MR_INTEGRITY) { + fence = dev->umr_fence; + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + } else { + if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } + } + + switch (ibqp->qp_type) { + case IB_QPT_XRC_INI: + xrc = seg; + seg += sizeof(*xrc); + size += sizeof(*xrc) / 16; + fallthrough; + case IB_QPT_RC: + err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size, + &cur_edge, &idx, nreq, fence, + next_fence, &num_sge); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) { + goto skip_psv; + } + break; + + case IB_QPT_UC: + handle_qpt_uc(wr, &seg, &size); + break; + case IB_QPT_SMI: + if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { + mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); + err = -EPERM; + *bad_wr = wr; + goto out; + } + fallthrough; + case MLX5_IB_QPT_HW_GSI: + handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge); + break; + case IB_QPT_UD: + handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); + break; + case MLX5_IB_QPT_REG_UMR: + err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, + &size, &cur_edge, idx); + if (unlikely(err)) + goto out; + break; + + default: + break; + } + + if (wr->send_flags & IB_SEND_INLINE && num_sge) { + err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + } else { + for (i = 0; i < num_sge; i++) { + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); + if (unlikely(!wr->sg_list[i].length)) + continue; + + set_data_ptr_seg( + (struct mlx5_wqe_data_seg *)seg, + wr->sg_list + i); + size += sizeof(struct mlx5_wqe_data_seg) / 16; + seg += sizeof(struct mlx5_wqe_data_seg); + } + } + + qp->next_fence = next_fence; + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, + fence, mlx5_ib_opcode[wr->opcode]); +skip_psv: + if (0) + dump_wqe(qp, idx, size); + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell. + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + bf->offset ^= bf->buf_size; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs) +{ + sig->signature = calc_sig(sig, (max_gs + 1) << 2); +} + +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_data_seg *scat; + struct mlx5_rwqe_sig *sig; + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); + + spin_lock_irqsave(&qp->rq.lock, flags); + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) + scat++; + + for (i = 0; i < wr->num_sge; i++) + set_data_ptr_seg(scat + i, wr->sg_list + i); + + if (i < qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) { + sig = (struct mlx5_rwqe_sig *)scat; + set_sig_seg(sig, qp->rq.max_gs); + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h new file mode 100644 index 000000000000..4f0057516402 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/wr.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_WR_H +#define _MLX5_IB_WR_H + +#include "mlx5_ib.h" + +enum { + MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64, +}; + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + + +/* get_sq_edge - Get the next nearby edge. + * + * An 'edge' is defined as the first following address after the end + * of the fragment or the SQ. Accordingly, during the WQE construction + * which repetitively increases the pointer to write the next data, it + * simply should check if it gets to an edge. + * + * @sq - SQ buffer. + * @idx - Stride index in the SQ buffer. + * + * Return: + * The new edge. + */ +static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) +{ + void *fragment_end; + + fragment_end = mlx5_frag_buf_get_wqe + (&sq->fbc, + mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); + + return fragment_end + MLX5_SEND_WQE_BB; +} + +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain); +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain); + +static inline int mlx5_ib_post_send_nodrain(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return mlx5_ib_post_send(ibqp, wr, bad_wr, false); +} + +static inline int mlx5_ib_post_send_drain(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return mlx5_ib_post_send(ibqp, wr, bad_wr, true); +} + +static inline int mlx5_ib_post_recv_nodrain(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return mlx5_ib_post_recv(ibqp, wr, bad_wr, false); +} + +static inline int mlx5_ib_post_recv_drain(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return mlx5_ib_post_recv(ibqp, wr, bad_wr, true); +} +#endif /* _MLX5_IB_WR_H */ From 8d93efb8c5e07706651a992fc07f37a2fb69baca Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 6 May 2020 10:16:01 +0300 Subject: [PATCH 240/562] RDMA/mlx5: Assign profile before calling stages Assign the profile to the IB device before executing stages. This will allow to check which profile is being used from within a stage. Link: https://lore.kernel.org/r/20200506071602.7177-2-leon@kernel.org Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ib_rep.h | 2 +- drivers/infiniband/hw/mlx5/main.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 3b6750cba796..5b30d3fa8f8d 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -9,9 +9,9 @@ #include #include "mlx5_ib.h" -#ifdef CONFIG_MLX5_ESWITCH extern const struct mlx5_ib_profile raw_eth_profile; +#ifdef CONFIG_MLX5_ESWITCH u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 40bf71efaeb0..1a7464b669d5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -7160,6 +7160,8 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, int err; int i; + dev->profile = profile; + for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { err = profile->stage[i].init(dev); @@ -7168,7 +7170,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } - dev->profile = profile; dev->ib_active = true; return dev; From 42caf9cb59370cc6f296c7a9fe39fa66963236ff Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 6 May 2020 10:16:02 +0300 Subject: [PATCH 241/562] RDMA/mlx5: Allow only raw Ethernet QPs when RoCE isn't enabled When operating in switchdev mode or using devlink to disable RoCE only raw Ethernet QPs are allowed to be created. When in switchdev mode this can lead to passing an invalid port number as part of the modify qp firmware cmd and will lead to a syndrome reported back to the user, such as: * mlx5_cmd_check:803:(pid 50148): RST2INIT_QP(0x502) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x177405). Internal UD QP might be used to test for write combining support (even if externally we report RoCE as disabled) check for that specific flag and allow is specifically. Fixes: b5ca15ad7e61 ("IB/mlx5: Add proper representors support") Link: https://lore.kernel.org/r/20200506071602.7177-3-leon@kernel.org Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index fb2ea3bf9be4..40150595fdbb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2436,15 +2436,17 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, if (!MLX5_CAP_GEN(dev->mdev, xrc)) goto out; fallthrough; - case IB_QPT_RAW_PACKET: case IB_QPT_RC: case IB_QPT_UC: - case IB_QPT_UD: case IB_QPT_SMI: case MLX5_IB_QPT_HW_GSI: - case MLX5_IB_QPT_REG_UMR: case IB_QPT_DRIVER: case IB_QPT_GSI: + if (dev->profile == &raw_eth_profile) + goto out; + case IB_QPT_RAW_PACKET: + case IB_QPT_UD: + case MLX5_IB_QPT_REG_UMR: break; default: goto out; @@ -2641,6 +2643,10 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int create_flags = attr->create_flags; bool cond; + if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) + if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) + return -EINVAL; + if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ? -EINVAL : 0; From 52c81f47f0d2680f0b2e7b61c1fa4d8ad35f3020 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 May 2020 16:16:10 +0100 Subject: [PATCH 242/562] RDMA/mlx5: Remove duplicated assignment to variable rcqe_sz The variable rcqe_sz is being unnecessarily assigned twice, fix this by removing one of the duplicates. Fixes: 8bde2c509e40 ("RDMA/mlx5: Update all DRIVER QP places to use QP subtype") Link: https://lore.kernel.org/r/20200507151610.52636-1-colin.king@canonical.com Addresses-Coverity: ("Evaluation order violation") Signed-off-by: Colin Ian King Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 40150595fdbb..c571b7a97f10 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1994,8 +1994,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) && (init_attr->qp_type == IB_QPT_RC || init_attr->qp_type == IB_QPT_UC)) { - int rcqe_sz = rcqe_sz = - mlx5_ib_get_cqe_size(init_attr->recv_cq); + int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); MLX5_SET(qpc, qpc, cs_res, rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : From 108e36f0d8bf91839613d8053a6d1354965801b0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 7 May 2020 11:35:26 +0900 Subject: [PATCH 243/562] scsi: scsi_debug: Disallow zone sizes that are not powers of 2 Allowing a non-power-of-2 zone size forces the use of direct division operations of 64-bit sector values to obtain a zone number or number of zones. Doing so without using do_div() leads to compilation errors on 32-bit architectures. Devices with a zone size that is not a power of 2 do not exist today so allowing their emulation is of limited interest as the sd driver will not support them anyway. To fix this compilation error, instead of using do_div() for sector values divisions, simply disallow zone size values that are not a power of 2. [mkp: commit desc] Link: https://lore.kernel.org/r/20200507023526.221574-1-damien.lemoal@wdc.com Fixes: 98e0a689868c ("scsi: scsi_debug: Add zone_size_mb module parameter") Fixes: f0d1cf9378bd ("scsi: scsi_debug: Add ZBC zone commands") Reviewed-by: Johannes Thumshirn Acked-by: Geert Uytterhoeven Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index d3ea16f3c12e..105e563d87b4 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2657,13 +2657,7 @@ static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip) static struct sdeb_zone_state *zbc_zone(struct sdebug_dev_info *devip, unsigned long long lba) { - unsigned int zno; - - if (devip->zsize_shift) - zno = lba >> devip->zsize_shift; - else - zno = lba / devip->zsize; - return &devip->zstate[zno]; + return &devip->zstate[lba >> devip->zsize_shift]; } static inline bool zbc_zone_is_conv(struct sdeb_zone_state *zsp) @@ -4306,7 +4300,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, return check_condition_result; } - max_zones = devip->nr_zones - zs_lba / devip->zsize; + max_zones = devip->nr_zones - (zs_lba >> devip->zsize_shift); rep_max_zones = min((alloc_len - 64) >> ilog2(RZONES_DESC_HD), max_zones); @@ -4826,6 +4820,10 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) return -EINVAL; } } else { + if (!is_power_of_2(sdeb_zbc_zone_size_mb)) { + pr_err("Zone size is not a power of 2\n"); + return -EINVAL; + } devip->zsize = (sdeb_zbc_zone_size_mb * SZ_1M) >> ilog2(sdebug_sector_size); if (devip->zsize >= capacity) { @@ -4834,8 +4832,7 @@ static int sdebug_device_create_zones(struct sdebug_dev_info *devip) } } - if (is_power_of_2(devip->zsize)) - devip->zsize_shift = ilog2(devip->zsize); + devip->zsize_shift = ilog2(devip->zsize); devip->nr_zones = (capacity + devip->zsize - 1) >> devip->zsize_shift; if (sdeb_zbc_nr_conv >= devip->nr_zones) { From 646d4b507626f4c19d2d256ef5fc14a8d52521c6 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 7 May 2020 08:26:42 +0200 Subject: [PATCH 244/562] scsi: core: Remove 'list' entry from struct scsi_cmnd Leftover from cmd_list removal. Link: https://lore.kernel.org/r/20200507062642.100612-1-hare@suse.de Fixes: c5a9707672fe ("scsi: core: Remove cmd_list functionality") Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aachba.c | 1 - include/scsi/scsi_cmnd.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index eb72ac8136c3..2b868f8db8ff 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -814,7 +814,6 @@ int aac_probe_container(struct aac_dev *dev, int cid) kfree(scsidev); return -ENOMEM; } - scsicmd->list.next = NULL; scsicmd->scsi_done = aac_probe_container_scsi_done; scsicmd->device = scsidev; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 80ac89e47b47..7f047fdd34ac 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -68,7 +68,6 @@ struct scsi_pointer { struct scsi_cmnd { struct scsi_request req; struct scsi_device *device; - struct list_head list; /* scsi_cmnd participates in queue lists */ struct list_head eh_entry; /* entry for the host eh_cmd_q */ struct delayed_work abort_work; From f839544ccff60cbe534282aac68858fc3fb278ca Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Fri, 10 Apr 2020 11:07:08 +0300 Subject: [PATCH 245/562] scsi: qla2xxx: Fix warning after FC target reset Currently, FC target reset finishes with the warning message: [84010.596893] ------------[ cut here ]------------ [84010.596917] WARNING: CPU: 238 PID: 279973 at ../drivers/scsi/qla2xxx/qla_target.c:6644 qlt_enable_vha+0x1d0/0x260 [qla2xxx] [84010.596918] Modules linked in: vrf af_packet 8021q garp mrp stp llc netlink_diag target_tatlin_tblock(OEX) dm_ec(OEX) ttln_rdma(OEX) dm_frontend(OEX) nvme_rdma nvmet tcm_qla2xxx iscsi_target_mod target_core_mod at24 nvmem_core pnv_php ipmi_watchdog ipmi_ssif vmx_crypto gf128mul crct10dif_vpmsum qla2xxx rpcrdma nvme_fc powernv_flash(X) nvme_fabrics uio_pdrv_genirq mtd rtc_opal(X) ibmpowernv(X) opal_prd(X) uio scsi_transport_fc i2c_opal(X) ses enclosure ipmi_poweroff ast i2c_algo_bit ttm bmc_mcu(OEX) drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm drm_panel_orientation_quirks agpgart nfsd auth_rpcgss nfs_acl ipmi_powernv(X) lockd ipmi_devintf ipmi_msghandler grace dummy ext4 crc16 jbd2 mbcache sd_mod rdma_ucm ib_iser rdma_cm ib_umad iw_cm ib_ipoib libiscsi scsi_transport_iscsi ib_cm [84010.596975] configfs mlx5_ib ib_uverbs ib_core mlx5_core crc32c_vpmsum xhci_pci xhci_hcd mpt3sas(OEX) tg3 usbcore mlxfw tls raid_class libphy scsi_transport_sas devlink ptp pps_core nvme nvme_core sunrpc dm_mirror dm_region_hash dm_log sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4 [84010.597001] Supported: Yes, External [84010.597004] CPU: 238 PID: 279973 Comm: bash Tainted: G OE 4.12.14-197.29-default #1 SLE15-SP1 [84010.597006] task: c000000a104c0000 task.stack: c000000b52188000 [84010.597007] NIP: d00000001ffd7f78 LR: d00000001ffd7f6c CTR: c0000000001676c0 [84010.597008] REGS: c000000b5218b910 TRAP: 0700 Tainted: G OE (4.12.14-197.29-default) [84010.597008] MSR: 900000010282b033 [84010.597015] CR: 48242424 XER: 00000000 [84010.597016] CFAR: d00000001ff45d08 SOFTE: 1 GPR00: d00000001ffd7f6c c000000b5218bb90 d00000002001b228 0000000000000102 GPR04: 0000000000000001 0000000000000001 00013d91ed0a5e2d 0000000000000000 GPR08: c000000007793300 0000000000000000 0000000000000000 c000000a086e7818 GPR12: 0000000000002200 c000000007793300 0000000000000000 000000012bc937c0 GPR16: 000000012bbf7ed0 0000000000000000 000000012bc3dd10 0000000000000000 GPR20: 000000012bc4db28 0000010036442810 000000012bc97828 000000012bc96c70 GPR24: 00000100365b1550 0000000000000000 00000100363f3d80 c000000be20d3080 GPR28: c000000bda7eae00 c000000be20db7e8 c000000be20d3778 c000000be20db7e8 [84010.597042] NIP [d00000001ffd7f78] qlt_enable_vha+0x1d0/0x260 [qla2xxx] [84010.597051] LR [d00000001ffd7f6c] qlt_enable_vha+0x1c4/0x260 [qla2xxx] [84010.597051] Call Trace: [84010.597061] [c000000b5218bb90] [d00000001ffd7f6c] qlt_enable_vha+0x1c4/0x260 [qla2xxx] (unreliable) [84010.597064] [c000000b5218bc20] [d000000009820b6c] tcm_qla2xxx_tpg_enable_store+0xc4/0x130 [tcm_qla2xxx] [84010.597067] [c000000b5218bcb0] [d0000000185d0e68] configfs_write_file+0xd0/0x190 [configfs] [84010.597072] [c000000b5218bd00] [c0000000003d0edc] __vfs_write+0x3c/0x1e0 [84010.597074] [c000000b5218bd90] [c0000000003d2ea8] vfs_write+0xd8/0x220 [84010.597076] [c000000b5218bde0] [c0000000003d4ddc] SyS_write+0x6c/0x110 [84010.597079] [c000000b5218be30] [c00000000000b188] system_call+0x3c/0x130 [84010.597080] Instruction dump: [84010.597082] 7d0050a8 7d084b78 7d0051ad 40c2fff4 7fa3eb78 4bf73965 60000000 7fa3eb78 [84010.597086] 4bf6dcd9 60000000 2fa30000 419eff40 <0fe00000> 4bffff38 e95f0058 a12a0180 [84010.597090] ---[ end trace e32abaf6e6fee826 ]--- To reproduce: echo 0x7fffffff > /sys/module/qla2xxx/parameters/logging modprobe target_core_mod modprobe tcm_qla2xxx mkdir /sys/kernel/config/target/qla2xxx mkdir /sys/kernel/config/target/qla2xxx/ mkdir /sys/kernel/config/target/qla2xxx//tpgt_1 echo 1 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable echo 0 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable echo 1 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable SYSTEM START kernel: pid 327:drivers/scsi/qla2xxx/qla_init.c:2174 qla2x00_initialize_adapter(): vha->flags.online 0x0 <...> kernel: pid 327:drivers/scsi/qla2xxx/qla_os.c:3444 qla2x00_probe_one(): vha->flags.online 0x1 echo 1 > /sys/kernel/config/target/qla2xxx/21:00:00:24:ff:86:a6:2a/tpgt_1/enable kernel: pid 348:drivers/scsi/qla2xxx/qla_init.c:6641 qla2x00_abort_isp_cleanup(): vha->flags.online 0x0, ISP_ABORT_NEEDED 0x0 <...> kernel: pid 348:drivers/scsi/qla2xxx/qla_init.c:6998 qla2x00_restart_isp(): vha->flags.online 0x0 echo 0 > /sys/kernel/config/target/qla2xxx/21:00:00:24:ff:86:a6:2a/tpgt_1/enable kernel: pid 348:drivers/scsi/qla2xxx/qla_init.c:6641 qla2x00_abort_isp_cleanup(): vha->flags.online 0x0, ISP_ABORT_NEEDED 0x0 <...> kernel: pid 1404:drivers/scsi/qla2xxx/qla_os.c:1107 qla2x00_wait_for_hba_online(): base_vha->flags.online 0x0 echo 1 > /sys/kernel/config/target/qla2xxx/21:00:00:24:ff:86:a6:2a/tpgt_1/enable kernel: pid 1404:drivers/scsi/qla2xxx/qla_os.c:1107 qla2x00_wait_for_hba_online(): base_vha->flags.online 0x0 kernel: -----------[ cut here ]----------- kernel: WARNING: CPU: 1 PID: 1404 at drivers/scsi/qla2xxx/qla_target.c:6654 qlt_enable_vha+0x1e0/0x280 [qla2xxx] The issue happens because no real ISP reset is executed. The qla2x00_abort_isp(scsi_qla_host_t *vha) function expects that vha->flags.online will be not zero for ISP reset procedure. This patch sets vha->flags.online to 1 before calling ->abort_isp() for starting the ISP reset. Link: https://lore.kernel.org/r/1d7b21bf9f7676643239eb3d60eaca7cfa505cf0.camel@yadro.com Reviewed-by: Roman Bolshakov Signed-off-by: Viacheslav Dubeyko Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4c645d568cf7..f51ade7cfcc8 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6878,6 +6878,7 @@ qla2x00_do_dpc(void *data) if (do_reset && !(test_and_set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags))) { + base_vha->flags.online = 1; ql_dbg(ql_dbg_dpc, base_vha, 0x4007, "ISP abort scheduled.\n"); if (ha->isp_ops->abort_isp(base_vha)) { From 6b3d16f9875e181412401b8ab445dce83bd6fedc Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Wed, 22 Apr 2020 13:51:51 +0300 Subject: [PATCH 246/562] scsi: qla2xxx: Fix failure message in qlt_disable_vha() The following sequence of commands result in an incorrect failure message being printed: echo 0x7fffffff > /sys/module/qla2xxx/parameters/logging modprobe target_core_mod modprobe tcm_qla2xxx mkdir /sys/kernel/config/target/qla2xxx mkdir /sys/kernel/config/target/qla2xxx/ mkdir /sys/kernel/config/target/qla2xxx//tpgt_1 echo 1 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable echo 0 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable qla2xxx [0001:00:02.0]-e881:1: qla2x00_wait_for_hba_online() failed The reason of this message is the QLA_FUNCTION_FAILED code that qla2x00_wait_for_hba_online() returns. However, qlt_disable_vha() expects that adapter is offlined and QLA_FUNCTION_FAILED informs about the offline state of the adapter. The qla2x00_abort_isp() function finishes the execution at the point of checking the adapter's mode (for example, qla_tgt_mode_enabled()) because of the qlt_disable_vha() calls qlt_clear_mode() method. It means that qla2x00_abort_isp() keeps vha->flags.online is equal to zero. Finally, qla2x00_wait_for_hba_online() checks the state of this flag and returns QLA_FUNCTION_FAILED error code. This patch changes the failure message which informs about adapter's offline state. Link: https://lore.kernel.org/r/3cd0bbf3599c53b0c2a7184582d705d8b8052c8b.camel@yadro.com Reviewed-by: Roman Bolshakov Reviewed-by: Himanshu Madhani Signed-off-by: Viacheslav Dubeyko Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 622e7337affc..f3255aa70dcc 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6661,9 +6661,14 @@ static void qlt_disable_vha(struct scsi_qla_host *vha) set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); + + /* + * We are expecting the offline state. + * QLA_FUNCTION_FAILED means that adapter is offline. + */ if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) ql_dbg(ql_dbg_tgt, vha, 0xe081, - "qla2x00_wait_for_hba_online() failed\n"); + "adapter is offline\n"); } /* From 803e45550b11c8e43d89812356fe6f105adebdf9 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Wed, 22 Apr 2020 13:55:52 +0300 Subject: [PATCH 247/562] scsi: qla2xxx: Fix issue with adapter's stopping state The goal of the following command sequence is to restart the adapter. However, the tgt_stop flag remains set, indicating that the adapter is still in stopping state even after re-enabling it. echo 0x7fffffff > /sys/module/qla2xxx/parameters/logging modprobe target_core_mod modprobe tcm_qla2xxx mkdir /sys/kernel/config/target/qla2xxx mkdir /sys/kernel/config/target/qla2xxx/ mkdir /sys/kernel/config/target/qla2xxx//tpgt_1 echo 1 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable echo 0 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable echo 1 > /sys/kernel/config/target/qla2xxx//tpgt_1/enable kernel: PID 1396:qla_target.c:1555 qlt_stop_phase1(): tgt_stop 0x0, tgt_stopped 0x0 kernel: qla2xxx [0001:00:02.0]-e803:1: PID 1396:qla_target.c:1567: Stopping target for host 1(c0000000033557e8) kernel: PID 1396:qla_target.c:1579 qlt_stop_phase1(): tgt_stop 0x1, tgt_stopped 0x0 kernel: PID 1396:qla_target.c:1266 qlt_schedule_sess_for_deletion(): tgt_stop 0x1, tgt_stopped 0x0 kernel: qla2xxx [0001:00:02.0]-e801:1: PID 1396:qla_target.c:1316: Scheduling sess c00000002d5cd800 for deletion 21:00:00:24:ff:7f:35:c7 kernel: qla2xxx [0001:00:02.0]-290a:1: PID 340:qla_target.c:1187: qlt_unreg_sess sess c00000002d5cd800 for deletion 21:00:00:24:ff:7f:35:c7 kernel: qla2xxx [0001:00:02.0]-f801:1: PID 340:qla_target.c:1145: Unregistration of sess c00000002d5cd800 21:00:00:24:ff:7f:35:c7 finished fcp_cnt 0 kernel: PID 340:qla_target.c:1155 qlt_free_session_done(): tgt_stop 0x1, tgt_stopped 0x0 kernel: qla2xxx [0001:00:02.0]-4807:1: PID 346:qla_os.c:6329: ISP abort scheduled. kernel: qla2xxx [0001:00:02.0]-28f1:1: PID 346:qla_os.c:3956: Mark all dev lost kernel: PID 346:qla_target.c:1266 qlt_schedule_sess_for_deletion(): tgt_stop 0x1, tgt_stopped 0x0 kernel: qla2xxx [0001:00:02.0]-4808:1: PID 346:qla_os.c:6338: ISP abort end. kernel: PID 1396:qla_target.c:6812 qlt_enable_vha(): tgt_stop 0x1, tgt_stopped 0x0 kernel: qla2xxx [0001:00:02.0]-4807:1: PID 346:qla_os.c:6329: ISP abort scheduled. kernel: qla2xxx [0001:00:02.0]-4808:1: PID 346:qla_os.c:6338: ISP abort end. qlt_handle_cmd_for_atio() rejects the request to send commands because the adapter is in the stopping state: kernel: PID 0:qla_target.c:4442 qlt_handle_cmd_for_atio(): tgt_stop 0x1, tgt_stopped 0x0 kernel: qla2xxx [0001:00:02.0]-3861:1: PID 0:qla_target.c:4447: New command while device c000000005314600 is shutting down kernel: qla2xxx [0001:00:02.0]-e85f:1: PID 0:qla_target.c:5728: qla_target: Unable to send command to target This patch calls qla_stop_phase2() in addition to qlt_stop_phase1() in tcm_qla2xxx_tpg_enable_store() and tcm_qla2xxx_npiv_tpg_enable_store(). The qlt_stop_phase1() marks adapter as stopping (tgt_stop == 0x1, tgt_stopped == 0x0) but qlt_stop_phase2() marks adapter as stopped (tgt_stop == 0x0, tgt_stopped == 0x1). Link: https://lore.kernel.org/r/52be1e8a3537f6c5407eae3edd4c8e08a9545ea5.camel@yadro.com Reviewed-by: Roman Bolshakov Reviewed-by: Himanshu Madhani Signed-off-by: Viacheslav Dubeyko Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 1f0a185b2a95..bf00ae16b487 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -949,6 +949,7 @@ static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item, atomic_set(&tpg->lport_tpg_enabled, 0); qlt_stop_phase1(vha->vha_tgt.qla_tgt); + qlt_stop_phase2(vha->vha_tgt.qla_tgt); } return count; @@ -1111,6 +1112,7 @@ static ssize_t tcm_qla2xxx_npiv_tpg_enable_store(struct config_item *item, atomic_set(&tpg->lport_tpg_enabled, 0); qlt_stop_phase1(vha->vha_tgt.qla_tgt); + qlt_stop_phase2(vha->vha_tgt.qla_tgt); } return count; From 1b007f96f9e063f9f0b93597a4089114a89c1854 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 29 Apr 2020 22:09:52 +0800 Subject: [PATCH 248/562] scsi: qla2xxx: Make qla_set_ini_mode() return void The return value is not used by the caller and the local variable 'rc' is not needed. Make qla_set_ini_mode() return void and remove 'rc'. This also fixes the following coccicheck warning: drivers/scsi/qla2xxx/qla_attr.c:1906:5-7: Unneeded variable: "rc". Return "0" on line 2180 Link: https://lore.kernel.org/r/20200429140952.8240-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 3a5f6f27587e..e83e44ef7083 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1925,9 +1925,8 @@ static char *mode_to_str[] = { }; #define NEED_EXCH_OFFLOAD(_exchg) ((_exchg) > FW_DEF_EXCHANGES_CNT) -static int qla_set_ini_mode(scsi_qla_host_t *vha, int op) +static void qla_set_ini_mode(scsi_qla_host_t *vha, int op) { - int rc = 0; enum { NO_ACTION, MODE_CHANGE_ACCEPT, @@ -2200,8 +2199,6 @@ static int qla_set_ini_mode(scsi_qla_host_t *vha, int op) vha->ql2xexchoffld, vha->u_ql2xexchoffld); break; } - - return rc; } static ssize_t From bda552a7741a23708823c6e87a39d9a956087ac0 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 30 Apr 2020 20:17:51 +0800 Subject: [PATCH 249/562] scsi: qla2xxx: Use true, false for need_mpi_reset Fix the following coccicheck warning: drivers/scsi/qla2xxx/qla_tmpl.c:1031:6-20: WARNING: Assignment of 0/1 to bool variable drivers/scsi/qla2xxx/qla_tmpl.c:1062:3-17: WARNING: Assignment of 0/1 to bool variable Link: https://lore.kernel.org/r/20200430121751.15232-1-yanaijie@huawei.com Reviewed-by: Himanshu Madhani Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_tmpl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 342363862434..dc97f2fbeafe 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -1028,7 +1028,7 @@ void qla27xx_mpi_fwdump(scsi_qla_host_t *vha, int hardware_locked) { ulong flags = 0; - bool need_mpi_reset = 1; + bool need_mpi_reset = true; #ifndef __CHECKER__ if (!hardware_locked) @@ -1059,7 +1059,7 @@ qla27xx_mpi_fwdump(scsi_qla_host_t *vha, int hardware_locked) "-> fwdt1 fwdump residual=%+ld\n", fwdt->dump_size - len); } else { - need_mpi_reset = 0; + need_mpi_reset = false; } vha->hw->mpi_fw_dump_len = len; From dbe6f49259dacc073c1ae602f383c177f57b1b8a Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 30 Apr 2020 20:18:00 +0800 Subject: [PATCH 250/562] scsi: qla2xxx: Use true, false for ha->fw_dumped Fix the following coccicheck warning: drivers/scsi/qla2xxx/qla_tmpl.c:1120:2-20: WARNING: Assignment of 0/1 to bool variable Link: https://lore.kernel.org/r/20200430121800.15323-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 2 +- drivers/scsi/qla2xxx/qla_dbg.c | 4 ++-- drivers/scsi/qla2xxx/qla_nx.c | 4 ++-- drivers/scsi/qla2xxx/qla_nx2.c | 8 ++++---- drivers/scsi/qla2xxx/qla_os.c | 2 +- drivers/scsi/qla2xxx/qla_tmpl.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index e83e44ef7083..4ee1a75e54ad 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -84,7 +84,7 @@ qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj, qla82xx_md_prep(vha); } ha->fw_dump_reading = 0; - ha->fw_dumped = 0; + ha->fw_dumped = false; break; case 1: if (ha->fw_dumped && !ha->fw_dump_reading) { diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 8b7d0e476773..1206f7c1ce6a 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -706,12 +706,12 @@ qla2xxx_dump_post_process(scsi_qla_host_t *vha, int rval) ql_log(ql_log_warn, vha, 0xd000, "Failed to dump firmware (%x), dump status flags (0x%lx).\n", rval, ha->fw_dump_cap_flags); - ha->fw_dumped = 0; + ha->fw_dumped = false; } else { ql_log(ql_log_info, vha, 0xd001, "Firmware dump saved to temp buffer (%ld/%p), dump status flags (0x%lx).\n", vha->host_no, ha->fw_dump, ha->fw_dump_cap_flags); - ha->fw_dumped = 1; + ha->fw_dumped = true; qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP); } } diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 185c5f34d4c1..d2037253e2d7 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -4177,7 +4177,7 @@ qla82xx_md_collect(scsi_qla_host_t *vha) goto md_failed; } - ha->fw_dumped = 0; + ha->fw_dumped = false; if (!ha->md_tmplt_hdr || !ha->md_dump) { ql_log(ql_log_warn, vha, 0xb038, @@ -4357,7 +4357,7 @@ qla82xx_md_collect(scsi_qla_host_t *vha) ql_log(ql_log_info, vha, 0xb044, "Firmware dump saved to temp buffer (%ld/%p %ld/%p).\n", vha->host_no, ha->md_tmplt_hdr, vha->host_no, ha->md_dump); - ha->fw_dumped = 1; + ha->fw_dumped = true; qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP); md_failed: diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c index c056f466f1f4..b5c3e56edaba 100644 --- a/drivers/scsi/qla2xxx/qla_nx2.c +++ b/drivers/scsi/qla2xxx/qla_nx2.c @@ -1441,7 +1441,7 @@ qla8044_device_bootstrap(struct scsi_qla_host *vha) if (idc_ctrl & GRACEFUL_RESET_BIT1) { qla8044_wr_reg(ha, QLA8044_IDC_DRV_CTRL, (idc_ctrl & ~GRACEFUL_RESET_BIT1)); - ha->fw_dumped = 0; + ha->fw_dumped = false; } dev_ready: @@ -3249,7 +3249,7 @@ qla8044_collect_md_data(struct scsi_qla_host *vha) goto md_failed; } - ha->fw_dumped = 0; + ha->fw_dumped = false; if (!ha->md_tmplt_hdr || !ha->md_dump) { ql_log(ql_log_warn, vha, 0xb10e, @@ -3470,7 +3470,7 @@ qla8044_collect_md_data(struct scsi_qla_host *vha) ql_log(ql_log_info, vha, 0xb110, "Firmware dump saved to temp buffer (%ld/%p %ld/%p).\n", vha->host_no, ha->md_tmplt_hdr, vha->host_no, ha->md_dump); - ha->fw_dumped = 1; + ha->fw_dumped = true; qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP); @@ -3487,7 +3487,7 @@ qla8044_get_minidump(struct scsi_qla_host *vha) struct qla_hw_data *ha = vha->hw; if (!qla8044_collect_md_data(vha)) { - ha->fw_dumped = 1; + ha->fw_dumped = true; ha->prev_minidump_failed = 0; } else { ql_log(ql_log_fatal, vha, 0xb0db, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index f51ade7cfcc8..382e1f977d01 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4622,7 +4622,7 @@ qla2x00_free_fw_dump(struct qla_hw_data *ha) ha->flags.fce_enabled = 0; ha->eft = NULL; ha->eft_dma = 0; - ha->fw_dumped = 0; + ha->fw_dumped = false; ha->fw_dump_cap_flags = 0; ha->fw_dump_reading = 0; ha->fw_dump = NULL; diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index dc97f2fbeafe..281973b317a8 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -1117,7 +1117,7 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) } vha->hw->fw_dump_len = len; - vha->hw->fw_dumped = 1; + vha->hw->fw_dumped = true; ql_log(ql_log_warn, vha, 0xd015, "-> Firmware dump saved to buffer (%lu/%p) <%lx>\n", From 88bfdf565cbe33524308d912777f4267981d4be0 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 6 May 2020 14:17:57 +0800 Subject: [PATCH 251/562] scsi: qla2xxx: Make qlafx00_process_aen() return void No other functions use the return value of qlafx00_process_aen() and the return value is always 0 now. Make it return void. This fixes the following coccicheck warning: drivers/scsi/qla2xxx/qla_mr.c:1716:5-9: Unneeded variable: "rval". Return "0" on line 1768 Link: https://lore.kernel.org/r/20200506061757.19536-1-yanaijie@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_gbl.h | 2 +- drivers/scsi/qla2xxx/qla_mr.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index b20c5fa122fb..f62b71e47581 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -771,7 +771,7 @@ extern int qlafx00_fw_ready(scsi_qla_host_t *); extern int qlafx00_configure_devices(scsi_qla_host_t *); extern int qlafx00_reset_initialize(scsi_qla_host_t *); extern int qlafx00_fx_disc(scsi_qla_host_t *, fc_port_t *, uint16_t); -extern int qlafx00_process_aen(struct scsi_qla_host *, struct qla_work_evt *); +extern void qlafx00_process_aen(struct scsi_qla_host *, struct qla_work_evt *); extern int qlafx00_post_aenfx_work(struct scsi_qla_host *, uint32_t, uint32_t *, int); extern uint32_t qlafx00_fw_state_show(struct device *, diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index df99911b8bb9..ce98189c7872 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -1710,10 +1710,9 @@ qlafx00_tgt_detach(struct scsi_qla_host *vha, int tgt_id) return; } -int +void qlafx00_process_aen(struct scsi_qla_host *vha, struct qla_work_evt *evt) { - int rval = 0; uint32_t aen_code, aen_data; aen_code = FCH_EVT_VENDOR_UNIQUE; @@ -1764,8 +1763,6 @@ qlafx00_process_aen(struct scsi_qla_host *vha, struct qla_work_evt *evt) fc_host_post_event(vha->host, fc_get_event_number(), aen_code, aen_data); - - return rval; } static void From 297083f6e53ba3ed4f3f3b0000def2e7b86f7b21 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 30 Apr 2020 18:02:12 +0800 Subject: [PATCH 252/562] scsi: aacraid: Make some symbols static Fix the following sparse warnings: drivers/scsi/aacraid/linit.c:867:6: warning: symbol 'aac_tmf_callback' was not declared. Should it be static? drivers/scsi/aacraid/linit.c:1081:5: warning: symbol 'aac_eh_host_reset' was not declared. Should it be static? drivers/scsi/aacraid/commsup.c:2354:5: warning: symbol 'aac_send_safw_hostttime' was not declared. Should it be static? drivers/scsi/aacraid/commsup.c:2383:5: warning: symbol 'aac_send_hosttime' was not declared. Should it be static? Link: https://lore.kernel.org/r/1588240932-69020-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 4 ++-- drivers/scsi/aacraid/linit.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index ddd73f6798af..8ee4e1abe568 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2351,7 +2351,7 @@ static int aac_send_wellness_command(struct aac_dev *dev, char *wellness_str, goto out; } -int aac_send_safw_hostttime(struct aac_dev *dev, struct timespec64 *now) +static int aac_send_safw_hostttime(struct aac_dev *dev, struct timespec64 *now) { struct tm cur_tm; char wellness_str[] = "TD\010\0\0\0\0\0\0\0\0\0DW\0\0ZZ"; @@ -2380,7 +2380,7 @@ int aac_send_safw_hostttime(struct aac_dev *dev, struct timespec64 *now) return ret; } -int aac_send_hosttime(struct aac_dev *dev, struct timespec64 *now) +static int aac_send_hosttime(struct aac_dev *dev, struct timespec64 *now) { int ret = -ENOMEM; struct fib *fibptr; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index a0a2b3abc512..a308e86a97f1 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -864,7 +864,7 @@ static u8 aac_eh_tmf_hard_reset_fib(struct aac_hba_map_info *info, return HBA_IU_TYPE_SATA_REQ; } -void aac_tmf_callback(void *context, struct fib *fibptr) +static void aac_tmf_callback(void *context, struct fib *fibptr) { struct aac_hba_resp *err = &((struct aac_native_hba *)fibptr->hw_fib_va)->resp.err; @@ -1078,7 +1078,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd) * @scsi_cmd: SCSI command block causing the reset * */ -int aac_eh_host_reset(struct scsi_cmnd *cmd) +static int aac_eh_host_reset(struct scsi_cmnd *cmd) { struct scsi_device * dev = cmd->device; struct Scsi_Host * host = dev->host; From 9187745ceec6ee94c28bba8e78ff9328719e18d3 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 30 Apr 2020 20:17:06 +0800 Subject: [PATCH 253/562] scsi: qedi: Remove comparison of 0/1 to bool variable Fix the following coccicheck warning: drivers/scsi/qedi/qedi_main.c:1309:5-25: WARNING: Comparison of 0/1 to bool variable drivers/scsi/qedi/qedi_main.c:1315:5-25: WARNING: Comparison of 0/1 to bool variable Link: https://lore.kernel.org/r/20200430121706.14879-1-yanaijie@huawei.com Acked-by: Manish Rangankar Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 4dd965860c98..46584e16d635 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1306,13 +1306,13 @@ static irqreturn_t qedi_msix_handler(int irq, void *dev_id) "process already running\n"); } - if (qedi_fp_has_work(fp) == 0) + if (!qedi_fp_has_work(fp)) qed_sb_update_sb_idx(fp->sb_info); /* Check for more work */ rmb(); - if (qedi_fp_has_work(fp) == 0) + if (!qedi_fp_has_work(fp)) qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1); else goto process_again; From b91857a5ca13cc7e6e7fe904d7f4ad64d44aee04 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 30 Apr 2020 20:17:18 +0800 Subject: [PATCH 254/562] scsi: fnic: Use true, false for fnic->internal_reset_inprogress Fix the following coccicheck warning: drivers/scsi/fnic/fnic_scsi.c:2627:5-36: WARNING: Comparison of 0/1 to bool variable Link: https://lore.kernel.org/r/20200430121718.14970-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic_scsi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index b60795893994..27535c90b248 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2624,8 +2624,8 @@ int fnic_host_reset(struct scsi_cmnd *sc) unsigned long flags; spin_lock_irqsave(&fnic->fnic_lock, flags); - if (fnic->internal_reset_inprogress == 0) { - fnic->internal_reset_inprogress = 1; + if (!fnic->internal_reset_inprogress) { + fnic->internal_reset_inprogress = true; } else { spin_unlock_irqrestore(&fnic->fnic_lock, flags); FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, @@ -2654,7 +2654,7 @@ int fnic_host_reset(struct scsi_cmnd *sc) } spin_lock_irqsave(&fnic->fnic_lock, flags); - fnic->internal_reset_inprogress = 0; + fnic->internal_reset_inprogress = false; spin_unlock_irqrestore(&fnic->fnic_lock, flags); return ret; } From 013f69a931e72aa0a38030d23f3c9b7cdafae9cc Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 30 Apr 2020 20:17:29 +0800 Subject: [PATCH 255/562] scsi: vmw_pvscsi: Use true, false for adapter->use_msg Fix the following coccicheck warning: drivers/scsi/vmw_pvscsi.c:911:2-18: WARNING: Assignment of 0/1 to bool variable Link: https://lore.kernel.org/r/20200430121729.15064-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index c3f010df641e..8dbb4db6831a 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -908,7 +908,7 @@ static int pvscsi_host_reset(struct scsi_cmnd *cmd) use_msg = adapter->use_msg; if (use_msg) { - adapter->use_msg = 0; + adapter->use_msg = false; spin_unlock_irqrestore(&adapter->hw_lock, flags); /* From 55d4ce458c77f21fd20711f23ab8540e904d10c3 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 30 Apr 2020 20:17:38 +0800 Subject: [PATCH 256/562] scsi: mpt3sas: Use true, false for ioc->use_32bit_dma Fix the following coccicheck warning: drivers/scsi/mpt3sas/mpt3sas_base.c:7202:1-19: WARNING: Assignment of 0/1 to bool variable Link: https://lore.kernel.org/r/20200430121738.15151-1-yanaijie@huawei.com Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 7416242624f0..7fa3bdb906b6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7199,7 +7199,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->smp_affinity_enable = smp_affinity_enable; ioc->rdpq_array_enable_assigned = 0; - ioc->use_32bit_dma = 0; + ioc->use_32bit_dma = false; if (ioc->is_aero_ioc) ioc->base_readl = &_base_readl_aero; else From 2b01b293f359ddd67e08bbe8def42a7082310b82 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Tue, 28 Apr 2020 02:45:22 -0400 Subject: [PATCH 257/562] scsi: mpt3sas: Capture IOC data for debugging purposes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Information needed to debug driver problems and firmware faults is stored in the IOC’s MPT3SAS_ADAPTER data structure. Parameters such as IOCFacts, IOC flags (related to sge, MSI-X, error recovery etc.), performance mode type, TMs, internal commands reply status, etc. are present. For debugging purposes, it is therefore helpful to be able to capture this information so that the fault can be analyzed. Export the MPT3SAS_ADAPTER data structure in debugfs. The data is available in: /sys/kernel/debug/mpt3sas/scsi_hostX/ioc_dump Link: https://lore.kernel.org/r/1588056322-29227-1-git-send-email-suganath-prabu.subramani@broadcom.com Signed-off-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/Makefile | 3 +- drivers/scsi/mpt3sas/mpt3sas_base.h | 18 ++- drivers/scsi/mpt3sas/mpt3sas_debugfs.c | 158 +++++++++++++++++++++++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 + 4 files changed, 179 insertions(+), 4 deletions(-) create mode 100644 drivers/scsi/mpt3sas/mpt3sas_debugfs.c diff --git a/drivers/scsi/mpt3sas/Makefile b/drivers/scsi/mpt3sas/Makefile index 84fb3fbdb0ca..e76d994dbed3 100644 --- a/drivers/scsi/mpt3sas/Makefile +++ b/drivers/scsi/mpt3sas/Makefile @@ -7,4 +7,5 @@ mpt3sas-y += mpt3sas_base.o \ mpt3sas_transport.o \ mpt3sas_ctl.o \ mpt3sas_trigger_diag.o \ - mpt3sas_warpdrive.o + mpt3sas_warpdrive.o \ + mpt3sas_debugfs.o \ diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index c5743792bbfb..4fca3939c034 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -76,9 +76,9 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "33.101.00.00" -#define MPT3SAS_MAJOR_VERSION 33 -#define MPT3SAS_MINOR_VERSION 101 +#define MPT3SAS_DRIVER_VERSION "34.100.00.00" +#define MPT3SAS_MAJOR_VERSION 34 +#define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 @@ -1472,6 +1472,8 @@ struct MPT3SAS_ADAPTER { u16 device_remove_in_progress_sz; u8 is_gen35_ioc; u8 is_aero_ioc; + struct dentry *debugfs_root; + struct dentry *ioc_dump; PUT_SMID_IO_FP_HIP put_smid_scsi_io; PUT_SMID_IO_FP_HIP put_smid_fast_path; PUT_SMID_IO_FP_HIP put_smid_hi_priority; @@ -1479,6 +1481,11 @@ struct MPT3SAS_ADAPTER { GET_MSIX_INDEX get_msix_index_for_smlio; }; +struct mpt3sas_debugfs_buffer { + void *buf; + u32 len; +}; + #define MPT_DRV_SUPPORT_BITMAP_MEMMOVE 0x00000001 typedef u8 (*MPT_CALLBACK)(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, @@ -1782,6 +1789,11 @@ mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, /* NCQ Prio Handling Check */ bool scsih_ncq_prio_supp(struct scsi_device *sdev); +void mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc); +void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc); +void mpt3sas_init_debugfs(void); +void mpt3sas_exit_debugfs(void); + /** * _scsih_is_pcie_scsi_device - determines if device is an pcie scsi device * @device_info: bitfield providing information about the device. diff --git a/drivers/scsi/mpt3sas/mpt3sas_debugfs.c b/drivers/scsi/mpt3sas/mpt3sas_debugfs.c new file mode 100644 index 000000000000..48095d8aef95 --- /dev/null +++ b/drivers/scsi/mpt3sas/mpt3sas_debugfs.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Debugfs interface Support for MPT (Message Passing Technology) based + * controllers. + * + * Copyright (C) 2020 Broadcom Inc. + * + * Authors: Broadcom Inc. + * Sreekanth Reddy + * Suganath Prabu + * + * Send feedback to : MPT-FusionLinux.pdl@broadcom.com) + * + **/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "mpt3sas_base.h" +#include + +static struct dentry *mpt3sas_debugfs_root; + +/* + * _debugfs_iocdump_read - copy ioc dump from debugfs buffer + * @filep: File Pointer + * @ubuf: Buffer to fill data + * @cnt: Length of the buffer + * @ppos: Offset in the file + */ + +static ssize_t +_debugfs_iocdump_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) + +{ + struct mpt3sas_debugfs_buffer *debug = filp->private_data; + + if (!debug || !debug->buf) + return 0; + + return simple_read_from_buffer(ubuf, cnt, ppos, debug->buf, debug->len); +} + +/* + * _debugfs_iocdump_open : open the ioc_dump debugfs attribute file + */ +static int +_debugfs_iocdump_open(struct inode *inode, struct file *file) +{ + struct MPT3SAS_ADAPTER *ioc = inode->i_private; + struct mpt3sas_debugfs_buffer *debug; + + debug = kzalloc(sizeof(struct mpt3sas_debugfs_buffer), GFP_KERNEL); + if (!debug) + return -ENOMEM; + + debug->buf = (void *)ioc; + debug->len = sizeof(struct MPT3SAS_ADAPTER); + file->private_data = debug; + return 0; +} + +/* + * _debugfs_iocdump_release : release the ioc_dump debugfs attribute + * @inode: inode structure to the corresponds device + * @file: File pointer + */ +static int +_debugfs_iocdump_release(struct inode *inode, struct file *file) +{ + struct mpt3sas_debugfs_buffer *debug = file->private_data; + + if (!debug) + return 0; + + file->private_data = NULL; + kfree(debug); + return 0; +} + +static const struct file_operations mpt3sas_debugfs_iocdump_fops = { + .owner = THIS_MODULE, + .open = _debugfs_iocdump_open, + .read = _debugfs_iocdump_read, + .release = _debugfs_iocdump_release, +}; + +/* + * mpt3sas_init_debugfs : Create debugfs root for mpt3sas driver + */ +void mpt3sas_init_debugfs(void) +{ + mpt3sas_debugfs_root = debugfs_create_dir("mpt3sas", NULL); + if (!mpt3sas_debugfs_root) + pr_info("mpt3sas: Cannot create debugfs root\n"); +} + +/* + * mpt3sas_exit_debugfs : Remove debugfs root for mpt3sas driver + */ +void mpt3sas_exit_debugfs(void) +{ + debugfs_remove_recursive(mpt3sas_debugfs_root); +} + +/* + * mpt3sas_setup_debugfs : Setup debugfs per HBA adapter + * ioc: MPT3SAS_ADAPTER object + */ +void +mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc) +{ + char name[64]; + + snprintf(name, sizeof(name), "scsi_host%d", ioc->shost->host_no); + if (!ioc->debugfs_root) { + ioc->debugfs_root = + debugfs_create_dir(name, mpt3sas_debugfs_root); + if (!ioc->debugfs_root) { + dev_err(&ioc->pdev->dev, + "Cannot create per adapter debugfs directory\n"); + return; + } + } + + snprintf(name, sizeof(name), "ioc_dump"); + ioc->ioc_dump = debugfs_create_file(name, 0444, + ioc->debugfs_root, ioc, &mpt3sas_debugfs_iocdump_fops); + if (!ioc->ioc_dump) { + dev_err(&ioc->pdev->dev, + "Cannot create ioc_dump debugfs file\n"); + debugfs_remove(ioc->debugfs_root); + return; + } + + snprintf(name, sizeof(name), "host_recovery"); + debugfs_create_u8(name, 0444, ioc->debugfs_root, &ioc->shost_recovery); + +} + +/* + * mpt3sas_destroy_debugfs : Destroy debugfs per HBA adapter + * @ioc: MPT3SAS_ADAPTER object + */ +void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc) +{ + debugfs_remove_recursive(ioc->debugfs_root); +} + diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 04a40afe60e3..6b84a83f1720 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -9928,6 +9928,7 @@ static void scsih_remove(struct pci_dev *pdev) &ioc->ioc_pg1_copy); /* release all the volumes */ _scsih_ir_shutdown(ioc); + mpt3sas_destroy_debugfs(ioc); sas_remove_host(shost); list_for_each_entry_safe(raid_device, next, &ioc->raid_device_list, list) { @@ -10814,6 +10815,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) } scsi_scan_host(shost); + mpt3sas_setup_debugfs(ioc); return 0; out_add_shost_fail: mpt3sas_base_detach(ioc); @@ -11220,6 +11222,7 @@ scsih_init(void) tm_sas_control_cb_idx = mpt3sas_base_register_callback_handler( _scsih_sas_control_complete); + mpt3sas_init_debugfs(); return 0; } @@ -11251,6 +11254,7 @@ scsih_exit(void) if (hbas_to_enumerate != 2) raid_class_release(mpt2sas_raid_template); sas_release_transport(mpt3sas_transport_template); + mpt3sas_exit_debugfs(); } /** From 4778069ccf54dc7f0d1f7f7facf30c4edc9a707f Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Tue, 28 Apr 2020 02:47:08 -0400 Subject: [PATCH 258/562] scsi: mpt3sas: Update maintainers Updated maintainers list for MPT DRIVERS Link: https://lore.kernel.org/r/1588056428-29369-1-git-send-email-suganath-prabu.subramani@broadcom.com Signed-off-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..56fd620d6320 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9944,7 +9944,7 @@ F: drivers/hid/hid-lg-g15.c LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) M: Sathya Prakash -M: Chaitra P B +M: Sreekanth Reddy M: Suganath Prabu Subramani L: MPT-FusionLinux.pdl@broadcom.com L: linux-scsi@vger.kernel.org From e869f8ea6a643103faad8949ae7153cab6bfa702 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 28 Apr 2020 05:25:02 -0400 Subject: [PATCH 259/562] scsi: mpt3sas: Disable DIF when prot_mask set to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default DIF Type 1, DIF Type 2 & DIF Type 3 will be enabled. Also, users can enable either DIF Type 1 or DIF Type 2 or DIF Type 3 or in any combination using the prot_mask module parameter. However, when the user provides a prot_mask module parameter value of zero, then the driver is not disabling the DIF. Instead it enables all three types. Modify the driver to disable the DIF support if the user provides a prot_mask module parameter value of zero. Link: https://lore.kernel.org/r/1588065902-2726-1-git-send-email-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6b84a83f1720..08fc4b381056 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -10764,8 +10764,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) } } /* register EEDP capabilities with SCSI layer */ - if (prot_mask > 0) - scsi_host_set_prot(shost, prot_mask); + if (prot_mask >= 0) + scsi_host_set_prot(shost, (prot_mask & 0x07)); else scsi_host_set_prot(shost, SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION From 9299941716a3082e5335fe751ce433cdb62b26d0 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 27 Apr 2020 17:08:20 +0200 Subject: [PATCH 260/562] scsi: target: Add missing emulate_pr attribute to passthrough backends In commit b49d6f788530 ("scsi: target: add emulate_pr backstore attr to toggle PR support") the new attribute emulate_pr was added. passthrough_parse_cdb() uses the attribute's value to distinguish whether reservation commands should be rejected or not. But the new attribute was not added to passthrough_attrib_attrs, so in pscsi and tcmu - the users of passthrough_parse_cdb() - the attribute is not available to change parser's behavior. Link: https://lore.kernel.org/r/20200427150823.15350-2-bstroesser@ts.fujitsu.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index ff82b21fdcce..d8589bb39c53 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1203,6 +1203,7 @@ struct configfs_attribute *passthrough_attrib_attrs[] = { &attr_hw_block_size, &attr_hw_max_sectors, &attr_hw_queue_depth, + &attr_emulate_pr, &attr_alua_support, &attr_pgr_support, NULL, From 4703b6252b338eb312ba61c5129d872cfe58759f Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 27 Apr 2020 17:08:21 +0200 Subject: [PATCH 261/562] scsi: target: tcmu: Add attributes enforce_pr_isids and force_pr_aptpl tcmu has not set TRANSPORT_FLAG_PASSTHROUGH_PGR. Therefore the in-core pr emulation is active by default, but there are some attributes for configuration missing. Add them. Link: https://lore.kernel.org/r/20200427150823.15350-3-bstroesser@ts.fujitsu.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 11 +++++++++++ drivers/target/target_core_user.c | 19 +++++++++---------- include/target/target_core_backend.h | 1 + 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index d8589bb39c53..efa92d836946 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1210,6 +1210,17 @@ struct configfs_attribute *passthrough_attrib_attrs[] = { }; EXPORT_SYMBOL(passthrough_attrib_attrs); +/* + * pr related dev_attrib attributes for devices passing through CDBs, + * but allowing in core pr emulation. + */ +struct configfs_attribute *passthrough_pr_attrib_attrs[] = { + &attr_enforce_pr_isids, + &attr_force_pr_aptpl, + NULL, +}; +EXPORT_SYMBOL(passthrough_pr_attrib_attrs); + TB_CIT_SETUP_DRV(dev_attrib, NULL, NULL); TB_CIT_SETUP_DRV(dev_action, NULL, NULL); diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 0b9dfa6b17bc..4b190d600d2f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2752,12 +2752,12 @@ static int __init tcmu_module_init(void) goto out_unreg_device; } - for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) len += sizeof(struct configfs_attribute *); - } - for (i = 0; tcmu_attrib_attrs[i] != NULL; i++) { + for (i = 0; passthrough_pr_attrib_attrs[i] != NULL; i++) + len += sizeof(struct configfs_attribute *); + for (i = 0; tcmu_attrib_attrs[i] != NULL; i++) len += sizeof(struct configfs_attribute *); - } len += sizeof(struct configfs_attribute *); tcmu_attrs = kzalloc(len, GFP_KERNEL); @@ -2766,13 +2766,12 @@ static int __init tcmu_module_init(void) goto out_unreg_genl; } - for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) tcmu_attrs[i] = passthrough_attrib_attrs[i]; - } - for (k = 0; tcmu_attrib_attrs[k] != NULL; k++) { - tcmu_attrs[i] = tcmu_attrib_attrs[k]; - i++; - } + for (k = 0; passthrough_pr_attrib_attrs[k] != NULL; k++) + tcmu_attrs[i++] = passthrough_pr_attrib_attrs[k]; + for (k = 0; tcmu_attrib_attrs[k] != NULL; k++) + tcmu_attrs[i++] = tcmu_attrib_attrs[k]; tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs; ret = transport_backend_register(&tcmu_ops); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 1b752d8ea529..8177667eba6d 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -94,6 +94,7 @@ int transport_set_vpd_ident(struct t10_vpd *, unsigned char *); extern struct configfs_attribute *sbc_attrib_attrs[]; extern struct configfs_attribute *passthrough_attrib_attrs[]; +extern struct configfs_attribute *passthrough_pr_attrib_attrs[]; /* core helpers also used by command snooping in pscsi */ void *transport_kmap_data_sg(struct se_cmd *); From 69088a049488171bc05394799b048c8536e7dbab Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 27 Apr 2020 17:08:22 +0200 Subject: [PATCH 262/562] scsi: target: Make transport_flags per device pgr_support and alua_support device attributes show the inverted value of the transport_flags: * TRANSPORT_FLAG_PASSTHROUGH_PGR * TRANSPORT_FLAG_PASSTHROUGH_ALUA These attributes are per device, while the flags are per backend. Rename the transport_flags in backend/transport to transport_flags_default and use this value to initialize the new transport_flags field in the se_device structure. Now data and attribute both are per se_device. Link: https://lore.kernel.org/r/20200427150823.15350-4-bstroesser@ts.fujitsu.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_alua.c | 10 +++++----- drivers/target/target_core_configfs.c | 14 +++++++------- drivers/target/target_core_device.c | 3 ++- drivers/target/target_core_pr.c | 2 +- drivers/target/target_core_pscsi.c | 6 +++--- drivers/target/target_core_tpg.c | 3 +-- drivers/target/target_core_transport.c | 6 +++--- drivers/target/target_core_user.c | 2 +- include/target/target_core_backend.h | 2 +- include/target/target_core_base.h | 1 + 10 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 385e4cf9cfa6..6b72afee2f8b 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -677,7 +677,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) return 0; /* @@ -1090,7 +1090,7 @@ int core_alua_do_port_transition( struct t10_alua_tg_pt_gp *tg_pt_gp; int primary, valid_states, rc = 0; - if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) + if (l_dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) return -ENODEV; valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; @@ -1920,7 +1920,7 @@ ssize_t core_alua_store_tg_pt_gp_info( unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2177,7 +2177,7 @@ ssize_t core_alua_store_offline_bit( unsigned long tmp; int ret; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2263,7 +2263,7 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (!(dev->transport->transport_flags & + if (!(dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index efa92d836946..279989e32e64 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1099,7 +1099,7 @@ static ssize_t block_size_store(struct config_item *item, static ssize_t alua_support_show(struct config_item *item, char *page) { struct se_dev_attrib *da = to_attrib(item); - u8 flags = da->da_dev->transport->transport_flags; + u8 flags = da->da_dev->transport_flags; return snprintf(page, PAGE_SIZE, "%d\n", flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ? 0 : 1); @@ -1108,7 +1108,7 @@ static ssize_t alua_support_show(struct config_item *item, char *page) static ssize_t pgr_support_show(struct config_item *item, char *page) { struct se_dev_attrib *da = to_attrib(item); - u8 flags = da->da_dev->transport->transport_flags; + u8 flags = da->da_dev->transport_flags; return snprintf(page, PAGE_SIZE, "%d\n", flags & TRANSPORT_FLAG_PASSTHROUGH_PGR ? 0 : 1); @@ -1654,7 +1654,7 @@ static ssize_t target_pr_res_holder_show(struct config_item *item, char *page) if (!dev->dev_attrib.emulate_pr) return sprintf(page, "SPC_RESERVATIONS_DISABLED\n"); - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return sprintf(page, "Passthrough\n"); spin_lock(&dev->dev_reservation_lock); @@ -1796,7 +1796,7 @@ static ssize_t target_pr_res_type_show(struct config_item *item, char *page) if (!dev->dev_attrib.emulate_pr) return sprintf(page, "SPC_RESERVATIONS_DISABLED\n"); - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return sprintf(page, "SPC_PASSTHROUGH\n"); if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return sprintf(page, "SPC2_RESERVATIONS\n"); @@ -1810,7 +1810,7 @@ static ssize_t target_pr_res_aptpl_active_show(struct config_item *item, struct se_device *dev = pr_to_dev(item); if (!dev->dev_attrib.emulate_pr || - (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) + (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) return 0; return sprintf(page, "APTPL Bit Status: %s\n", @@ -1823,7 +1823,7 @@ static ssize_t target_pr_res_aptpl_metadata_show(struct config_item *item, struct se_device *dev = pr_to_dev(item); if (!dev->dev_attrib.emulate_pr || - (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) + (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) return 0; return sprintf(page, "Ready to process PR APTPL metadata..\n"); @@ -1870,7 +1870,7 @@ static ssize_t target_pr_res_aptpl_metadata_store(struct config_item *item, u8 type = 0; if (!dev->dev_attrib.emulate_pr || - (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) + (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) return count; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return count; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 4cee1138284b..d5800cf22df7 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -732,6 +732,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->se_hba = hba; dev->transport = hba->backend->ops; + dev->transport_flags = dev->transport->transport_flags_default; dev->prot_length = sizeof(struct t10_pi_tuple); dev->hba_index = hba->hba_index; @@ -1100,7 +1101,7 @@ passthrough_parse_cdb(struct se_cmd *cmd, * emulate the response, since tcmu does not have the information * required to process these commands. */ - if (!(dev->transport->transport_flags & + if (!(dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) { if (cdb[0] == PERSISTENT_RESERVE_IN) { cmd->execute_cmd = target_scsi3_emulate_pr_in; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 5e931690e697..91e41cc55704 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -4086,7 +4086,7 @@ target_check_reservation(struct se_cmd *cmd) return 0; if (!dev->dev_attrib.emulate_pr) return 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return 0; spin_lock(&dev->dev_reservation_lock); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index c9d92b3e777d..4e37fa9b409d 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1070,9 +1070,9 @@ static void pscsi_req_done(struct request *req, blk_status_t status) static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, - .transport_flags = TRANSPORT_FLAG_PASSTHROUGH | - TRANSPORT_FLAG_PASSTHROUGH_ALUA | - TRANSPORT_FLAG_PASSTHROUGH_PGR, + .transport_flags_default = TRANSPORT_FLAG_PASSTHROUGH | + TRANSPORT_FLAG_PASSTHROUGH_ALUA | + TRANSPORT_FLAG_PASSTHROUGH_PGR, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index d24e0a3ba3ff..62aa5fa63ac0 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -582,8 +582,7 @@ int core_tpg_add_lun( if (ret) goto out_kill_ref; - if (!(dev->transport->transport_flags & - TRANSPORT_FLAG_PASSTHROUGH_ALUA) && + if (!(dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 594b724bbf79..e6b448f43071 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1397,7 +1397,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd) * Check if SAM Task Attribute emulation is enabled for this * struct se_device storage object */ - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (cmd->sam_task_attr == TCM_ACA_TAG) { @@ -2012,7 +2012,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return false; cmd->se_cmd_flags |= SCF_TASK_ATTR_SET; @@ -2126,7 +2126,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return; if (!(cmd->se_cmd_flags & SCF_TASK_ATTR_SET)) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4b190d600d2f..cb388b28f27e 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2616,7 +2616,7 @@ static struct configfs_attribute *tcmu_action_attrs[] = { static struct target_backend_ops tcmu_ops = { .name = "user", .owner = THIS_MODULE, - .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, + .transport_flags_default = TRANSPORT_FLAG_PASSTHROUGH, .attach_hba = tcmu_attach_hba, .detach_hba = tcmu_detach_hba, .alloc_device = tcmu_alloc_device, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 8177667eba6d..959163504f82 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -23,7 +23,7 @@ struct target_backend_ops { char inquiry_rev[4]; struct module *owner; - u8 transport_flags; + u8 transport_flags_default; int (*attach_hba)(struct se_hba *, u32); void (*detach_hba)(struct se_hba *); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 6d4a694f6ea7..18c3f277b770 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -772,6 +772,7 @@ struct se_device { #define DF_USING_UDEV_PATH 0x00000008 #define DF_USING_ALIAS 0x00000010 #define DF_READ_ONLY 0x00000020 + u8 transport_flags; /* Physical device queue depth */ u32 queue_depth; /* Used for SPC-2 reservations enforce of ISIDs */ From 356ba2a8bc8d9f9bd2ee969df0e07b285aebb559 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 27 Apr 2020 17:08:23 +0200 Subject: [PATCH 263/562] scsi: target: tcmu: Make pgr_support and alua_support attributes writable Currently in tcmu reservation commands are handled by core's pr implementation (default) or completely rejected (emulate_pr set to 0). We additionally want to be able to do full reservation handling in userspace. Therefore we need a way to set TRANSPORT_FLAG_PASSTHROUGH_PGR. The inverted flag is displayed by attribute pgr_support. Since we moved the flag from transport/backend to se_device in the previous commit, we now can make it changeable per device by allowing to write the attribute. The new field transport_flags_changeable in transport/backend is used to reject writing if not allowed for a backend. Regarding ALUA we also want to be able to passthrough commands to userspace in tcmu. Therefore we need TRANSPORT_FLAG_PASSTHROUGH_ALUA to be changeable, because by setting it we can switch off all ALUA checks in core. So we also set TRANSPORT_FLAG_PASSTHROUGH_ALUA in tcmu's transport_flags_changeable. Of course, ALUA and reservation handling in userspace will work only, if session/nexus information is sent to userspace along with every command. This will be object of a patch series announced by Mike Christie. Link: https://lore.kernel.org/r/20200427150823.15350-5-bstroesser@ts.fujitsu.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 56 ++++++++++++++++++++++++++- drivers/target/target_core_user.c | 2 + include/target/target_core_backend.h | 1 + 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 279989e32e64..f04352285155 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1105,6 +1105,32 @@ static ssize_t alua_support_show(struct config_item *item, char *page) flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ? 0 : 1); } +static ssize_t alua_support_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_dev_attrib *da = to_attrib(item); + struct se_device *dev = da->da_dev; + bool flag; + int ret; + + if (!(dev->transport->transport_flags_changeable & + TRANSPORT_FLAG_PASSTHROUGH_ALUA)) { + pr_err("dev[%p]: Unable to change SE Device alua_support:" + " alua_support has fixed value\n", dev); + return -EINVAL; + } + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag) + dev->transport_flags &= ~TRANSPORT_FLAG_PASSTHROUGH_ALUA; + else + dev->transport_flags |= TRANSPORT_FLAG_PASSTHROUGH_ALUA; + return count; +} + static ssize_t pgr_support_show(struct config_item *item, char *page) { struct se_dev_attrib *da = to_attrib(item); @@ -1114,6 +1140,32 @@ static ssize_t pgr_support_show(struct config_item *item, char *page) flags & TRANSPORT_FLAG_PASSTHROUGH_PGR ? 0 : 1); } +static ssize_t pgr_support_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_dev_attrib *da = to_attrib(item); + struct se_device *dev = da->da_dev; + bool flag; + int ret; + + if (!(dev->transport->transport_flags_changeable & + TRANSPORT_FLAG_PASSTHROUGH_PGR)) { + pr_err("dev[%p]: Unable to change SE Device pgr_support:" + " pgr_support has fixed value\n", dev); + return -EINVAL; + } + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag) + dev->transport_flags &= ~TRANSPORT_FLAG_PASSTHROUGH_PGR; + else + dev->transport_flags |= TRANSPORT_FLAG_PASSTHROUGH_PGR; + return count; +} + CONFIGFS_ATTR(, emulate_model_alias); CONFIGFS_ATTR(, emulate_dpo); CONFIGFS_ATTR(, emulate_fua_write); @@ -1146,8 +1198,8 @@ CONFIGFS_ATTR(, unmap_granularity); CONFIGFS_ATTR(, unmap_granularity_alignment); CONFIGFS_ATTR(, unmap_zeroes_data); CONFIGFS_ATTR(, max_write_same_len); -CONFIGFS_ATTR_RO(, alua_support); -CONFIGFS_ATTR_RO(, pgr_support); +CONFIGFS_ATTR(, alua_support); +CONFIGFS_ATTR(, pgr_support); /* * dev_attrib attributes for devices using the target core SBC/SPC diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index cb388b28f27e..72ffd359fc4d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2617,6 +2617,8 @@ static struct target_backend_ops tcmu_ops = { .name = "user", .owner = THIS_MODULE, .transport_flags_default = TRANSPORT_FLAG_PASSTHROUGH, + .transport_flags_changeable = TRANSPORT_FLAG_PASSTHROUGH_PGR | + TRANSPORT_FLAG_PASSTHROUGH_ALUA, .attach_hba = tcmu_attach_hba, .detach_hba = tcmu_detach_hba, .alloc_device = tcmu_alloc_device, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 959163504f82..f51452e3b984 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -24,6 +24,7 @@ struct target_backend_ops { struct module *owner; u8 transport_flags_default; + u8 transport_flags_changeable; int (*attach_hba)(struct se_hba *, u32); void (*detach_hba)(struct se_hba *); From 164ba8d2df66735dd4f00d3b85d898a907fa6982 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:03 -0700 Subject: [PATCH 264/562] scsi: lpfc: Maintain atomic consistency of queue_claimed flag A previous change introduced the atomic use of queue_claimed flag for eq's and cq's. The code works fine, but the clearing of the queue_claimed flag is not atomic. Change queue_claimed = 0 into xchg(&queue_claimed, 0) to be consistent for change under atomicity. Link: https://lore.kernel.org/r/20200501214310.91713-3-jsmart2021@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index b6fb665e6ec4..9ce37560f4c0 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -538,7 +538,7 @@ lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq, if (count > eq->EQ_max_eqe) eq->EQ_max_eqe = count; - eq->queue_claimed = 0; + xchg(&eq->queue_claimed, 0); rearm_and_exit: /* Always clear the EQ. */ @@ -13694,7 +13694,7 @@ __lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq, "0369 No entry from completion queue " "qid=%d\n", cq->queue_id); - cq->queue_claimed = 0; + xchg(&cq->queue_claimed, 0); rearm_and_exit: phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed, From b98214f6070ef5052b763b61ac2089a9ac6c8677 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:04 -0700 Subject: [PATCH 265/562] scsi: lpfc: Remove re-binding of nvme rport during registration The lldd rebinds the ndlp with rport during a nvme rport registration (via nvme_fc_register_remoteport). If rport & ndlp pointers are same as the previous one, the lldd will re-use the ndlp and rport association without re-initialization. This assumption is incorrect. The lldd should be ignorant of whether the returned rport pointer is new or not, and should always assume it is new. Remove the re-binding code, always assumes that rport pointer received from transport is a new pointer. Link: https://lore.kernel.org/r/20200501214310.91713-4-jsmart2021@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 12d2b2775773..c708abc9721c 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2321,38 +2321,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) spin_unlock_irq(&vport->phba->hbalock); rport = remote_port->private; if (oldrport) { - /* New remoteport record does not guarantee valid - * host private memory area. - */ - if (oldrport == remote_port->private) { - /* Same remoteport - ndlp should match. - * Just reuse. - */ - lpfc_printf_vlog(ndlp->vport, KERN_INFO, - LOG_NVME_DISC, - "6014 Rebind lport to current " - "remoteport x%px wwpn 0x%llx, " - "Data: x%x x%x x%px x%px x%x " - " x%06x\n", - remote_port, - remote_port->port_name, - remote_port->port_id, - remote_port->port_role, - oldrport->ndlp, - ndlp, - ndlp->nlp_type, - ndlp->nlp_DID); - - /* It's a complete rebind only if the driver - * is registering with the same ndlp. Otherwise - * the driver likely executed a node swap - * prior to this registration and the ndlp to - * remoteport binding needs to be redone. - */ - if (prev_ndlp == ndlp) - return 0; - - } /* Sever the ndlp<->rport association * before dropping the ndlp ref from From f809da6db68a8be49e317f0ccfbced1af9258839 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:05 -0700 Subject: [PATCH 266/562] scsi: lpfc: Fix negation of else clause in lpfc_prep_node_fc4type Implementation of a previous patch added a condition to an if check that always end up with the if test being true. Execution of the else clause was inadvertently negated. The additional condition check was incorrect and unnecessary after the other modifications had been done in that patch. Remove the check from the if series. Link: https://lore.kernel.org/r/20200501214310.91713-5-jsmart2021@gmail.com Fixes: b95b21193c85 ("scsi: lpfc: Fix loss of remote port after devloss due to lack of RPIs") Cc: # v5.4+ Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 2aa578d20f8c..7fce73c39c1c 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -462,7 +462,6 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) struct lpfc_nodelist *ndlp; if ((vport->port_type != LPFC_NPIV_PORT) || - (fc4_type == FC_TYPE_FCP) || !(vport->ct_flags & FC_CT_RFF_ID) || !vport->cfg_restrict_login) { ndlp = lpfc_setup_disc_node(vport, Did); From 3048e3e805e36a61cf02f185b02b4144f46d8ff3 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:06 -0700 Subject: [PATCH 267/562] scsi: lpfc: Change default queue allocation for reduced memory consumption By default, the driver attempts to allocate a hdwq per logical cpu in order to provide good cpu affinity. Some systems have extremely high cpu counts and this can significantly raise memory consumption. In testing on x86 platforms (non-AMD) it is found that sharing of a hdwq by a physical cpu and its HT cpu can occur with little performance degredation. By sharing, the hdwq count can be halved, significantly reducing the memory overhead. Change the default behavior of the driver on non-AMD x86 platforms to share a hdwq by the cpu and its HT cpu. Link: https://lore.kernel.org/r/20200501214310.91713-6-jsmart2021@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 23 ++++++-- drivers/scsi/lpfc/lpfc_attr.c | 106 +++++++++++++++++++++++++++------- drivers/scsi/lpfc/lpfc_init.c | 82 +++++++++++--------------- drivers/scsi/lpfc/lpfc_sli4.h | 2 +- 4 files changed, 137 insertions(+), 76 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8e2a356911a9..45657a7502f6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -627,6 +627,19 @@ struct lpfc_ras_fwlog { enum ras_state state; /* RAS logging running state */ }; +enum lpfc_irq_chann_mode { + /* Assign IRQs to all possible cpus that have hardware queues */ + NORMAL_MODE, + + /* Assign IRQs only to cpus on the same numa node as HBA */ + NUMA_MODE, + + /* Assign IRQs only on non-hyperthreaded CPUs. This is the + * same as normal_mode, but assign IRQS only on physical CPUs. + */ + NHT_MODE, +}; + struct lpfc_hba { /* SCSI interface function jump table entries */ struct lpfc_io_buf * (*lpfc_get_scsi_buf) @@ -835,7 +848,6 @@ struct lpfc_hba { uint32_t cfg_fcp_mq_threshold; uint32_t cfg_hdw_queue; uint32_t cfg_irq_chann; - uint32_t cfg_irq_numa; uint32_t cfg_suppress_rsp; uint32_t cfg_nvme_oas; uint32_t cfg_nvme_embed_cmd; @@ -1003,6 +1015,7 @@ struct lpfc_hba { mempool_t *active_rrq_pool; struct fc_host_statistics link_stats; + enum lpfc_irq_chann_mode irq_chann_mode; enum intr_type_t intr_type; uint32_t intr_mode; #define LPFC_INTR_ERROR 0xFFFFFFFF @@ -1314,19 +1327,19 @@ lpfc_phba_elsring(struct lpfc_hba *phba) } /** - * lpfc_next_online_numa_cpu - Finds next online CPU on NUMA node - * @numa_mask: Pointer to phba's numa_mask member. + * lpfc_next_online_cpu - Finds next online CPU on cpumask + * @mask: Pointer to phba's cpumask member. * @start: starting cpu index * * Note: If no valid cpu found, then nr_cpu_ids is returned. * **/ static inline unsigned int -lpfc_next_online_numa_cpu(const struct cpumask *numa_mask, unsigned int start) +lpfc_next_online_cpu(const struct cpumask *mask, unsigned int start) { unsigned int cpu_it; - for_each_cpu_wrap(cpu_it, numa_mask, start) { + for_each_cpu_wrap(cpu_it, mask, start) { if (cpu_online(cpu_it)) break; } diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 1354c141d614..2791efa770af 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5704,17 +5704,69 @@ LPFC_ATTR_R(hdw_queue, LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX, "Set the number of I/O Hardware Queues"); -static inline void -lpfc_assign_default_irq_numa(struct lpfc_hba *phba) +#if IS_ENABLED(CONFIG_X86) +/** + * lpfc_cpumask_irq_mode_init - initalizes cpumask of phba based on + * irq_chann_mode + * @phba: Pointer to HBA context object. + **/ +static void +lpfc_cpumask_irq_mode_init(struct lpfc_hba *phba) +{ + unsigned int cpu, first_cpu, numa_node = NUMA_NO_NODE; + const struct cpumask *sibling_mask; + struct cpumask *aff_mask = &phba->sli4_hba.irq_aff_mask; + + cpumask_clear(aff_mask); + + if (phba->irq_chann_mode == NUMA_MODE) { + /* Check if we're a NUMA architecture */ + numa_node = dev_to_node(&phba->pcidev->dev); + if (numa_node == NUMA_NO_NODE) { + phba->irq_chann_mode = NORMAL_MODE; + return; + } + } + + for_each_possible_cpu(cpu) { + switch (phba->irq_chann_mode) { + case NUMA_MODE: + if (cpu_to_node(cpu) == numa_node) + cpumask_set_cpu(cpu, aff_mask); + break; + case NHT_MODE: + sibling_mask = topology_sibling_cpumask(cpu); + first_cpu = cpumask_first(sibling_mask); + if (first_cpu < nr_cpu_ids) + cpumask_set_cpu(first_cpu, aff_mask); + break; + default: + break; + } + } +} +#endif + +static void +lpfc_assign_default_irq_chann(struct lpfc_hba *phba) { #if IS_ENABLED(CONFIG_X86) - /* If AMD architecture, then default is LPFC_IRQ_CHANN_NUMA */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - phba->cfg_irq_numa = 1; - else - phba->cfg_irq_numa = 0; + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + /* If AMD architecture, then default is NUMA_MODE */ + phba->irq_chann_mode = NUMA_MODE; + break; + case X86_VENDOR_INTEL: + /* If Intel architecture, then default is no hyperthread mode */ + phba->irq_chann_mode = NHT_MODE; + break; + default: + phba->irq_chann_mode = NORMAL_MODE; + break; + } + lpfc_cpumask_irq_mode_init(phba); #else - phba->cfg_irq_numa = 0; + phba->irq_chann_mode = NORMAL_MODE; #endif } @@ -5726,6 +5778,7 @@ lpfc_assign_default_irq_numa(struct lpfc_hba *phba) * * 0 = Configure number of IRQ Channels to: * if AMD architecture, number of CPUs on HBA's NUMA node + * if Intel architecture, number of physical CPUs. * otherwise, number of active CPUs. * [1,256] = Manually specify how many IRQ Channels to use. * @@ -5751,35 +5804,44 @@ MODULE_PARM_DESC(lpfc_irq_chann, "Set number of interrupt vectors to allocate"); static int lpfc_irq_chann_init(struct lpfc_hba *phba, uint32_t val) { - const struct cpumask *numa_mask; + const struct cpumask *aff_mask; if (phba->cfg_use_msi != 2) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "8532 use_msi = %u ignoring cfg_irq_numa\n", phba->cfg_use_msi); - phba->cfg_irq_numa = 0; - phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN; + phba->irq_chann_mode = NORMAL_MODE; + phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF; return 0; } /* Check if default setting was passed */ if (val == LPFC_IRQ_CHANN_DEF) - lpfc_assign_default_irq_numa(phba); + lpfc_assign_default_irq_chann(phba); - if (phba->cfg_irq_numa) { - numa_mask = &phba->sli4_hba.numa_mask; + if (phba->irq_chann_mode != NORMAL_MODE) { + aff_mask = &phba->sli4_hba.irq_aff_mask; - if (cpumask_empty(numa_mask)) { + if (cpumask_empty(aff_mask)) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "8533 Could not identify NUMA node, " - "ignoring cfg_irq_numa\n"); - phba->cfg_irq_numa = 0; - phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN; + "8533 Could not identify CPUS for " + "mode %d, ignoring\n", + phba->irq_chann_mode); + phba->irq_chann_mode = NORMAL_MODE; + phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF; } else { - phba->cfg_irq_chann = cpumask_weight(numa_mask); + phba->cfg_irq_chann = cpumask_weight(aff_mask); + + /* If no hyperthread mode, then set hdwq count to + * aff_mask weight as well + */ + if (phba->irq_chann_mode == NHT_MODE) + phba->cfg_hdw_queue = phba->cfg_irq_chann; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "8543 lpfc_irq_chann set to %u " - "(numa)\n", phba->cfg_irq_chann); + "(mode: %d)\n", phba->cfg_irq_chann, + phba->irq_chann_mode); } } else { if (val > LPFC_IRQ_CHANN_MAX) { @@ -5790,7 +5852,7 @@ lpfc_irq_chann_init(struct lpfc_hba *phba, uint32_t val) val, LPFC_IRQ_CHANN_MIN, LPFC_IRQ_CHANN_MAX); - phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN; + phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF; return -EINVAL; } phba->cfg_irq_chann = val; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4104bdcdbb6f..8b8530351843 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6022,29 +6022,6 @@ static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode) return; } -/** - * lpfc_cpumask_of_node_init - initalizes cpumask of phba's NUMA node - * @phba: Pointer to HBA context object. - * - **/ -static void -lpfc_cpumask_of_node_init(struct lpfc_hba *phba) -{ - unsigned int cpu, numa_node; - struct cpumask *numa_mask = &phba->sli4_hba.numa_mask; - - cpumask_clear(numa_mask); - - /* Check if we're a NUMA architecture */ - numa_node = dev_to_node(&phba->pcidev->dev); - if (numa_node == NUMA_NO_NODE) - return; - - for_each_possible_cpu(cpu) - if (cpu_to_node(cpu) == numa_node) - cpumask_set_cpu(cpu, numa_mask); -} - /** * lpfc_enable_pci_dev - Enable a generic PCI device. * @phba: pointer to lpfc hba data structure. @@ -6483,7 +6460,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->sli4_hba.num_present_cpu = lpfc_present_cpu; phba->sli4_hba.num_possible_cpu = cpumask_last(cpu_possible_mask) + 1; phba->sli4_hba.curr_disp_cpu = 0; - lpfc_cpumask_of_node_init(phba); /* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); @@ -6691,6 +6667,13 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) #endif /* Not supported for NVMET */ phba->cfg_xri_rebalancing = 0; + if (phba->irq_chann_mode == NHT_MODE) { + phba->cfg_irq_chann = + phba->sli4_hba.num_present_cpu; + phba->cfg_hdw_queue = + phba->sli4_hba.num_present_cpu; + phba->irq_chann_mode = NORMAL_MODE; + } break; } } @@ -7032,7 +7015,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) phba->sli4_hba.num_possible_cpu = 0; phba->sli4_hba.num_present_cpu = 0; phba->sli4_hba.curr_disp_cpu = 0; - cpumask_clear(&phba->sli4_hba.numa_mask); + cpumask_clear(&phba->sli4_hba.irq_aff_mask); /* Free memory allocated for fast-path work queue handles */ kfree(phba->sli4_hba.hba_eq_hdl); @@ -11287,11 +11270,12 @@ lpfc_irq_clear_aff(struct lpfc_hba_eq_hdl *eqhdl) * @offline: true, cpu is going offline. false, cpu is coming online. * * If cpu is going offline, we'll try our best effort to find the next - * online cpu on the phba's NUMA node and migrate all offlining IRQ affinities. + * online cpu on the phba's original_mask and migrate all offlining IRQ + * affinities. * - * If cpu is coming online, reaffinitize the IRQ back to the onlineng cpu. + * If cpu is coming online, reaffinitize the IRQ back to the onlining cpu. * - * Note: Call only if cfg_irq_numa is enabled, otherwise rely on + * Note: Call only if NUMA or NHT mode is enabled, otherwise rely on * PCI_IRQ_AFFINITY to auto-manage IRQ affinity. * **/ @@ -11301,14 +11285,14 @@ lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline) struct lpfc_vector_map_info *cpup; struct cpumask *aff_mask; unsigned int cpu_select, cpu_next, idx; - const struct cpumask *numa_mask; + const struct cpumask *orig_mask; - if (!phba->cfg_irq_numa) + if (phba->irq_chann_mode == NORMAL_MODE) return; - numa_mask = &phba->sli4_hba.numa_mask; + orig_mask = &phba->sli4_hba.irq_aff_mask; - if (!cpumask_test_cpu(cpu, numa_mask)) + if (!cpumask_test_cpu(cpu, orig_mask)) return; cpup = &phba->sli4_hba.cpu_map[cpu]; @@ -11317,9 +11301,9 @@ lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline) return; if (offline) { - /* Find next online CPU on NUMA node */ - cpu_next = cpumask_next_wrap(cpu, numa_mask, cpu, true); - cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu_next); + /* Find next online CPU on original mask */ + cpu_next = cpumask_next_wrap(cpu, orig_mask, cpu, true); + cpu_select = lpfc_next_online_cpu(orig_mask, cpu_next); /* Found a valid CPU */ if ((cpu_select < nr_cpu_ids) && (cpu_select != cpu)) { @@ -11434,7 +11418,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) { int vectors, rc, index; char *name; - const struct cpumask *numa_mask = NULL; + const struct cpumask *aff_mask = NULL; unsigned int cpu = 0, cpu_cnt = 0, cpu_select = nr_cpu_ids; struct lpfc_hba_eq_hdl *eqhdl; const struct cpumask *maskp; @@ -11444,16 +11428,18 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) /* Set up MSI-X multi-message vectors */ vectors = phba->cfg_irq_chann; - if (phba->cfg_irq_numa) { - numa_mask = &phba->sli4_hba.numa_mask; - cpu_cnt = cpumask_weight(numa_mask); + if (phba->irq_chann_mode != NORMAL_MODE) + aff_mask = &phba->sli4_hba.irq_aff_mask; + + if (aff_mask) { + cpu_cnt = cpumask_weight(aff_mask); vectors = min(phba->cfg_irq_chann, cpu_cnt); - /* cpu: iterates over numa_mask including offline or online - * cpu_select: iterates over online numa_mask to set affinity + /* cpu: iterates over aff_mask including offline or online + * cpu_select: iterates over online aff_mask to set affinity */ - cpu = cpumask_first(numa_mask); - cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + cpu = cpumask_first(aff_mask); + cpu_select = lpfc_next_online_cpu(aff_mask, cpu); } else { flags |= PCI_IRQ_AFFINITY; } @@ -11487,7 +11473,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) eqhdl->irq = pci_irq_vector(phba->pcidev, index); - if (phba->cfg_irq_numa) { + if (aff_mask) { /* If found a neighboring online cpu, set affinity */ if (cpu_select < nr_cpu_ids) lpfc_irq_set_aff(eqhdl, cpu_select); @@ -11497,11 +11483,11 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) LPFC_CPU_FIRST_IRQ, cpu); - /* Iterate to next offline or online cpu in numa_mask */ - cpu = cpumask_next(cpu, numa_mask); + /* Iterate to next offline or online cpu in aff_mask */ + cpu = cpumask_next(cpu, aff_mask); - /* Find next online cpu in numa_mask to set affinity */ - cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + /* Find next online cpu in aff_mask to set affinity */ + cpu_select = lpfc_next_online_cpu(aff_mask, cpu); } else if (vectors == 1) { cpu = cpumask_first(cpu_present_mask); lpfc_assign_eq_map_info(phba, index, LPFC_CPU_FIRST_IRQ, diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 8da7429e385a..4decb53d81c3 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -920,7 +920,7 @@ struct lpfc_sli4_hba { struct lpfc_vector_map_info *cpu_map; uint16_t num_possible_cpu; uint16_t num_present_cpu; - struct cpumask numa_mask; + struct cpumask irq_aff_mask; uint16_t curr_disp_cpu; struct lpfc_eq_intr_info __percpu *eq_info; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS From 88acb4d9ff981d3ff6ea307e62a08b8739f8ebcd Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:07 -0700 Subject: [PATCH 268/562] scsi: lpfc: Remove unnecessary lockdep_assert_held calls In an audit of lockdep calls in the driver, there are multiple lockdep checks in successive calling layers. E.g. a routine checks, and then calls a lower routine that also checks, and so on. Calling sequences result in many redundant checks. Refine the code to remove lower-level lockdep checks. Update comments on the lock, correcting a few places where lock object in comment was incorrect. Link: https://lore.kernel.org/r/20200501214310.91713-7-jsmart2021@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 8 +++----- drivers/scsi/lpfc/lpfc_sli.c | 33 +++++++++++++++----------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 789eecbf32eb..a8f4ced0fc41 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1356,14 +1356,14 @@ lpfc_vlan_id_match(uint16_t curr_vlan_id, uint16_t new_vlan_id) } /** - * lpfc_update_fcf_record - Update driver fcf record * __lpfc_update_fcf_record_pri - update the lpfc_fcf_pri record. * @phba: pointer to lpfc hba data structure. * @fcf_index: Index for the lpfc_fcf_record. * @new_fcf_record: pointer to hba fcf record. * * This routine updates the driver FCF priority record from the new HBA FCF - * record. This routine is called with the host lock held. + * record. The hbalock is asserted held in the code path calling this + * routine. **/ static void __lpfc_update_fcf_record_pri(struct lpfc_hba *phba, uint16_t fcf_index, @@ -1372,8 +1372,6 @@ __lpfc_update_fcf_record_pri(struct lpfc_hba *phba, uint16_t fcf_index, { struct lpfc_fcf_pri *fcf_pri; - lockdep_assert_held(&phba->hbalock); - fcf_pri = &phba->fcf.fcf_pri[fcf_index]; fcf_pri->fcf_rec.fcf_index = fcf_index; /* FCF record priority */ @@ -1451,7 +1449,7 @@ lpfc_copy_fcf_record(struct lpfc_fcf_rec *fcf_rec, * * This routine updates the driver FCF record from the new HBA FCF record * together with the address mode, vlan_id, and other informations. This - * routine is called with the host lock held. + * routine is called with the hbalock held. **/ static void __lpfc_update_fcf_record(struct lpfc_hba *phba, struct lpfc_fcf_rec *fcf_rec, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 9ce37560f4c0..df77e75b9f53 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1248,8 +1248,8 @@ lpfc_sli_get_iocbq(struct lpfc_hba *phba) * @phba: Pointer to HBA context object. * @iocbq: Pointer to driver iocb object. * - * This function is called with hbalock held to release driver - * iocb object to the iocb pool. The iotag in the iocb object + * This function is called to release the driver iocb object + * to the iocb pool. The iotag in the iocb object * does not change for each use of the iocb object. This function * clears all other fields of the iocb object when it is freed. * The sqlq structure that holds the xritag and phys and virtual @@ -1259,7 +1259,8 @@ lpfc_sli_get_iocbq(struct lpfc_hba *phba) * this IO was aborted then the sglq entry it put on the * lpfc_abts_els_sgl_list until the CQ_ABORTED_XRI is received. If the * IO has good status or fails for any other reason then the sglq - * entry is added to the free list (lpfc_els_sgl_list). + * entry is added to the free list (lpfc_els_sgl_list). The hbalock is + * asserted held in the code path calling this routine. **/ static void __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) @@ -1269,8 +1270,6 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) unsigned long iflag = 0; struct lpfc_sli_ring *pring; - lockdep_assert_held(&phba->hbalock); - if (iocbq->sli4_xritag == NO_XRI) sglq = NULL; else @@ -1333,18 +1332,17 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) * @phba: Pointer to HBA context object. * @iocbq: Pointer to driver iocb object. * - * This function is called with hbalock held to release driver - * iocb object to the iocb pool. The iotag in the iocb object - * does not change for each use of the iocb object. This function - * clears all other fields of the iocb object when it is freed. + * This function is called to release the driver iocb object to the + * iocb pool. The iotag in the iocb object does not change for each + * use of the iocb object. This function clears all other fields of + * the iocb object when it is freed. The hbalock is asserted held in + * the code path calling this routine. **/ static void __lpfc_sli_release_iocbq_s3(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) { size_t start_clean = offsetof(struct lpfc_iocbq, iocb); - lockdep_assert_held(&phba->hbalock); - /* * Clean all volatile data fields, preserve iotag and node struct. */ @@ -1789,17 +1787,17 @@ lpfc_sli_next_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) * @nextiocb: Pointer to driver iocb object which need to be * posted to firmware. * - * This function is called with hbalock held to post a new iocb to - * the firmware. This function copies the new iocb to ring iocb slot and - * updates the ring pointers. It adds the new iocb to txcmplq if there is + * This function is called to post a new iocb to the firmware. This + * function copies the new iocb to ring iocb slot and updates the + * ring pointers. It adds the new iocb to txcmplq if there is * a completion call back for this iocb else the function will free the - * iocb object. + * iocb object. The hbalock is asserted held in the code path calling + * this routine. **/ static void lpfc_sli_submit_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, IOCB_t *iocb, struct lpfc_iocbq *nextiocb) { - lockdep_assert_held(&phba->hbalock); /* * Set up an iotag */ @@ -11170,6 +11168,7 @@ lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * request, this function issues abort out unconditionally. This function is * called with hbalock held. The function returns 0 when it fails due to * memory allocation failure or when the command iocb is an abort request. + * The hbalock is asserted held in the code path calling this routine. **/ static int lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, @@ -11183,8 +11182,6 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, unsigned long iflags; struct lpfc_nodelist *ndlp; - lockdep_assert_held(&phba->hbalock); - /* * There are certain command types we don't want to abort. And we * don't want to abort commands that are already in the process of From a7fc071ab56e59e10a6ca868fad91d9677083dba Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:08 -0700 Subject: [PATCH 269/562] scsi: lpfc: Fix noderef and address space warnings Running make C=1 M=drivers/scsi/lpfc triggers sparse warnings Correct the code generating the following errors: - Incompatible address space assignment without proper conversion. - Deference of usespace and per-cpu pointers. Link: https://lore.kernel.org/r/20200501214310.91713-8-jsmart2021@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 3 ++- drivers/scsi/lpfc/lpfc_mbox.c | 3 ++- drivers/scsi/lpfc/lpfc_sli.c | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 8a6e02aa553f..24d946ef8609 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2436,7 +2436,8 @@ lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf, return 0; if (dent == phba->debug_InjErrLBA) { - if ((buf[0] == 'o') && (buf[1] == 'f') && (buf[2] == 'f')) + if ((dstbuf[0] == 'o') && (dstbuf[1] == 'f') && + (dstbuf[2] == 'f')) tmp = (uint64_t)(-1); } diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index e35b52b66d6c..e34e0f11bfdd 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -1378,7 +1378,8 @@ lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) */ if (phba->cfg_hostmem_hgp && phba->sli_rev != 3) { - phba->host_gp = &phba->mbox->us.s2.host[0]; + phba->host_gp = (struct lpfc_hgp __iomem *) + &phba->mbox->us.s2.host[0]; phba->hbq_put = NULL; offset = (uint8_t *)&phba->mbox->us.s2.host - (uint8_t *)phba->slim2p.virt; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index df77e75b9f53..0c9844cac3aa 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -14272,7 +14272,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) int ecount = 0; int hba_eqidx; struct lpfc_eq_intr_info *eqi; - uint32_t icnt; /* Get the driver's phba structure from the dev_id */ hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id; @@ -14300,11 +14299,12 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) return IRQ_NONE; } - eqi = phba->sli4_hba.eq_info; - icnt = this_cpu_inc_return(eqi->icnt); + eqi = this_cpu_ptr(phba->sli4_hba.eq_info); + eqi->icnt++; + fpeq->last_cpu = raw_smp_processor_id(); - if (icnt > LPFC_EQD_ISR_TRIGGER && + if (eqi->icnt > LPFC_EQD_ISR_TRIGGER && fpeq->q_flag & HBA_EQ_DELAY_CHK && phba->cfg_auto_imax && fpeq->q_mode != LPFC_MAX_AUTO_EQ_DELAY && From 8cdc5a223ed06b47e7e4045f765f2813cae3f5e9 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:09 -0700 Subject: [PATCH 270/562] scsi: lpfc: Fix MDS Diagnostic Enablement definition The MDS diagnostic enablement bit for the adapter interface is incorrect in the driver header. Correct the bit position for the SET_FEATURE MDS bit. Link: https://lore.kernel.org/r/20200501214310.91713-9-jsmart2021@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hw4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 10c5d1c3122e..6dfff0376547 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3541,7 +3541,7 @@ struct lpfc_mbx_set_feature { #define lpfc_mbx_set_feature_UER_SHIFT 0 #define lpfc_mbx_set_feature_UER_MASK 0x00000001 #define lpfc_mbx_set_feature_UER_WORD word6 -#define lpfc_mbx_set_feature_mds_SHIFT 0 +#define lpfc_mbx_set_feature_mds_SHIFT 2 #define lpfc_mbx_set_feature_mds_MASK 0x00000001 #define lpfc_mbx_set_feature_mds_WORD word6 #define lpfc_mbx_set_feature_mds_deep_loopbk_SHIFT 1 From 29022b61307f1aab5564ef65dc268ab7f55b699c Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Fri, 1 May 2020 14:43:10 -0700 Subject: [PATCH 271/562] scsi: lpfc: Update lpfc version to 12.8.0.1 Update lpfc version to 12.8.0.1 Link: https://lore.kernel.org/r/20200501214310.91713-10-jsmart2021@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index ca40c47cfbe0..ab0bc26c098d 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "12.8.0.0" +#define LPFC_DRIVER_VERSION "12.8.0.1" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 8d925b1f00e6894e4c601cb1d395dab3bd66c374 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Sun, 26 Apr 2020 10:42:44 +0800 Subject: [PATCH 272/562] scsi: aacraid: Use memdup_user() as a cleanup Fix coccicheck warning which recommends to use memdup_user(). This patch fixes the following coccicheck warning: drivers/scsi/aacraid/commctrl.c:516:15-22: WARNING opportunity for memdup_user Link: https://lore.kernel.org/r/1587868964-75969-1-git-send-email-zou_wei@huawei.com Fixes: 4645df1035b3 ("[PATCH] aacraid: swapped kmalloc args.") Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commctrl.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index ffe41bc111fc..102658bdc15a 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -513,15 +513,9 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) goto cleanup; } - user_srbcmd = kmalloc(fibsize, GFP_KERNEL); - if (!user_srbcmd) { - dprintk((KERN_DEBUG"aacraid: Could not make a copy of the srb\n")); - rcode = -ENOMEM; - goto cleanup; - } - if(copy_from_user(user_srbcmd, user_srb,fibsize)){ - dprintk((KERN_DEBUG"aacraid: Could not copy srb from user\n")); - rcode = -EFAULT; + user_srbcmd = memdup_user(user_srb, fibsize); + if (IS_ERR(user_srbcmd)) { + rcode = PTR_ERR(user_srbcmd); goto cleanup; } From c68a56736c129f5dd1632856956f9c3e04bae200 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Tue, 28 Apr 2020 20:26:17 +0200 Subject: [PATCH 273/562] scsi: target: loopback: Fix READ with data and sensebytes We use tcm_loop with tape emulations running on tcmu. In case application reads a short tape block with a longer READ, or a long tape block with a short READ, according to SCC spec data has to be tranferred _and_ sensebytes with ILI set and information field containing the residual count. Similar problem also exists when using fixed block size in READ. Up to now tcm_loop is not prepared to handle sensebytes if input data is provided, as in tcm_loop_queue_data_in() it only sets SAM_STAT_GOOD and, if necessary, the residual count. To fix the bug, the same handling for sensebytes as present in tcm_loop_queue_status() must be done in tcm_loop_queue_data_in() also. After adding this handling, the two function now are nearly identical, so I created a single function with two wrappers. Link: https://lore.kernel.org/r/20200428182617.32726-1-bstroesser@ts.fujitsu.com Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/loopback/tcm_loop.c | 36 +++++++++++++----------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 3305b47fdf53..16d5a4e117a2 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -545,32 +545,15 @@ static int tcm_loop_write_pending(struct se_cmd *se_cmd) return 0; } -static int tcm_loop_queue_data_in(struct se_cmd *se_cmd) +static int tcm_loop_queue_data_or_status(const char *func, + struct se_cmd *se_cmd, u8 scsi_status) { struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, struct tcm_loop_cmd, tl_se_cmd); struct scsi_cmnd *sc = tl_cmd->sc; pr_debug("%s() called for scsi_cmnd: %p cdb: 0x%02x\n", - __func__, sc, sc->cmnd[0]); - - sc->result = SAM_STAT_GOOD; - set_host_byte(sc, DID_OK); - if ((se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) || - (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT)) - scsi_set_resid(sc, se_cmd->residual_count); - sc->scsi_done(sc); - return 0; -} - -static int tcm_loop_queue_status(struct se_cmd *se_cmd) -{ - struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, - struct tcm_loop_cmd, tl_se_cmd); - struct scsi_cmnd *sc = tl_cmd->sc; - - pr_debug("%s() called for scsi_cmnd: %p cdb: 0x%02x\n", - __func__, sc, sc->cmnd[0]); + func, sc, sc->cmnd[0]); if (se_cmd->sense_buffer && ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) || @@ -581,7 +564,7 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd) sc->result = SAM_STAT_CHECK_CONDITION; set_driver_byte(sc, DRIVER_SENSE); } else - sc->result = se_cmd->scsi_status; + sc->result = scsi_status; set_host_byte(sc, DID_OK); if ((se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) || @@ -591,6 +574,17 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd) return 0; } +static int tcm_loop_queue_data_in(struct se_cmd *se_cmd) +{ + return tcm_loop_queue_data_or_status(__func__, se_cmd, SAM_STAT_GOOD); +} + +static int tcm_loop_queue_status(struct se_cmd *se_cmd) +{ + return tcm_loop_queue_data_or_status(__func__, + se_cmd, se_cmd->scsi_status); +} + static void tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd) { struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, From 102026483d2babb1e6f081288c65409186c05330 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 5 May 2020 15:38:07 +0800 Subject: [PATCH 274/562] scsi: bfa: Make bfad_iocmd_ioc_get_stats() static Fix the following sparse warning: drivers/scsi/bfa/bfad_bsg.c:140:1: warning: symbol 'bfad_iocmd_ioc_get_stats' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200505073807.40332-1-yanaijie@huawei.com Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad_bsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index a76c968dbac5..412dbe125e10 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -136,7 +136,7 @@ bfad_iocmd_ioc_get_attr(struct bfad_s *bfad, void *cmd) return 0; } -int +static int bfad_iocmd_ioc_get_stats(struct bfad_s *bfad, void *cmd) { struct bfa_bsg_ioc_stats_s *iocmd = (struct bfa_bsg_ioc_stats_s *)cmd; From f9491ed56e3a73aaa27accf4600671611dcf18bd Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Tue, 5 May 2020 20:19:04 +0800 Subject: [PATCH 275/562] scsi: qedi: Remove unused variable udev & uctrl uctrl and udev are unused after commit 9632a6b4b747 ("scsi: qedi: Move LL2 producer index processing in BH.") Remove them. Link: https://lore.kernel.org/r/20200505121904.25702-1-xiexiuqi@huawei.com Reviewed-by: Lee Duncan Signed-off-by: Xie XiuQi Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 46584e16d635..81a307695cc9 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -662,8 +662,6 @@ static struct qedi_ctx *qedi_host_alloc(struct pci_dev *pdev) static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2) { struct qedi_ctx *qedi = (struct qedi_ctx *)cookie; - struct qedi_uio_dev *udev; - struct qedi_uio_ctrl *uctrl; struct skb_work_list *work; struct ethhdr *eh; @@ -702,9 +700,6 @@ static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2) "Allowed frame ethertype [0x%x] len [0x%x].\n", eh->h_proto, skb->len); - udev = qedi->udev; - uctrl = udev->uctrl; - work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { QEDI_WARN(&qedi->dbg_ctx, From 00b42b70ae521e6ccc86ed403e2d101616689bdc Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 14:21:47 -0500 Subject: [PATCH 276/562] scsi: libsas: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Link: https://lore.kernel.org/r/20200507192147.GA16206@embeddedor Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Gustavo A. R. Silva Signed-off-by: Martin K. Petersen --- drivers/scsi/aic94xx/aic94xx_sds.c | 14 +++++++------- include/scsi/sas.h | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c index 3ddc8852bc32..105adba559a1 100644 --- a/drivers/scsi/aic94xx/aic94xx_sds.c +++ b/drivers/scsi/aic94xx/aic94xx_sds.c @@ -406,7 +406,7 @@ struct asd_manuf_sec { u8 sas_addr[SAS_ADDR_SIZE]; u8 pcba_sn[ASD_PCBA_SN_SIZE]; /* Here start the other segments */ - u8 linked_list[0]; + u8 linked_list[]; } __attribute__ ((packed)); struct asd_manuf_phy_desc { @@ -449,7 +449,7 @@ struct asd_ms_sb_desc { u8 type; u8 node_desc_index; u8 conn_desc_index; - u8 _recvd[0]; + u8 _recvd[]; } __attribute__ ((packed)); #if 0 @@ -478,12 +478,12 @@ struct asd_ms_conn_desc { u8 size_sideband_desc; u32 _resvd; u8 name[16]; - struct asd_ms_sb_desc sb_desc[0]; + struct asd_ms_sb_desc sb_desc[]; } __attribute__ ((packed)); struct asd_nd_phy_desc { u8 vp_attch_type; - u8 attch_specific[0]; + u8 attch_specific[]; } __attribute__ ((packed)); #if 0 @@ -503,7 +503,7 @@ struct asd_ms_node_desc { u8 size_phy_desc; u8 _resvd; u8 name[16]; - struct asd_nd_phy_desc phy_desc[0]; + struct asd_nd_phy_desc phy_desc[]; } __attribute__ ((packed)); struct asd_ms_conn_map { @@ -518,7 +518,7 @@ struct asd_ms_conn_map { u8 usage_model_id; u32 _resvd; struct asd_ms_conn_desc conn_desc[0]; - struct asd_ms_node_desc node_desc[0]; + struct asd_ms_node_desc node_desc[]; } __attribute__ ((packed)); struct asd_ctrla_phy_entry { @@ -542,7 +542,7 @@ struct asd_ll_el { u8 id0; u8 id1; __le16 next; - u8 something_here[0]; + u8 something_here[]; } __attribute__ ((packed)); static int asd_poll_flash(struct asd_ha_struct *asd_ha) diff --git a/include/scsi/sas.h b/include/scsi/sas.h index a5d8ae49198c..4726c1bbec65 100644 --- a/include/scsi/sas.h +++ b/include/scsi/sas.h @@ -324,7 +324,7 @@ struct ssp_response_iu { __be32 response_data_len; u8 resp_data[0]; - u8 sense_data[0]; + u8 sense_data[]; } __attribute__ ((packed)); struct ssp_command_iu { @@ -346,7 +346,7 @@ struct ssp_command_iu { u8 add_cdb_len:6; u8 cdb[16]; - u8 add_cdb[0]; + u8 add_cdb[]; } __attribute__ ((packed)); struct xfer_rdy_iu { @@ -555,7 +555,7 @@ struct ssp_response_iu { __be32 response_data_len; u8 resp_data[0]; - u8 sense_data[0]; + u8 sense_data[]; } __attribute__ ((packed)); struct ssp_command_iu { @@ -577,7 +577,7 @@ struct ssp_command_iu { u8 _r_c:2; u8 cdb[16]; - u8 add_cdb[0]; + u8 add_cdb[]; } __attribute__ ((packed)); struct xfer_rdy_iu { From ec38c0adc0a149496b6b0e0bfcff9bc7d80db2c4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 14:25:50 -0500 Subject: [PATCH 277/562] scsi: ufs: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Link: https://lore.kernel.org/r/20200507192550.GA16683@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 1827b57eb7db..915e963398c4 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3258,7 +3258,7 @@ static inline int ufshcd_read_desc(struct ufs_hba *hba, struct uc_string_id { u8 len; u8 type; - wchar_t uc[0]; + wchar_t uc[]; } __packed; /* replace non-printable or non-ASCII characters with spaces */ From 6e27a86aed9760780666b062abbcf5e1408e8376 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 May 2020 21:31:11 +0100 Subject: [PATCH 278/562] scsi: lpfc: Remove redundant initialization to variable rc The variable rc is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Link: https://lore.kernel.org/r/20200507203111.64709-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen Addresses-Coverity: ("Unused value") --- drivers/scsi/lpfc/lpfc_attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 2791efa770af..d51fe5f9e5ec 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4877,7 +4877,7 @@ lpfc_request_firmware_upgrade_store(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; struct lpfc_hba *phba = vport->phba; - int val = 0, rc = -EINVAL; + int val = 0, rc; /* Sanity check on user data */ if (!isdigit(buf[0])) From 817d7e140283f4afc766569c670997df79a7c9ee Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:08 +0800 Subject: [PATCH 279/562] scsi: ufs: Enable WriteBooster on some pre-3.1 UFS devices The WriteBooster feature can be supported by some pre-3.1 UFS devices by upgrading firmware. To enable WriteBooster feature in such devices, introduce a device quirk to relax the entrance condition of ufshcd_wb_probe() to allow host driver to check those devices' WriteBooster capability. WriteBooster feature can be available if below all conditions are satisfied, 1. Host enables WriteBooster capability 2. UFS 3.1 device or UFS pre-3.1 device with quirk UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES enabled 3. The device descriptor shall have DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP field 4. WriteBooster support is specified in above field Link: https://lore.kernel.org/r/20200508080115.24233-2-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs_quirks.h | 7 ++++ drivers/scsi/ufs/ufshcd.c | 67 ++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/drivers/scsi/ufs/ufs_quirks.h b/drivers/scsi/ufs/ufs_quirks.h index df7a1e6805a3..e3175a63c676 100644 --- a/drivers/scsi/ufs/ufs_quirks.h +++ b/drivers/scsi/ufs/ufs_quirks.h @@ -101,4 +101,11 @@ struct ufs_dev_fix { */ #define UFS_DEVICE_QUIRK_HOST_VS_DEBUGSAVECONFIGTIME (1 << 9) +/* + * Some pre-3.1 UFS devices can support extended features by upgrading + * the firmware. Enable this quirk to make UFS core driver probe and enable + * supported features on such devices. + */ +#define UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES (1 << 10) + #endif /* UFS_QUIRKS_H_ */ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 915e963398c4..a802c5f5ec7c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6800,9 +6800,19 @@ static int ufshcd_scsi_add_wlus(struct ufs_hba *hba) static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) { + if (!ufshcd_is_wb_allowed(hba)) + return; + + if (hba->desc_size.dev_desc < DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP + 4) + goto wb_disabled; + hba->dev_info.d_ext_ufs_feature_sup = get_unaligned_be32(desc_buf + DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP); + + if (!(hba->dev_info.d_ext_ufs_feature_sup & UFS_DEV_WRITE_BOOSTER_SUP)) + goto wb_disabled; + /* * WB may be supported but not configured while provisioning. * The spec says, in dedicated wb buffer mode, @@ -6818,11 +6828,29 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) hba->dev_info.b_presrv_uspc_en = desc_buf[DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN]; - if (!((hba->dev_info.d_ext_ufs_feature_sup & - UFS_DEV_WRITE_BOOSTER_SUP) && - hba->dev_info.b_wb_buffer_type && + if (!(hba->dev_info.b_wb_buffer_type && hba->dev_info.d_wb_alloc_units)) - hba->caps &= ~UFSHCD_CAP_WB_EN; + goto wb_disabled; + + return; + +wb_disabled: + hba->caps &= ~UFSHCD_CAP_WB_EN; +} + +static void ufs_fixup_device_setup(struct ufs_hba *hba) +{ + struct ufs_dev_fix *f; + struct ufs_dev_info *dev_info = &hba->dev_info; + + for (f = ufs_fixups; f->quirk; f++) { + if ((f->wmanufacturerid == dev_info->wmanufacturerid || + f->wmanufacturerid == UFS_ANY_VENDOR) && + ((dev_info->model && + STR_PRFX_EQUAL(f->model, dev_info->model)) || + !strcmp(f->model, UFS_ANY_MODEL))) + hba->dev_quirks |= f->quirk; + } } static int ufs_get_device_desc(struct ufs_hba *hba) @@ -6862,10 +6890,6 @@ static int ufs_get_device_desc(struct ufs_hba *hba) model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; - /* Enable WB only for UFS-3.1 */ - if (dev_info->wspecversion >= 0x310) - ufshcd_wb_probe(hba, desc_buf); - err = ufshcd_read_string_desc(hba, model_index, &dev_info->model, SD_ASCII_STD); if (err < 0) { @@ -6874,6 +6898,16 @@ static int ufs_get_device_desc(struct ufs_hba *hba) goto out; } + ufs_fixup_device_setup(hba); + + /* + * Probe WB only for UFS-3.1 devices or UFS devices with quirk + * UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES enabled + */ + if (dev_info->wspecversion >= 0x310 || + (hba->dev_quirks & UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES)) + ufshcd_wb_probe(hba, desc_buf); + /* * ufshcd_read_string_desc returns size of the string * reset the error value @@ -6893,21 +6927,6 @@ static void ufs_put_device_desc(struct ufs_hba *hba) dev_info->model = NULL; } -static void ufs_fixup_device_setup(struct ufs_hba *hba) -{ - struct ufs_dev_fix *f; - struct ufs_dev_info *dev_info = &hba->dev_info; - - for (f = ufs_fixups; f->quirk; f++) { - if ((f->wmanufacturerid == dev_info->wmanufacturerid || - f->wmanufacturerid == UFS_ANY_VENDOR) && - ((dev_info->model && - STR_PRFX_EQUAL(f->model, dev_info->model)) || - !strcmp(f->model, UFS_ANY_MODEL))) - hba->dev_quirks |= f->quirk; - } -} - /** * ufshcd_tune_pa_tactivate - Tunes PA_TActivate of local UniPro * @hba: per-adapter instance @@ -7244,8 +7263,6 @@ static int ufshcd_device_params_init(struct ufs_hba *hba) ufshcd_get_ref_clk_gating_wait(hba); - ufs_fixup_device_setup(hba); - if (!ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_READ_FLAG, QUERY_FLAG_IDN_PWR_ON_WPE, &flag)) hba->dev_info.f_power_on_wp_en = flag; From c28c00ba4f060949c2f461f08a4aa10024a40672 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:09 +0800 Subject: [PATCH 280/562] scsi: ufs: Introduce fixup_dev_quirks vops Some UFS deivces may have required device quirks or have non-standard features which are enabled only on specified UFS hosts or for special customers. To not "pollute" common device quirk list, i.e. ufs_fixups table, for those devices mentioned above, introduce "fixup_dev_quirks" vops to allow vendors to fix or modify device quirks accordingly. Link: https://lore.kernel.org/r/20200508080115.24233-3-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 11 ++++++++++- drivers/scsi/ufs/ufshcd.h | 9 ++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a802c5f5ec7c..8d86d9a6a622 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6838,7 +6838,7 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) hba->caps &= ~UFSHCD_CAP_WB_EN; } -static void ufs_fixup_device_setup(struct ufs_hba *hba) +static void ufshcd_fixup_dev_quirks(struct ufs_hba *hba) { struct ufs_dev_fix *f; struct ufs_dev_info *dev_info = &hba->dev_info; @@ -6853,6 +6853,15 @@ static void ufs_fixup_device_setup(struct ufs_hba *hba) } } +static void ufs_fixup_device_setup(struct ufs_hba *hba) +{ + /* fix by general quirk table */ + ufshcd_fixup_dev_quirks(hba); + + /* allow vendors to fix quirks */ + ufshcd_vops_fixup_dev_quirks(hba); +} + static int ufs_get_device_desc(struct ufs_hba *hba) { int err; diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 056537e52c19..897338ba67aa 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -69,6 +69,7 @@ #include #include "ufs.h" +#include "ufs_quirks.h" #include "ufshci.h" #define UFSHCD "ufshcd" @@ -336,6 +337,7 @@ struct ufs_hba_variant_ops { void (*hibern8_notify)(struct ufs_hba *, enum uic_cmd_dme, enum ufs_notify_change_status); int (*apply_dev_quirks)(struct ufs_hba *hba); + void (*fixup_dev_quirks)(struct ufs_hba *hba); int (*suspend)(struct ufs_hba *, enum ufs_pm_op); int (*resume)(struct ufs_hba *, enum ufs_pm_op); void (*dbg_register_dump)(struct ufs_hba *hba); @@ -950,7 +952,6 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, void ufshcd_auto_hibern8_enable(struct ufs_hba *hba); void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit); - #define SD_ASCII_STD true #define SD_RAW false int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, @@ -1085,6 +1086,12 @@ static inline int ufshcd_vops_apply_dev_quirks(struct ufs_hba *hba) return 0; } +static inline void ufshcd_vops_fixup_dev_quirks(struct ufs_hba *hba) +{ + if (hba->vops && hba->vops->fixup_dev_quirks) + hba->vops->fixup_dev_quirks(hba); +} + static inline int ufshcd_vops_suspend(struct ufs_hba *hba, enum ufs_pm_op op) { if (hba->vops && hba->vops->suspend) From 8db269a5102eabfb05f4207f90e0a022caf7b804 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:10 +0800 Subject: [PATCH 281/562] scsi: ufs: Export ufs_fixup_device_setup() function Export ufs_fixup_device_setup() to allow vendors to re-use it for fixing device quriks on specified UFS hosts. Link: https://lore.kernel.org/r/20200508080115.24233-4-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 10 +++++++--- drivers/scsi/ufs/ufshcd.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 8d86d9a6a622..c0b2cec5dcd8 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6838,12 +6838,15 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) hba->caps &= ~UFSHCD_CAP_WB_EN; } -static void ufshcd_fixup_dev_quirks(struct ufs_hba *hba) +void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, struct ufs_dev_fix *fixups) { struct ufs_dev_fix *f; struct ufs_dev_info *dev_info = &hba->dev_info; - for (f = ufs_fixups; f->quirk; f++) { + if (!fixups) + return; + + for (f = fixups; f->quirk; f++) { if ((f->wmanufacturerid == dev_info->wmanufacturerid || f->wmanufacturerid == UFS_ANY_VENDOR) && ((dev_info->model && @@ -6852,11 +6855,12 @@ static void ufshcd_fixup_dev_quirks(struct ufs_hba *hba) hba->dev_quirks |= f->quirk; } } +EXPORT_SYMBOL_GPL(ufshcd_fixup_dev_quirks); static void ufs_fixup_device_setup(struct ufs_hba *hba) { /* fix by general quirk table */ - ufshcd_fixup_dev_quirks(hba); + ufshcd_fixup_dev_quirks(hba, ufs_fixups); /* allow vendors to fix quirks */ ufshcd_vops_fixup_dev_quirks(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 897338ba67aa..4d296acadd6f 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -952,6 +952,7 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, void ufshcd_auto_hibern8_enable(struct ufs_hba *hba); void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit); +void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, struct ufs_dev_fix *fixups); #define SD_ASCII_STD true #define SD_RAW false int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, From 62c2f503b54cd17bc4ad7b929354923012375f73 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:11 +0800 Subject: [PATCH 282/562] scsi: ufs-mediatek: Add fixup_dev_quirks vops Add fixup_dev_quirk vops in MediaTek UFS platforms and provide an initial vendor-specific device quirk table. Link: https://lore.kernel.org/r/20200508080115.24233-5-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 673c16596fb2..1898f1269ac5 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -30,6 +30,12 @@ #define ufs_mtk_device_reset_ctrl(high, res) \ ufs_mtk_smc(UFS_MTK_SIP_DEVICE_RESET, high, res) +static struct ufs_dev_fix ufs_mtk_dev_fixups[] = { + UFS_FIX(UFS_VENDOR_SKHYNIX, "H9HQ21AFAMZDAR", + UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES), + END_FIX +}; + static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) { u32 tmp; @@ -555,10 +561,8 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) struct ufs_dev_info *dev_info = &hba->dev_info; u16 mid = dev_info->wmanufacturerid; - if (mid == UFS_VENDOR_SAMSUNG) { - hba->dev_quirks &= ~UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE; + if (mid == UFS_VENDOR_SAMSUNG) ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE), 6); - } /* * Decide waiting time before gating reference clock and @@ -575,6 +579,17 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) return 0; } +void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba) +{ + struct ufs_dev_info *dev_info = &hba->dev_info; + u16 mid = dev_info->wmanufacturerid; + + ufshcd_fixup_dev_quirks(hba, ufs_mtk_dev_fixups); + + if (mid == UFS_VENDOR_SAMSUNG) + hba->dev_quirks &= ~UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE; +} + /** * struct ufs_hba_mtk_vops - UFS MTK specific variant operations * @@ -589,6 +604,7 @@ static struct ufs_hba_variant_ops ufs_hba_mtk_vops = { .link_startup_notify = ufs_mtk_link_startup_notify, .pwr_change_notify = ufs_mtk_pwr_change_notify, .apply_dev_quirks = ufs_mtk_apply_dev_quirks, + .fixup_dev_quirks = ufs_mtk_fixup_dev_quirks, .suspend = ufs_mtk_suspend, .resume = ufs_mtk_resume, .dbg_register_dump = ufs_mtk_dbg_register_dump, From 1f34eedf9bc15d238aece3308aba0a21a1ef5fe2 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:12 +0800 Subject: [PATCH 283/562] scsi: ufs: Add "index" in parameter list of ufshcd_query_flag() For preparation of LU Dedicated buffer mode support on WriteBooster feature, "index" parameter shall be added and allowed to be specified by callers. Link: https://lore.kernel.org/r/20200508080115.24233-6-stanley.chu@mediatek.com Reviewed-by: Bean Huo Reviewed-by: Avri Altman Reviewed-by: Can Guo Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-sysfs.c | 2 +- drivers/scsi/ufs/ufshcd.c | 28 +++++++++++++++------------- drivers/scsi/ufs/ufshcd.h | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c index 93484408bc40..b86b6a40d7e6 100644 --- a/drivers/scsi/ufs/ufs-sysfs.c +++ b/drivers/scsi/ufs/ufs-sysfs.c @@ -631,7 +631,7 @@ static ssize_t _name##_show(struct device *dev, \ struct ufs_hba *hba = dev_get_drvdata(dev); \ pm_runtime_get_sync(hba->dev); \ ret = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG, \ - QUERY_FLAG_IDN##_uname, &flag); \ + QUERY_FLAG_IDN##_uname, 0, &flag); \ pm_runtime_put_sync(hba->dev); \ if (ret) \ return -EINVAL; \ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c0b2cec5dcd8..4053d24eaafc 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2782,13 +2782,13 @@ static inline void ufshcd_init_query(struct ufs_hba *hba, } static int ufshcd_query_flag_retry(struct ufs_hba *hba, - enum query_opcode opcode, enum flag_idn idn, bool *flag_res) + enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res) { int ret; int retries; for (retries = 0; retries < QUERY_REQ_RETRIES; retries++) { - ret = ufshcd_query_flag(hba, opcode, idn, flag_res); + ret = ufshcd_query_flag(hba, opcode, idn, index, flag_res); if (ret) dev_dbg(hba->dev, "%s: failed with error %d, retries %d\n", @@ -2809,16 +2809,17 @@ static int ufshcd_query_flag_retry(struct ufs_hba *hba, * @hba: per-adapter instance * @opcode: flag query to perform * @idn: flag idn to access + * @index: flag index to access * @flag_res: the flag value after the query request completes * * Returns 0 for success, non-zero in case of failure */ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, - enum flag_idn idn, bool *flag_res) + enum flag_idn idn, u8 index, bool *flag_res) { struct ufs_query_req *request = NULL; struct ufs_query_res *response = NULL; - int err, index = 0, selector = 0; + int err, selector = 0; int timeout = QUERY_REQ_TIMEOUT; BUG_ON(!hba); @@ -4175,7 +4176,7 @@ static int ufshcd_complete_dev_init(struct ufs_hba *hba) bool flag_res = true; err = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, - QUERY_FLAG_IDN_FDEVICEINIT, NULL); + QUERY_FLAG_IDN_FDEVICEINIT, 0, NULL); if (err) { dev_err(hba->dev, "%s setting fDeviceInit flag failed with error %d\n", @@ -4186,7 +4187,7 @@ static int ufshcd_complete_dev_init(struct ufs_hba *hba) /* poll for max. 1000 iterations for fDeviceInit flag to clear */ for (i = 0; i < 1000 && !err && flag_res; i++) err = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_READ_FLAG, - QUERY_FLAG_IDN_FDEVICEINIT, &flag_res); + QUERY_FLAG_IDN_FDEVICEINIT, 0, &flag_res); if (err) dev_err(hba->dev, @@ -5001,7 +5002,7 @@ static int ufshcd_enable_auto_bkops(struct ufs_hba *hba) goto out; err = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, - QUERY_FLAG_IDN_BKOPS_EN, NULL); + QUERY_FLAG_IDN_BKOPS_EN, 0, NULL); if (err) { dev_err(hba->dev, "%s: failed to enable bkops %d\n", __func__, err); @@ -5051,7 +5052,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) } err = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_CLEAR_FLAG, - QUERY_FLAG_IDN_BKOPS_EN, NULL); + QUERY_FLAG_IDN_BKOPS_EN, 0, NULL); if (err) { dev_err(hba->dev, "%s: failed to disable bkops %d\n", __func__, err); @@ -5217,7 +5218,7 @@ static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) opcode = UPIU_QUERY_OPCODE_CLEAR_FLAG; ret = ufshcd_query_flag_retry(hba, opcode, - QUERY_FLAG_IDN_WB_EN, NULL); + QUERY_FLAG_IDN_WB_EN, 0, NULL); if (ret) { dev_err(hba->dev, "%s write booster %s failed %d\n", __func__, enable ? "enable" : "disable", ret); @@ -5241,7 +5242,7 @@ static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) val = UPIU_QUERY_OPCODE_CLEAR_FLAG; return ufshcd_query_flag_retry(hba, val, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8, + QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8, 0, NULL); } @@ -5262,7 +5263,8 @@ static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba) return 0; ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, NULL); + QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, + 0, NULL); if (ret) dev_err(hba->dev, "%s WB - buf flush enable failed %d\n", __func__, ret); @@ -5281,7 +5283,7 @@ static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba) return 0; ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_CLEAR_FLAG, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, NULL); + QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, 0, NULL); if (ret) { dev_warn(hba->dev, "%s: WB - buf flush disable failed %d\n", __func__, ret); @@ -7277,7 +7279,7 @@ static int ufshcd_device_params_init(struct ufs_hba *hba) ufshcd_get_ref_clk_gating_wait(hba); if (!ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_READ_FLAG, - QUERY_FLAG_IDN_PWR_ON_WPE, &flag)) + QUERY_FLAG_IDN_PWR_ON_WPE, 0, &flag)) hba->dev_info.f_power_on_wp_en = flag; /* Probe maximum power mode co-supported by both UFS host and device */ diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 4d296acadd6f..8262a48eb9eb 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -948,7 +948,7 @@ int ufshcd_read_desc_param(struct ufs_hba *hba, int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector, u32 *attr_val); int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, - enum flag_idn idn, bool *flag_res); + enum flag_idn idn, u8 index, bool *flag_res); void ufshcd_auto_hibern8_enable(struct ufs_hba *hba); void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit); From 6f8d5a6a78cff85a7cbd1f9327e9aee25b0a48b2 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:13 +0800 Subject: [PATCH 284/562] scsi: ufs: Add LU Dedicated buffer mode support for WriteBooster According to UFS specification, there are two WriteBooster mode of operations: "LU dedicated buffer" mode and "shared buffer" mode. In the "LU dedicated buffer" mode, the WriteBooster Buffer is dedicated to a logical unit. If the device supports the "LU dedicated buffer" mode, this mode is configured by setting bWriteBoosterBufferType to 00h. The logical unit WriteBooster Buffer size is configured by setting the dLUNumWriteBoosterBufferAllocUnits field of the related Unit Descriptor. Only a value greater than zero enables the WriteBooster feature in the logical unit. Modify ufshcd_wb_probe() as above description to support LU Dedicated buffer mode. Note that according to UFS 3.1 specification, the valid value of bDeviceMaxWriteBoosterLUs parameter in Geometry Descriptor is 1, which means at most one LUN can have WriteBooster buffer in "LU dedicated buffer mode". Therefore this patch supports only one LUN with WriteBooster enabled. All WriteBooster related sysfs nodes are specifically mapped to the LUN with WriteBooster enabled in LU Dedicated buffer mode. Link: https://lore.kernel.org/r/20200508080115.24233-7-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Bean Huo Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-sysfs.c | 11 +++++++- drivers/scsi/ufs/ufs.h | 10 ++++++++ drivers/scsi/ufs/ufshcd.c | 50 ++++++++++++++++++++++++++++-------- drivers/scsi/ufs/ufshcd.h | 7 +++++ 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c index b86b6a40d7e6..a0b3763e1dc2 100644 --- a/drivers/scsi/ufs/ufs-sysfs.c +++ b/drivers/scsi/ufs/ufs-sysfs.c @@ -622,16 +622,25 @@ static const struct attribute_group ufs_sysfs_string_descriptors_group = { .attrs = ufs_sysfs_string_descriptors, }; +static inline bool ufshcd_is_wb_flags(enum flag_idn idn) +{ + return ((idn >= QUERY_FLAG_IDN_WB_EN) && + (idn <= QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8)); +} + #define UFS_FLAG(_name, _uname) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ bool flag; \ + u8 index = 0; \ int ret; \ struct ufs_hba *hba = dev_get_drvdata(dev); \ + if (ufshcd_is_wb_flags(QUERY_FLAG_IDN##_uname)) \ + index = ufshcd_wb_get_flag_index(hba); \ pm_runtime_get_sync(hba->dev); \ ret = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG, \ - QUERY_FLAG_IDN##_uname, 0, &flag); \ + QUERY_FLAG_IDN##_uname, index, &flag); \ pm_runtime_put_sync(hba->dev); \ if (ret) \ return -EINVAL; \ diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index daac5053b850..b3135344ab3f 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -64,6 +64,9 @@ #define UFS_MAX_LUNS (SCSI_W_LUN_BASE + UFS_UPIU_MAX_UNIT_NUM_ID) #define UFS_UPIU_WLUN_ID (1 << 7) +/* WriteBooster buffer is available only for the logical unit from 0 to 7 */ +#define UFS_UPIU_MAX_WB_LUN_ID 8 + /* Well known logical unit id in LUN field of UPIU */ enum { UFS_UPIU_REPORT_LUNS_WLUN = 0x81, @@ -330,6 +333,12 @@ enum health_desc_param { HEALTH_DESC_PARAM_LIFE_TIME_EST_B = 0x4, }; +/* WriteBooster buffer mode */ +enum { + WB_BUF_MODE_LU_DEDICATED = 0x0, + WB_BUF_MODE_SHARED = 0x1, +}; + /* * Logical Unit Write Protect * 00h: LU not write protected @@ -559,6 +568,7 @@ struct ufs_dev_info { bool is_lu_power_on_wp; /* Maximum number of general LU supported by the UFS device */ u8 max_lu_supported; + u8 wb_dedicated_lu; u16 wmanufacturerid; /*UFS device Product Name */ u8 *model; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 4053d24eaafc..b6a0d77d47ac 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5205,6 +5205,7 @@ static bool ufshcd_wb_sup(struct ufs_hba *hba) static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) { int ret; + u8 index; enum query_opcode opcode; if (!ufshcd_wb_sup(hba)) @@ -5217,8 +5218,9 @@ static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) else opcode = UPIU_QUERY_OPCODE_CLEAR_FLAG; + index = ufshcd_wb_get_flag_index(hba); ret = ufshcd_query_flag_retry(hba, opcode, - QUERY_FLAG_IDN_WB_EN, 0, NULL); + QUERY_FLAG_IDN_WB_EN, index, NULL); if (ret) { dev_err(hba->dev, "%s write booster %s failed %d\n", __func__, enable ? "enable" : "disable", ret); @@ -5235,15 +5237,17 @@ static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) { int val; + u8 index; if (set) val = UPIU_QUERY_OPCODE_SET_FLAG; else val = UPIU_QUERY_OPCODE_CLEAR_FLAG; + index = ufshcd_wb_get_flag_index(hba); return ufshcd_query_flag_retry(hba, val, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8, 0, - NULL); + QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8, + index, NULL); } static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) @@ -5258,13 +5262,15 @@ static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba) { int ret; + u8 index; if (!ufshcd_wb_sup(hba) || hba->wb_buf_flush_enabled) return 0; + index = ufshcd_wb_get_flag_index(hba); ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, - 0, NULL); + index, NULL); if (ret) dev_err(hba->dev, "%s WB - buf flush enable failed %d\n", __func__, ret); @@ -5278,12 +5284,15 @@ static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba) static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba) { int ret; + u8 index; if (!ufshcd_wb_sup(hba) || !hba->wb_buf_flush_enabled) return 0; + index = ufshcd_wb_get_flag_index(hba); ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_CLEAR_FLAG, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, 0, NULL); + QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, + index, NULL); if (ret) { dev_warn(hba->dev, "%s: WB - buf flush disable failed %d\n", __func__, ret); @@ -6802,6 +6811,9 @@ static int ufshcd_scsi_add_wlus(struct ufs_hba *hba) static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) { + u8 lun; + u32 d_lu_wb_buf_alloc; + if (!ufshcd_is_wb_allowed(hba)) return; @@ -6824,16 +6836,32 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) hba->dev_info.b_wb_buffer_type = desc_buf[DEVICE_DESC_PARAM_WB_TYPE]; - hba->dev_info.d_wb_alloc_units = - get_unaligned_be32(desc_buf + - DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS); hba->dev_info.b_presrv_uspc_en = desc_buf[DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN]; - if (!(hba->dev_info.b_wb_buffer_type && - hba->dev_info.d_wb_alloc_units)) - goto wb_disabled; + if (hba->dev_info.b_wb_buffer_type == WB_BUF_MODE_SHARED) { + hba->dev_info.d_wb_alloc_units = + get_unaligned_be32(desc_buf + + DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS); + if (!hba->dev_info.d_wb_alloc_units) + goto wb_disabled; + } else { + for (lun = 0; lun < UFS_UPIU_MAX_WB_LUN_ID; lun++) { + d_lu_wb_buf_alloc = 0; + ufshcd_read_unit_desc_param(hba, + lun, + UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS, + (u8 *)&d_lu_wb_buf_alloc, + sizeof(d_lu_wb_buf_alloc)); + if (d_lu_wb_buf_alloc) { + hba->dev_info.wb_dedicated_lu = lun; + break; + } + } + if (!d_lu_wb_buf_alloc) + goto wb_disabled; + } return; wb_disabled: diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 8262a48eb9eb..23a434c03c2a 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -861,6 +861,13 @@ static inline bool ufshcd_keep_autobkops_enabled_except_suspend( return hba->caps & UFSHCD_CAP_KEEP_AUTO_BKOPS_ENABLED_EXCEPT_SUSPEND; } +static inline u8 ufshcd_wb_get_flag_index(struct ufs_hba *hba) +{ + if (hba->dev_info.b_wb_buffer_type == WB_BUF_MODE_LU_DEDICATED) + return hba->dev_info.wb_dedicated_lu; + return 0; +} + extern int ufshcd_runtime_suspend(struct ufs_hba *hba); extern int ufshcd_runtime_resume(struct ufs_hba *hba); extern int ufshcd_runtime_idle(struct ufs_hba *hba); From 29060a629135a00414d6ea3fbb246a114898f4be Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:14 +0800 Subject: [PATCH 285/562] scsi: ufs-mediatek: Enable WriteBooster capability Enable WriteBooster capability on MediaTek UFS platforms. Link: https://lore.kernel.org/r/20200508080115.24233-8-stanley.chu@mediatek.com Reviewed-by: Bean Huo Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 1898f1269ac5..c543142554d3 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -269,6 +269,9 @@ static int ufs_mtk_init(struct ufs_hba *hba) /* Enable clock-gating */ hba->caps |= UFSHCD_CAP_CLK_GATING; + /* Enable WriteBooster */ + hba->caps |= UFSHCD_CAP_WB_EN; + /* * ufshcd_vops_init() is invoked after * ufshcd_setup_clock(true) in ufshcd_hba_init() thus From 79e3520f82cb91e46e7db1af14ad916d6036dde8 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 8 May 2020 16:01:15 +0800 Subject: [PATCH 286/562] scsi: ufs: Cleanup WriteBooster feature Small cleanup as below items, 1. Use ufshcd_is_wb_allowed() directly instead of ufshcd_wb_sup() since ufshcd_wb_sup() just returns the result of ufshcd_is_wb_allowed(). 2. In ufshcd_suspend(), "else if (!ufshcd_is_runtime_pm(pm_op)) can be simplified to "else" since both have the same meaning. This patch does not change any functionality. Link: https://lore.kernel.org/r/20200508080115.24233-9-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index b6a0d77d47ac..426073a518ef 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -253,7 +253,6 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up); static irqreturn_t ufshcd_intr(int irq, void *__hba); static int ufshcd_change_power_mode(struct ufs_hba *hba, struct ufs_pa_layer_attr *pwr_mode); -static bool ufshcd_wb_sup(struct ufs_hba *hba); static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba); static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba); static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable); @@ -285,7 +284,7 @@ static inline void ufshcd_wb_config(struct ufs_hba *hba) { int ret; - if (!ufshcd_wb_sup(hba)) + if (!ufshcd_is_wb_allowed(hba)) return; ret = ufshcd_wb_ctrl(hba, true); @@ -5197,18 +5196,13 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } -static bool ufshcd_wb_sup(struct ufs_hba *hba) -{ - return ufshcd_is_wb_allowed(hba); -} - static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) { int ret; u8 index; enum query_opcode opcode; - if (!ufshcd_wb_sup(hba)) + if (!ufshcd_is_wb_allowed(hba)) return 0; if (!(enable ^ hba->wb_enabled)) @@ -5264,7 +5258,7 @@ static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba) int ret; u8 index; - if (!ufshcd_wb_sup(hba) || hba->wb_buf_flush_enabled) + if (!ufshcd_is_wb_allowed(hba) || hba->wb_buf_flush_enabled) return 0; index = ufshcd_wb_get_flag_index(hba); @@ -5286,7 +5280,7 @@ static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba) int ret; u8 index; - if (!ufshcd_wb_sup(hba) || !hba->wb_buf_flush_enabled) + if (!ufshcd_is_wb_allowed(hba) || !hba->wb_buf_flush_enabled) return 0; index = ufshcd_wb_get_flag_index(hba); @@ -5336,7 +5330,7 @@ static bool ufshcd_wb_keep_vcc_on(struct ufs_hba *hba) int ret; u32 avail_buf; - if (!ufshcd_wb_sup(hba)) + if (!ufshcd_is_wb_allowed(hba)) return false; /* * The ufs device needs the vcc to be ON to flush. @@ -8235,12 +8229,12 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) * configured WB type is 70% full, keep vcc ON * for the device to flush the wb buffer */ - if ((hba->auto_bkops_enabled && ufshcd_wb_sup(hba)) || + if ((hba->auto_bkops_enabled && ufshcd_is_wb_allowed(hba)) || ufshcd_wb_keep_vcc_on(hba)) hba->dev_info.keep_vcc_on = true; else hba->dev_info.keep_vcc_on = false; - } else if (!ufshcd_is_runtime_pm(pm_op)) { + } else { hba->dev_info.keep_vcc_on = false; } From 6c205a66d65d031e8bf54cf0f4586258ca09396a Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 8 May 2020 14:08:34 +0530 Subject: [PATCH 287/562] scsi: megaraid_sas: Limit device queue depth to controller queue depth The driver currently assigns a pre-defined queue depth when the firmware-provided device queue depth is greater than the controller queue depth. Use the controller queue depth if the reported target queue depth is too large. Link: https://lore.kernel.org/r/20200508083838.22778-2-chandrakanth.patil@broadcom.com Signed-off-by: Kashyap Desai Signed-off-by: Chandrakanth Patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index fb9c3ceed508..00668335c2af 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1982,9 +1982,9 @@ static void megasas_set_fw_assisted_qd(struct scsi_device *sdev, if (is_target_prop) { tgt_device_qd = le32_to_cpu(instance->tgt_prop->device_qdepth); - if (tgt_device_qd && - (tgt_device_qd <= instance->host->can_queue)) - device_qd = tgt_device_qd; + if (tgt_device_qd) + device_qd = min(instance->host->can_queue, + (int)tgt_device_qd); } if (instance->enable_sdev_max_qd && interface_type != UNKNOWN_DRIVE) From 84badfab0d914e2c9e96bdc3f418d7f494bfb0bb Mon Sep 17 00:00:00 2001 From: Sumit Saxena Date: Fri, 8 May 2020 14:08:35 +0530 Subject: [PATCH 288/562] scsi: megaraid_sas: Remove IO buffer hole detection logic As blk_queue_virt_boundary() API in slave_configure ensures that no IOs will come with holes/gaps. Hence, code logic to detect the holes/gaps in IO buffer is not required. Link: https://lore.kernel.org/r/20200508083838.22778-3-chandrakanth.patil@broadcom.com Signed-off-by: Sumit Saxena Signed-off-by: Chandrakanth Patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 58 --------------------- 1 file changed, 58 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index e0f923b8cc50..87f91a38e60d 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2070,7 +2070,6 @@ static bool megasas_is_prp_possible(struct megasas_instance *instance, struct scsi_cmnd *scmd, int sge_count) { - int i; u32 data_length = 0; struct scatterlist *sg_scmd; bool build_prp = false; @@ -2099,63 +2098,6 @@ megasas_is_prp_possible(struct megasas_instance *instance, build_prp = true; } -/* - * Below code detects gaps/holes in IO data buffers. - * What does holes/gaps mean? - * Any SGE except first one in a SGL starts at non NVME page size - * aligned address OR Any SGE except last one in a SGL ends at - * non NVME page size boundary. - * - * Driver has already informed block layer by setting boundary rules for - * bio merging done at NVME page size boundary calling kernel API - * blk_queue_virt_boundary inside slave_config. - * Still there is possibility of IO coming with holes to driver because of - * IO merging done by IO scheduler. - * - * With SCSI BLK MQ enabled, there will be no IO with holes as there is no - * IO scheduling so no IO merging. - * - * With SCSI BLK MQ disabled, IO scheduler may attempt to merge IOs and - * then sending IOs with holes. - * - * Though driver can request block layer to disable IO merging by calling- - * blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue) but - * user may tune sysfs parameter- nomerges again to 0 or 1. - * - * If in future IO scheduling is enabled with SCSI BLK MQ, - * this algorithm to detect holes will be required in driver - * for SCSI BLK MQ enabled case as well. - * - * - */ - scsi_for_each_sg(scmd, sg_scmd, sge_count, i) { - if ((i != 0) && (i != (sge_count - 1))) { - if (mega_mod64(sg_dma_len(sg_scmd), mr_nvme_pg_size) || - mega_mod64(sg_dma_address(sg_scmd), - mr_nvme_pg_size)) { - build_prp = false; - break; - } - } - - if ((sge_count > 1) && (i == 0)) { - if ((mega_mod64((sg_dma_address(sg_scmd) + - sg_dma_len(sg_scmd)), - mr_nvme_pg_size))) { - build_prp = false; - break; - } - } - - if ((sge_count > 1) && (i == (sge_count - 1))) { - if (mega_mod64(sg_dma_address(sg_scmd), - mr_nvme_pg_size)) { - build_prp = false; - break; - } - } - } - return build_prp; } From b9d5e3e7f370a817c742fb089ac1a86dfe8947dc Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 8 May 2020 14:21:30 +0530 Subject: [PATCH 289/562] scsi: megaraid_sas: Replace undefined MFI_BIG_ENDIAN macro with __BIG_ENDIAN_BITFIELD macro MFI_BIG_ENDIAN macro used in drivers structure bitfield to check the CPU big endianness is undefined which would break the code on big endian machine. __BIG_ENDIAN_BITFIELD kernel macro should be used in places of MFI_BIG_ENDIAN macro. Link: https://lore.kernel.org/r/20200508085130.23339-1-chandrakanth.patil@broadcom.com Fixes: a7faf81d7858 ("scsi: megaraid_sas: Set no_write_same only for Virtual Disk") Cc: # v5.6+ Signed-off-by: Shivasharan S Signed-off-by: Chandrakanth Patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 4 ++-- drivers/scsi/megaraid/megaraid_sas_fusion.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 83d8c4cb1ad5..98827363bc49 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -511,7 +511,7 @@ union MR_PROGRESS { */ struct MR_PD_PROGRESS { struct { -#ifndef MFI_BIG_ENDIAN +#ifndef __BIG_ENDIAN_BITFIELD u32 rbld:1; u32 patrol:1; u32 clear:1; @@ -537,7 +537,7 @@ struct MR_PD_PROGRESS { }; struct { -#ifndef MFI_BIG_ENDIAN +#ifndef __BIG_ENDIAN_BITFIELD u32 rbld:1; u32 patrol:1; u32 clear:1; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index d57ecc7f88d8..30de4b01f703 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -774,7 +774,7 @@ struct MR_SPAN_BLOCK_INFO { struct MR_CPU_AFFINITY_MASK { union { struct { -#ifndef MFI_BIG_ENDIAN +#ifndef __BIG_ENDIAN_BITFIELD u8 hw_path:1; u8 cpu0:1; u8 cpu1:1; @@ -866,7 +866,7 @@ struct MR_LD_RAID { __le16 seqNum; struct { -#ifndef MFI_BIG_ENDIAN +#ifndef __BIG_ENDIAN_BITFIELD u32 ldSyncRequired:1; u32 regTypeReqOnReadIsValid:1; u32 isEPD:1; @@ -889,7 +889,7 @@ struct { /* 0x30 - 0x33, Logical block size for the LD */ u32 logical_block_length; struct { -#ifndef MFI_BIG_ENDIAN +#ifndef __BIG_ENDIAN_BITFIELD /* 0x34, P_I_EXPONENT from READ CAPACITY 16 */ u32 ld_pi_exp:4; /* 0x34, LOGICAL BLOCKS PER PHYSICAL From 6fd8525a70221c26823b1c7e912fb21f218fb0c5 Mon Sep 17 00:00:00 2001 From: Sumit Saxena Date: Fri, 8 May 2020 14:22:42 +0530 Subject: [PATCH 290/562] scsi: megaraid_sas: TM command refire leads to controller firmware crash When TM command times out, driver invokes the controller reset. Post reset, driver re-fires pended TM commands which leads to firmware crash. Post controller reset, return pended TM commands back to OS. Link: https://lore.kernel.org/r/20200508085242.23406-1-chandrakanth.patil@broadcom.com Cc: stable@vger.kernel.org Signed-off-by: Sumit Saxena Signed-off-by: Chandrakanth Patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 87f91a38e60d..319f241da4b6 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4180,6 +4180,7 @@ static void megasas_refire_mgmt_cmd(struct megasas_instance *instance, struct fusion_context *fusion; struct megasas_cmd *cmd_mfi; union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc; + struct MPI2_RAID_SCSI_IO_REQUEST *scsi_io_req; u16 smid; bool refire_cmd = false; u8 result; @@ -4247,6 +4248,11 @@ static void megasas_refire_mgmt_cmd(struct megasas_instance *instance, result = COMPLETE_CMD; } + scsi_io_req = (struct MPI2_RAID_SCSI_IO_REQUEST *) + cmd_fusion->io_request; + if (scsi_io_req->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) + result = RETURN_CMD; + switch (result) { case REFIRE_CMD: megasas_fire_cmd_fusion(instance, req_desc); @@ -4475,7 +4481,6 @@ megasas_issue_tm(struct megasas_instance *instance, u16 device_handle, if (!timeleft) { dev_err(&instance->pdev->dev, "task mgmt type 0x%x timed out\n", type); - cmd_mfi->flags |= DRV_DCMD_SKIP_REFIRE; mutex_unlock(&instance->reset_mutex); rc = megasas_reset_fusion(instance->host, MFI_IO_TIMEOUT_OCR); mutex_lock(&instance->reset_mutex); From c1bb43e23c4508654badfdac9ca79e4cca4c8b13 Mon Sep 17 00:00:00 2001 From: Chandrakanth Patil Date: Fri, 8 May 2020 14:23:14 +0530 Subject: [PATCH 291/562] scsi: megaraid_sas: Update driver version to 07.714.04.00-rc1 Link: https://lore.kernel.org/r/20200508085314.23461-1-chandrakanth.patil@broadcom.com Signed-off-by: Chandrakanth Patil Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 98827363bc49..af2c7a2a9565 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -21,8 +21,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.713.01.00-rc1" -#define MEGASAS_RELDATE "Dec 27, 2019" +#define MEGASAS_VERSION "07.714.04.00-rc1" +#define MEGASAS_RELDATE "Apr 14, 2020" #define MEGASAS_MSIX_NAME_LEN 32 From cbbfdb2a2416c9f0cde913cf09670097ac281282 Mon Sep 17 00:00:00 2001 From: Suganath Prabu S Date: Fri, 8 May 2020 07:07:38 -0400 Subject: [PATCH 292/562] scsi: mpt3sas: Fix double free warnings Fix following warning from Smatch static analyser: drivers/scsi/mpt3sas/mpt3sas_base.c:5256 _base_allocate_memory_pools() warn: 'ioc->hpr_lookup' double freed drivers/scsi/mpt3sas/mpt3sas_base.c:5256 _base_allocate_memory_pools() warn: 'ioc->internal_lookup' double freed Link: https://lore.kernel.org/r/20200508110738.30732-1-suganath-prabu.subramani@broadcom.com Reported-by: Dan Carpenter Signed-off-by: Suganath Prabu S Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 7fa3bdb906b6..dc260fef9897 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -4899,7 +4899,9 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) } kfree(ioc->hpr_lookup); + ioc->hpr_lookup = NULL; kfree(ioc->internal_lookup); + ioc->internal_lookup = NULL; if (ioc->chain_lookup) { for (i = 0; i < ioc->scsiio_depth; i++) { for (j = ioc->chains_per_prp_buffer; From b59293b469b9e1bbdaf314123498b83a5a9e9ce9 Mon Sep 17 00:00:00 2001 From: Samuel Zou Date: Fri, 8 May 2020 19:49:33 +0800 Subject: [PATCH 293/562] scsi: mpt3sas: Remove unused including Fix the following versioncheck warning: drivers/scsi/mpt3sas/mpt3sas_debugfs.c:16:1: unused including Link: https://lore.kernel.org/r/1588938573-57847-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Samuel Zou Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_debugfs.c b/drivers/scsi/mpt3sas/mpt3sas_debugfs.c index 48095d8aef95..a6ab1db81167 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_debugfs.c +++ b/drivers/scsi/mpt3sas/mpt3sas_debugfs.c @@ -13,7 +13,6 @@ * **/ -#include #include #include #include From 978857c7e367d6841f71c4ded5a8c244520f5e22 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:28 +0200 Subject: [PATCH 294/562] scsi: zfcp: Move shost modification after QDIO (re-)open into fenced function When establishing and activating the QDIO queue pair for a FCP device for the first time, or after an adapter recovery, we publish some of its characteristics to the scsi host object representing that FCP device. When moving the scsi host object allocation and registration to after the first exchange config and exchange port data, this is not possible for the former case - QDIO open for the first time - because that happens before exchange config and exchange port data. Move the scsi host object update into a fenced function that checks whether the object already exists or not. This way we can repeat that step later, once we are past the allocation. Once the first recovery succeeds we don't release the scsi host object anymore, so further recoveries do work as before. Link: https://lore.kernel.org/r/a214ebf508f71e3690113e3e90edab1cea0e24e3.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_ext.h | 2 ++ drivers/s390/scsi/zfcp_qdio.c | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 88294ca0e2ea..d67f31b9b859 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -153,6 +153,8 @@ extern int zfcp_qdio_sbal_get(struct zfcp_qdio *); extern int zfcp_qdio_send(struct zfcp_qdio *, struct zfcp_qdio_req *); extern int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *, struct zfcp_qdio_req *, struct scatterlist *); +extern void zfcp_qdio_shost_update(struct zfcp_adapter *const adapter, + const struct zfcp_qdio *const qdio); extern int zfcp_qdio_open(struct zfcp_qdio *); extern void zfcp_qdio_close(struct zfcp_qdio *); extern void zfcp_qdio_siosl(struct zfcp_adapter *); diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 26702b56a7ab..3a7f3374d10a 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -4,7 +4,7 @@ * * Setup and helper functions to access QDIO. * - * Copyright IBM Corp. 2002, 2017 + * Copyright IBM Corp. 2002, 2020 */ #define KMSG_COMPONENT "zfcp" @@ -342,6 +342,18 @@ void zfcp_qdio_close(struct zfcp_qdio *qdio) atomic_set(&qdio->req_q_free, 0); } +void zfcp_qdio_shost_update(struct zfcp_adapter *const adapter, + const struct zfcp_qdio *const qdio) +{ + struct Scsi_Host *const shost = adapter->scsi_host; + + if (shost == NULL) + return; + + shost->sg_tablesize = qdio->max_sbale_per_req; + shost->max_sectors = qdio->max_sbale_per_req * 8; +} + /** * zfcp_qdio_open - prepare and initialize response queue * @qdio: pointer to struct zfcp_qdio @@ -420,10 +432,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) atomic_set(&qdio->req_q_free, QDIO_MAX_BUFFERS_PER_Q); atomic_or(ZFCP_STATUS_ADAPTER_QDIOUP, &qdio->adapter->status); - if (adapter->scsi_host) { - adapter->scsi_host->sg_tablesize = qdio->max_sbale_per_req; - adapter->scsi_host->max_sectors = qdio->max_sbale_per_req * 8; - } + zfcp_qdio_shost_update(adapter, qdio); return 0; From bd1684817d7d8d1a3b95a4347166246ad1f7670b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:29 +0200 Subject: [PATCH 295/562] scsi: zfcp: Move shost updates during xconfig data handling into fenced function When executing exchange config data for a FCP device for the first time, or after an adapter recovery, we update several properties of the scsi host or fibre channel host object that represent that FCP device. When moving the scsi host object allocation and registration - and thus also the fibre channel host object allocation - to after the first exchange config and exchange port data, this is not possible for the former case. Move all these update into separate, and fenced function that first checks whether the scsi host object already exists or not, before making the updates. During the first ever exchange config data in the adapter life cycle this will make the exchange config data handler skip over this update step, but we can repeat it later, after we allocated the scsi host object. For any further recovery of that adapter the work flow is only changed slightly because then the scsi host object already exists and we don't free it until we release the adapter completely at the end of its life cycle. Link: https://lore.kernel.org/r/5fc3f4d38d4334f7aa595497c6f7865fb1102e0f.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_ext.h | 5 +++ drivers/s390/scsi/zfcp_fsf.c | 48 +++-------------------- drivers/s390/scsi/zfcp_scsi.c | 72 ++++++++++++++++++++++++++++++++++- 3 files changed, 82 insertions(+), 43 deletions(-) diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index d67f31b9b859..4c4be754cf6e 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -125,6 +125,7 @@ extern int zfcp_fsf_exchange_config_data_sync(struct zfcp_qdio *, extern int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *); extern int zfcp_fsf_exchange_port_data_sync(struct zfcp_qdio *, struct fsf_qtcb_bottom_port *); +extern u32 zfcp_fsf_convert_portspeed(u32 fsf_speed); extern void zfcp_fsf_req_dismiss_all(struct zfcp_adapter *); extern int zfcp_fsf_status_read(struct zfcp_qdio *); extern int zfcp_status_read_refill(struct zfcp_adapter *adapter); @@ -171,6 +172,10 @@ extern void zfcp_scsi_schedule_rport_block(struct zfcp_port *); extern void zfcp_scsi_schedule_rports_block(struct zfcp_adapter *); extern void zfcp_scsi_set_prot(struct zfcp_adapter *); extern void zfcp_scsi_dif_sense_error(struct scsi_cmnd *, int); +extern void zfcp_scsi_shost_update_config_data( + struct zfcp_adapter *const adapter, + const struct fsf_qtcb_bottom_config *const bottom, + const bool bottom_incomplete); /* zfcp_sysfs.c */ extern const struct attribute_group *zfcp_unit_attr_groups[]; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 111fe3fc32d7..54edfcbe84ce 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -479,7 +479,7 @@ void zfcp_fsf_req_dismiss_all(struct zfcp_adapter *adapter) #define ZFCP_FSF_PORTSPEED_128GBIT (1 << 8) #define ZFCP_FSF_PORTSPEED_NOT_NEGOTIATED (1 << 15) -static u32 zfcp_fsf_convert_portspeed(u32 fsf_speed) +u32 zfcp_fsf_convert_portspeed(u32 fsf_speed) { u32 fdmi_speed = 0; if (fsf_speed & ZFCP_FSF_PORTSPEED_1GBIT) @@ -509,64 +509,36 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) { struct fsf_qtcb_bottom_config *bottom = &req->qtcb->bottom.config; struct zfcp_adapter *adapter = req->adapter; - struct Scsi_Host *shost = adapter->scsi_host; - struct fc_els_flogi *nsp, *plogi; + struct fc_els_flogi *plogi; /* adjust pointers for missing command code */ - nsp = (struct fc_els_flogi *) ((u8 *)&bottom->nport_serv_param - - sizeof(u32)); plogi = (struct fc_els_flogi *) ((u8 *)&bottom->plogi_payload - sizeof(u32)); if (req->data) memcpy(req->data, bottom, sizeof(*bottom)); - snprintf(fc_host_manufacturer(shost), FC_SERIAL_NUMBER_SIZE, "%s", - "IBM"); - fc_host_port_name(shost) = be64_to_cpu(nsp->fl_wwpn); - fc_host_node_name(shost) = be64_to_cpu(nsp->fl_wwnn); - fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; - adapter->timer_ticks = bottom->timer_interval & ZFCP_FSF_TIMER_INT_MASK; adapter->stat_read_buf_num = max(bottom->status_read_buf_num, (u16)FSF_STATUS_READS_RECOM); - zfcp_scsi_set_prot(adapter); - /* no error return above here, otherwise must fix call chains */ /* do not evaluate invalid fields */ if (req->qtcb->header.fsf_status == FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE) return 0; - fc_host_port_id(shost) = ntoh24(bottom->s_id); - fc_host_speed(shost) = - zfcp_fsf_convert_portspeed(bottom->fc_link_speed); - adapter->hydra_version = bottom->adapter_type; - snprintf(fc_host_model(shost), FC_SYMBOLIC_NAME_SIZE, "0x%04x", - bottom->adapter_type); switch (bottom->fc_topology) { case FSF_TOPO_P2P: adapter->peer_d_id = ntoh24(bottom->peer_d_id); adapter->peer_wwpn = be64_to_cpu(plogi->fl_wwpn); adapter->peer_wwnn = be64_to_cpu(plogi->fl_wwnn); - fc_host_port_type(shost) = FC_PORTTYPE_PTP; - fc_host_fabric_name(shost) = 0; break; case FSF_TOPO_FABRIC: - fc_host_fabric_name(shost) = be64_to_cpu(plogi->fl_wwnn); - if (bottom->connection_features & FSF_FEATURE_NPIV_MODE) - fc_host_port_type(shost) = FC_PORTTYPE_NPIV; - else - fc_host_port_type(shost) = FC_PORTTYPE_NPORT; break; case FSF_TOPO_AL: - fc_host_port_type(shost) = FC_PORTTYPE_NLPORT; - fc_host_fabric_name(shost) = 0; - fallthrough; default: - fc_host_fabric_name(shost) = 0; dev_err(&adapter->ccw_device->dev, "Unknown or unsupported arbitrated loop " "fibre channel topology detected\n"); @@ -584,13 +556,10 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) &adapter->diagnostics->config_data.header; struct fsf_qtcb *qtcb = req->qtcb; struct fsf_qtcb_bottom_config *bottom = &qtcb->bottom.config; - struct Scsi_Host *shost = adapter->scsi_host; if (req->status & ZFCP_STATUS_FSFREQ_ERROR) return; - snprintf(fc_host_firmware_version(shost), FC_VERSION_STRING_SIZE, - "0x%08x", bottom->lic_version); adapter->fsf_lic_version = bottom->lic_version; adapter->adapter_features = bottom->adapter_features; adapter->connection_features = bottom->connection_features; @@ -606,6 +575,7 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) */ zfcp_diag_update_xdata(diag_hdr, bottom, false); + zfcp_scsi_shost_update_config_data(adapter, bottom, false); if (zfcp_fsf_exchange_config_evaluate(req)) return; @@ -630,6 +600,8 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) &adapter->status); zfcp_fsf_link_down_info_eval(req, &qtcb->header.fsf_status_qual.link_down_info); + + zfcp_scsi_shost_update_config_data(adapter, bottom, true); if (zfcp_fsf_exchange_config_evaluate(req)) return; break; @@ -638,16 +610,8 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req) return; } - if (adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT) { + if (adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT) adapter->hardware_version = bottom->hardware_version; - snprintf(fc_host_hardware_version(shost), - FC_VERSION_STRING_SIZE, - "0x%08x", bottom->hardware_version); - memcpy(fc_host_serial_number(shost), bottom->serial_number, - min(FC_SERIAL_NUMBER_SIZE, 17)); - EBCASC(fc_host_serial_number(shost), - min(FC_SERIAL_NUMBER_SIZE, 17)); - } if (FSF_QTCB_CURRENT_VERSION < bottom->low_qtcb_version) { dev_err(&adapter->ccw_device->dev, diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 13d873f806e4..e1281a1ce488 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -4,7 +4,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2018 + * Copyright IBM Corp. 2002, 2020 */ #define KMSG_COMPONENT "zfcp" @@ -841,6 +841,76 @@ void zfcp_scsi_dif_sense_error(struct scsi_cmnd *scmd, int ascq) set_host_byte(scmd, DID_SOFT_ERROR); } +void zfcp_scsi_shost_update_config_data( + struct zfcp_adapter *const adapter, + const struct fsf_qtcb_bottom_config *const bottom, + const bool bottom_incomplete) +{ + struct Scsi_Host *const shost = adapter->scsi_host; + const struct fc_els_flogi *nsp, *plogi; + + if (shost == NULL) + return; + + snprintf(fc_host_firmware_version(shost), FC_VERSION_STRING_SIZE, + "0x%08x", bottom->lic_version); + + if (adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT) { + snprintf(fc_host_hardware_version(shost), + FC_VERSION_STRING_SIZE, + "0x%08x", bottom->hardware_version); + memcpy(fc_host_serial_number(shost), bottom->serial_number, + min(FC_SERIAL_NUMBER_SIZE, 17)); + EBCASC(fc_host_serial_number(shost), + min(FC_SERIAL_NUMBER_SIZE, 17)); + } + + /* adjust pointers for missing command code */ + nsp = (struct fc_els_flogi *) ((u8 *)&bottom->nport_serv_param + - sizeof(u32)); + plogi = (struct fc_els_flogi *) ((u8 *)&bottom->plogi_payload + - sizeof(u32)); + + snprintf(fc_host_manufacturer(shost), FC_SERIAL_NUMBER_SIZE, "%s", + "IBM"); + fc_host_port_name(shost) = be64_to_cpu(nsp->fl_wwpn); + fc_host_node_name(shost) = be64_to_cpu(nsp->fl_wwnn); + fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; + + zfcp_scsi_set_prot(adapter); + + /* do not evaluate invalid fields */ + if (bottom_incomplete) + return; + + fc_host_port_id(shost) = ntoh24(bottom->s_id); + fc_host_speed(shost) = + zfcp_fsf_convert_portspeed(bottom->fc_link_speed); + + snprintf(fc_host_model(shost), FC_SYMBOLIC_NAME_SIZE, "0x%04x", + bottom->adapter_type); + + switch (bottom->fc_topology) { + case FSF_TOPO_P2P: + fc_host_port_type(shost) = FC_PORTTYPE_PTP; + fc_host_fabric_name(shost) = 0; + break; + case FSF_TOPO_FABRIC: + fc_host_fabric_name(shost) = be64_to_cpu(plogi->fl_wwnn); + if (bottom->connection_features & FSF_FEATURE_NPIV_MODE) + fc_host_port_type(shost) = FC_PORTTYPE_NPIV; + else + fc_host_port_type(shost) = FC_PORTTYPE_NPORT; + break; + case FSF_TOPO_AL: + fc_host_port_type(shost) = FC_PORTTYPE_NLPORT; + fallthrough; + default: + fc_host_fabric_name(shost) = 0; + break; + } +} + struct fc_function_template zfcp_transport_functions = { .show_starget_port_id = 1, .show_starget_port_name = 1, From 52e61fde5ec95cb4011784fb0bc6b436e16fcaa8 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:30 +0200 Subject: [PATCH 296/562] scsi: zfcp: Move fc_host updates during xport data handling into fenced function When executing exchange port data for a FCP device for the first time, or after an adapter recovery, we update several properties of the fibre channel host object which represents that FCP device. When moving the scsi host object allocation and registration - and thus also the fibre channel host object allocation - to after the first exchange config and exchange port data, this is not possible for the former case. Move all these update into separate, and fenced function that first checks whether the scsi host object already exists or not, before making the updates. During the first ever exchange port data in the adapter life cycle this will make the exchange port data handler skip over this update step, but we can repeat it later, after we allocated the scsi host object. For any further recovery of that adapter the work flow is only changed slightly because then the scsi host object already exists and we don't free it until we release the adapter completely at the end of its life cycle. Link: https://lore.kernel.org/r/ae454c2dc6da0b02907c489af91d0b211d331825.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_ext.h | 3 +++ drivers/s390/scsi/zfcp_fsf.c | 12 +++--------- drivers/s390/scsi/zfcp_scsi.c | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 4c4be754cf6e..5dc9bdc5803f 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -176,6 +176,9 @@ extern void zfcp_scsi_shost_update_config_data( struct zfcp_adapter *const adapter, const struct fsf_qtcb_bottom_config *const bottom, const bool bottom_incomplete); +extern void zfcp_scsi_shost_update_port_data( + struct zfcp_adapter *const adapter, + const struct fsf_qtcb_bottom_port *const bottom); /* zfcp_sysfs.c */ extern const struct attribute_group *zfcp_unit_attr_groups[]; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 54edfcbe84ce..bfb567a1d7bf 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -725,19 +725,10 @@ static void zfcp_fsf_exchange_port_evaluate(struct zfcp_fsf_req *req) { struct zfcp_adapter *adapter = req->adapter; struct fsf_qtcb_bottom_port *bottom = &req->qtcb->bottom.port; - struct Scsi_Host *shost = adapter->scsi_host; if (req->data) memcpy(req->data, bottom, sizeof(*bottom)); - fc_host_permanent_port_name(shost) = bottom->wwpn; - fc_host_maxframe_size(shost) = bottom->maximum_frame_size; - fc_host_supported_speeds(shost) = - zfcp_fsf_convert_portspeed(bottom->supported_speed); - memcpy(fc_host_supported_fc4s(shost), bottom->supported_fc4_types, - FC_FC4_LIST_SIZE); - memcpy(fc_host_active_fc4s(shost), bottom->active_fc4_types, - FC_FC4_LIST_SIZE); if (adapter->adapter_features & FSF_FEATURE_FC_SECURITY) adapter->fc_security_algorithms = bottom->fc_security_algorithms; @@ -764,6 +755,7 @@ static void zfcp_fsf_exchange_port_data_handler(struct zfcp_fsf_req *req) */ zfcp_diag_update_xdata(diag_hdr, bottom, false); + zfcp_scsi_shost_update_port_data(req->adapter, bottom); zfcp_fsf_exchange_port_evaluate(req); break; case FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE: @@ -772,6 +764,8 @@ static void zfcp_fsf_exchange_port_data_handler(struct zfcp_fsf_req *req) zfcp_fsf_link_down_info_eval(req, &qtcb->header.fsf_status_qual.link_down_info); + + zfcp_scsi_shost_update_port_data(req->adapter, bottom); zfcp_fsf_exchange_port_evaluate(req); break; } diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index e1281a1ce488..f98e4015a0ed 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -911,6 +911,25 @@ void zfcp_scsi_shost_update_config_data( } } +void zfcp_scsi_shost_update_port_data( + struct zfcp_adapter *const adapter, + const struct fsf_qtcb_bottom_port *const bottom) +{ + struct Scsi_Host *const shost = adapter->scsi_host; + + if (shost == NULL) + return; + + fc_host_permanent_port_name(shost) = bottom->wwpn; + fc_host_maxframe_size(shost) = bottom->maximum_frame_size; + fc_host_supported_speeds(shost) = + zfcp_fsf_convert_portspeed(bottom->supported_speed); + memcpy(fc_host_supported_fc4s(shost), bottom->supported_fc4_types, + FC_FC4_LIST_SIZE); + memcpy(fc_host_active_fc4s(shost), bottom->active_fc4_types, + FC_FC4_LIST_SIZE); +} + struct fc_function_template zfcp_transport_functions = { .show_starget_port_id = 1, .show_starget_port_name = 1, From 990486f3a8508494dab2a7ff0fcc3eb977557d89 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:31 +0200 Subject: [PATCH 297/562] scsi: zfcp: Fence fc_host updates during link-down handling When receiving a notification that a FCP device lost its local link we usually update the fibre channel host object which represents that FCP device to reflect that. This notification/information can also surface when the FCP device is running through adapter recovery (exchange config and exchange port data return incomplete). When moving the scsi host object allocation and registration - and thus also the fibre channel host object allocation - to after the first exchange config and exchange port data, and this happens during the very first adapter recovery, these updates can not be done until after the scsi host object is allocated. Reorder the fc_host updates in zfcp_fsf_fc_host_link_down() so that they only happen after a check of whether the scsi host object is already allocated or not. During the first adapter recovery this will cause the skip of these updates if a link-down condition is detected, but we can repeat them after we allocated the scsi host object, if necessary. For any further link-down handling the only changes in the work flow are the slightly reordered assignments in zfcp_fsf_fc_host_link_down(). Link: https://lore.kernel.org/r/f841f2cda61dcd7b8549910c44e1831927459edf.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index bfb567a1d7bf..8c4b690e329e 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -124,17 +124,21 @@ static void zfcp_fsf_fc_host_link_down(struct zfcp_adapter *adapter) { struct Scsi_Host *shost = adapter->scsi_host; + adapter->hydra_version = 0; + adapter->peer_wwpn = 0; + adapter->peer_wwnn = 0; + adapter->peer_d_id = 0; + + /* if there is no shost yet, we have nothing to zero-out */ + if (shost == NULL) + return; + fc_host_port_id(shost) = 0; fc_host_fabric_name(shost) = 0; fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN; fc_host_port_type(shost) = FC_PORTTYPE_UNKNOWN; - adapter->hydra_version = 0; snprintf(fc_host_model(shost), FC_SYMBOLIC_NAME_SIZE, "0x%04x", 0); memset(fc_host_active_fc4s(shost), 0, FC_FC4_LIST_SIZE); - - adapter->peer_wwpn = 0; - adapter->peer_wwnn = 0; - adapter->peer_d_id = 0; } static void zfcp_fsf_link_down_info_eval(struct zfcp_fsf_req *req, From ac007adc4d2d9258fdf27abd25cc77a4e0e8d19f Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:32 +0200 Subject: [PATCH 298/562] scsi: zfcp: Move p-t-p port allocation to after xport data When doing the very first adapter recovery - initialization - for a FCP device in a point-to-point topology we also allocate the port object corresponding to the attached remote port, and trigger a port recovery for it that will run after the adapter recovery finished. Right now this happens right after we finished with the exchange config data command, and uses the fibre channel host object corresponding to the FCP device to determine whether a point-to-point topology is used. When moving the scsi host object allocation and registration - and thus also the fibre channel host object allocation - to after the first exchange config and exchange port data, this use of the fc_host object is not possible anymore at that point in the work flow. But the allocation and recovery trigger doesn't have notable side-effects on the following exchange port data processing, so we can move those to after xport data, and thus also to after the scsi host object allocation, once we move it. Then the fc_host object can be used again, like it is now. For any further adapter recoveries this doesn't change anything, because at that point the port object already exists and recovery is triggered elsewhere for existing port objects. Link: https://lore.kernel.org/r/73e5d4ac21e2b37bf0c3ca8e530bc5a5c6e74f8f.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_erp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 3d0bc000f500..356f51d2bbee 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -4,7 +4,7 @@ * * Error Recovery Procedures (ERP). * - * Copyright IBM Corp. 2002, 2017 + * Copyright IBM Corp. 2002, 2020 */ #define KMSG_COMPONENT "zfcp" @@ -768,10 +768,14 @@ static enum zfcp_erp_act_result zfcp_erp_adapter_strat_fsf_xconf( if (!(atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_XCONFIG_OK)) return ZFCP_ERP_FAILED; + return ZFCP_ERP_SUCCEEDED; +} + +static void +zfcp_erp_adapter_strategy_open_ptp_port(struct zfcp_adapter *const adapter) +{ if (fc_host_port_type(adapter->scsi_host) == FC_PORTTYPE_PTP) zfcp_erp_enqueue_ptp_port(adapter); - - return ZFCP_ERP_SUCCEEDED; } static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf_xport( @@ -809,6 +813,8 @@ static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf( if (zfcp_erp_adapter_strategy_open_fsf_xport(act) == ZFCP_ERP_FAILED) return ZFCP_ERP_FAILED; + zfcp_erp_adapter_strategy_open_ptp_port(act->adapter); + if (mempool_resize(act->adapter->pool.sr_data, act->adapter->stat_read_buf_num)) return ZFCP_ERP_FAILED; From 971f2abb4ca4095bb4bd7c2ba119d1cca078b433 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:33 +0200 Subject: [PATCH 299/562] scsi: zfcp: Fence adapter status propagation for common statuses Common status flags that all main objects - adapter, port, and unit - support are propagated to sub-objects when set or cleared. For instance, when setting the status ZFCP_STATUS_COMMON_ERP_INUSE for an adapter object, we will propagate this to all its child ports and units - same for when clearing a common status flag. Units of an adapter object are enumerated via __shost_for_each_device() over the scsi host object of the corresponding adapter. Once we move the scsi host object allocation and registration to after the first exchange config and exchange port data, this won't be possible for cases where we set or clear common statuses during the very first adapter recovery. But since we won't have any port or unit objects yet at that point of time, we can just fence the status propagation for cases where the scsi host object is not yet set in the adapter object. It won't change any effective status propagations, but will prevent us from dereferencing invalid pointers. For any later point in the work flow the scsi host object will be set and thus nothing is changed then. Link: https://lore.kernel.org/r/f51fe5f236a1e3d1ce53379c308777561bfe35e1.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_erp.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 356f51d2bbee..283bcb985655 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1642,6 +1642,13 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask) atomic_or(common_mask, &port->status); read_unlock_irqrestore(&adapter->port_list_lock, flags); + /* + * if `scsi_host` is missing, xconfig/xport data has never completed + * yet, so we can't access it, but there are also no SDEVs yet + */ + if (adapter->scsi_host == NULL) + return; + spin_lock_irqsave(adapter->scsi_host->host_lock, flags); __shost_for_each_device(sdev, adapter->scsi_host) atomic_or(common_mask, &sdev_to_zfcp(sdev)->status); @@ -1679,6 +1686,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask) } read_unlock_irqrestore(&adapter->port_list_lock, flags); + /* + * if `scsi_host` is missing, xconfig/xport data has never completed + * yet, so we can't access it, but there are also no SDEVs yet + */ + if (adapter->scsi_host == NULL) + return; + spin_lock_irqsave(adapter->scsi_host->host_lock, flags); __shost_for_each_device(sdev, adapter->scsi_host) { atomic_andnot(common_mask, &sdev_to_zfcp(sdev)->status); From 71159b6ecb067565fb41faba616116e8c0fc10ea Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:34 +0200 Subject: [PATCH 300/562] scsi: zfcp: Fence early sysfs interfaces for accesses of shost objects When setting an adapter online for the first time, we also create a couple of entries for it in the sysfs device tree. This is also true even if the adapter has not yet ever gone successfully through exchange config and exchange port data. When moving the scsi host object allocation and registration to after the first exchange config and exchange port data, this make the `port_rescan` attribute susceptible to invalid pointer-dereferences of the shost field before the adapter is fully initialized. When written to, it schedules a `scan_work` item that will in turn make use of the associated fibre channel host object to check the topology used for this FCP device. Because scanning for remote ports can't be done successfully without completing exchange config and exchange port data first, we can simply fence `port_rescan`, and so prevent the illegal access. As with cases where we can't get a reference to the adapter, we also return -ENODEV here. Applications need to handle that errno today already. After a successful allocation of the scsi host object nothing changes in the work flow. Link: https://lore.kernel.org/r/ef65366d309993ca91b6917727590ca7ca166c8f.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_sysfs.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 7ec30ded0169..8d9662e8b717 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -216,10 +216,22 @@ static ssize_t zfcp_sysfs_port_rescan_store(struct device *dev, { struct ccw_device *cdev = to_ccwdev(dev); struct zfcp_adapter *adapter = zfcp_ccw_adapter_by_cdev(cdev); + int retval = 0; if (!adapter) return -ENODEV; + /* + * If `scsi_host` is missing, we can't schedule `scan_work`, as it + * makes use of the corresponding fc_host object. But this state is + * only possible if xconfig/xport data has never completed yet, + * and we couldn't successfully scan for ports anyway. + */ + if (adapter->scsi_host == NULL) { + retval = -ENODEV; + goto out; + } + /* * Users wish is our command: immediately schedule and flush a * worker to conduct a synchronous port scan, that is, neither @@ -227,9 +239,9 @@ static ssize_t zfcp_sysfs_port_rescan_store(struct device *dev, */ queue_delayed_work(adapter->work_queue, &adapter->scan_work, 0); flush_delayed_work(&adapter->scan_work); +out: zfcp_ccw_adapter_put(adapter); - - return (ssize_t) count; + return retval ? retval : (ssize_t) count; } static ZFCP_DEV_ATTR(adapter, port_rescan, S_IWUSR, NULL, zfcp_sysfs_port_rescan_store); From d0dff2ac98dd41d7d451127d9eae2f6478fc40b0 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 8 May 2020 19:23:35 +0200 Subject: [PATCH 301/562] scsi: zfcp: Move allocation of the shost object to after xconf- and xport-data At the moment we allocate and register the Scsi_Host object corresponding to a zfcp adapter (FCP device) very early in the life cycle of the adapter - even before we fully discover and initialize the underlying firmware/hardware. This had the advantage that we could already use the Scsi_Host object, and fill in all its information during said discover and initialize. Due to commit 737eb78e82d5 ("block: Delay default elevator initialization") (first released in v5.4), we noticed a regression that would prevent us from using any storage volume if zfcp is configured with support for DIF or DIX (zfcp.dif=1 || zfcp.dix=1). Doing so would result in an illegal memory access as soon as the first request is sent with such an configuration. As example for a crash resulting from this: scsi host0: scsi_eh_0: sleeping scsi host0: zfcp qdio: 0.0.1900 ZFCP on SC 4bd using AI:1 QEBSM:0 PRI:1 TDD:1 SIGA: W AP scsi 0:0:0:0: scsi scan: INQUIRY pass 1 length 36 Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:0000000035c7c007 R3:00000001effcc007 S:00000001effd1000 P:000000000000003d Oops: 0004 ilc:3 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: ... CPU: 1 PID: 783 Comm: kworker/u760:5 Kdump: loaded Not tainted 5.6.0-rc2-bb-next+ #1 Hardware name: ... Workqueue: scsi_wq_0 fc_scsi_scan_rport [scsi_transport_fc] Krnl PSW : 0704e00180000000 000003ff801fcdae (scsi_queue_rq+0x436/0x740 [scsi_mod]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0fffffffffffffff 0000000000000000 0000000187150120 0000000000000000 000003ff80223d20 000000000000018e 000000018adc6400 0000000187711000 000003e0062337e8 00000001ae719000 0000000187711000 0000000187150000 00000001ab808100 0000000187150120 000003ff801fcd74 000003e0062336a0 Krnl Code: 000003ff801fcd9e: e310a35c0012 lt %r1,860(%r10) 000003ff801fcda4: a7840010 brc 8,000003ff801fcdc4 #000003ff801fcda8: e310b2900004 lg %r1,656(%r11) >000003ff801fcdae: d71710001000 xc 0(24,%r1),0(%r1) 000003ff801fcdb4: e310b2900004 lg %r1,656(%r11) 000003ff801fcdba: 41201018 la %r2,24(%r1) 000003ff801fcdbe: e32010000024 stg %r2,0(%r1) 000003ff801fcdc4: b904002b lgr %r2,%r11 Call Trace: [<000003ff801fcdae>] scsi_queue_rq+0x436/0x740 [scsi_mod] ([<000003ff801fcd74>] scsi_queue_rq+0x3fc/0x740 [scsi_mod]) [<00000000349c9970>] blk_mq_dispatch_rq_list+0x390/0x680 [<00000000349d1596>] blk_mq_sched_dispatch_requests+0x196/0x1a8 [<00000000349c7a04>] __blk_mq_run_hw_queue+0x144/0x160 [<00000000349c7ab6>] __blk_mq_delay_run_hw_queue+0x96/0x228 [<00000000349c7d5a>] blk_mq_run_hw_queue+0xd2/0xe0 [<00000000349d194a>] blk_mq_sched_insert_request+0x192/0x1d8 [<00000000349c17b8>] blk_execute_rq_nowait+0x80/0x90 [<00000000349c1856>] blk_execute_rq+0x6e/0xb0 [<000003ff801f8ac2>] __scsi_execute+0xe2/0x1f0 [scsi_mod] [<000003ff801fef98>] scsi_probe_and_add_lun+0x358/0x840 [scsi_mod] [<000003ff8020001c>] __scsi_scan_target+0xc4/0x228 [scsi_mod] [<000003ff80200254>] scsi_scan_target+0xd4/0x100 [scsi_mod] [<000003ff802d8b96>] fc_scsi_scan_rport+0x96/0xc0 [scsi_transport_fc] [<0000000034245ce8>] process_one_work+0x458/0x7d0 [<00000000342462a2>] worker_thread+0x242/0x448 [<0000000034250994>] kthread+0x15c/0x170 [<0000000034e1979c>] ret_from_fork+0x30/0x38 INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801fbc36>] scsi_add_cmd_to_list+0x9e/0xa8 [scsi_mod] Kernel panic - not syncing: Fatal exception: panic_on_oops While this issue is exposed by the commit named above, this is only by accident. The real issue exists for longer already - basically since it's possible to use blk-mq via scsi-mq, and blk-mq pre-allocates all requests for a tag-set during initialization of the same. For a given Scsi_Host object this is done when adding the object to the midlayer (`scsi_add_host()` and such). In `scsi_mq_setup_tags()` the midlayer calculates how much memory is required for a single scsi_cmnd, and its additional data, which also might include space for additional protection data - depending on whether the Scsi_Host has any form of protection capabilities (`scsi_host_get_prot()`). The problem is now thus, because zfcp does this step before we actually know whether the firmware/hardware has these capabilities, we don't set any protection capabilities in the Scsi_Host object. And so, no space is allocated for additional protection data for requests in the Scsi_Host tag-set. Once we go through discover and initialize the FCP device firmware/hardware fully (this is done via the firmware commands "Exchange Config Data" and "Exchange Port Data") we find out whether it actually supports DIF and DIX, and we set the corresponding capabilities in the Scsi_Host object (in `zfcp_scsi_set_prot()`). Now the Scsi_Host potentially has protection capabilities, but the already allocated requests in the tag-set don't have any space allocated for that. When we then trigger target scanning or add scsi_devices manually, the midlayer will use requests from that tag-set, and before sending most requests, it will also call `scsi_mq_prep_fn()`. To prepare the scsi_cmnd this function will check again whether the used Scsi_Host has any protection capabilities - and now it potentially has - and if so, it will try to initialize the assumed to be preallocated structures and thus it causes the crash, like shown above. Before delaying the default elevator initialization with the commit named above, we always would also allocate an elevator for any scsi_device before ever sending any requests - in contrast to now, where we do it after device-probing. That elevator in turn would have its own tag-set, and that is initialized after we went through discovery and initialization of the underlying firmware/hardware. So requests from that tag-set can be allocated properly, and if used - unless the user changes/disabled the default elevator - this would hide the underlying issue. To fix this for any configuration - with or without an elevator - we move the allocation and registration of the Scsi_Host object for a given FCP device to after the first complete discovery and initialization of the underlying firmware/hardware. By doing that we can make all basic properties of the Scsi_Host known to the midlayer by the time we call `scsi_add_host()`, including whether we have any protection capabilities. To do that we have to delay all the accesses that we would have done in the past during discovery and initialization, and do them instead once we are finished with it. The previous patches ramp up to this by fencing and factoring out all these accesses, and make it possible to re-do them later on. In addition we make also use of the diagnostic buffers we recently added with commit 92953c6e0aa7 ("scsi: zfcp: signal incomplete or error for sync exchange config/port data") commit 7e418833e689 ("scsi: zfcp: diagnostics buffer caching and use for exchange port data") commit 088210233e6f ("scsi: zfcp: add diagnostics buffer for exchange config data") (first released in v5.5), because these already cache all the information we need for that "re-do operation" - the information cached are always updated during xconf or xport data, so it won't be stale. In addition to the move and re-do, this patch also updates the function-documentation of `zfcp_scsi_adapter_register()` and changes how it reports if a Scsi_Host object already exists. In that case future recovery-operations can skip this step completely and behave much like they would do in the past - zfcp does not release a once allocated Scsi_Host object unless the corresponding FCP device is deconstructed completely. Link: https://lore.kernel.org/r/030dd6da318bbb529f0b5268ec65cebcd20fc0a3.1588956679.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_aux.c | 5 ++- drivers/s390/scsi/zfcp_diag.h | 6 ++-- drivers/s390/scsi/zfcp_erp.c | 58 +++++++++++++++++++++++++++++++++++ drivers/s390/scsi/zfcp_ext.h | 1 + drivers/s390/scsi/zfcp_fsf.c | 2 +- drivers/s390/scsi/zfcp_scsi.c | 40 ++++++++++++++++++------ 6 files changed, 96 insertions(+), 16 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 09ec846fe01d..18b713a616de 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -4,7 +4,7 @@ * * Module interface and handling of zfcp data structures. * - * Copyright IBM Corp. 2002, 2018 + * Copyright IBM Corp. 2002, 2020 */ /* @@ -415,8 +415,7 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device) adapter->stat_read_buf_num = FSF_STATUS_READS_RECOM; - if (!zfcp_scsi_adapter_register(adapter)) - return adapter; + return adapter; failed: zfcp_adapter_unregister(adapter); diff --git a/drivers/s390/scsi/zfcp_diag.h b/drivers/s390/scsi/zfcp_diag.h index b9c93d15f67c..3852367f15f6 100644 --- a/drivers/s390/scsi/zfcp_diag.h +++ b/drivers/s390/scsi/zfcp_diag.h @@ -4,7 +4,7 @@ * * Definitions for handling diagnostics in the the zfcp device driver. * - * Copyright IBM Corp. 2018 + * Copyright IBM Corp. 2018, 2020 */ #ifndef ZFCP_DIAG_H @@ -56,11 +56,11 @@ struct zfcp_diag_adapter { unsigned long max_age; - struct { + struct zfcp_diag_adapter_port_data { struct zfcp_diag_header header; struct fsf_qtcb_bottom_port data; } port_data; - struct { + struct zfcp_diag_adapter_config_data { struct zfcp_diag_header header; struct fsf_qtcb_bottom_config data; } config_data; diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 283bcb985655..db320dab1fee 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -14,6 +14,7 @@ #include #include "zfcp_ext.h" #include "zfcp_reqlist.h" +#include "zfcp_diag.h" #define ZFCP_MAX_ERPS 3 @@ -804,6 +805,59 @@ static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf_xport( return ZFCP_ERP_SUCCEEDED; } +static enum zfcp_erp_act_result +zfcp_erp_adapter_strategy_alloc_shost(struct zfcp_adapter *const adapter) +{ + struct zfcp_diag_adapter_config_data *const config_data = + &adapter->diagnostics->config_data; + struct zfcp_diag_adapter_port_data *const port_data = + &adapter->diagnostics->port_data; + unsigned long flags; + int rc; + + rc = zfcp_scsi_adapter_register(adapter); + if (rc == -EEXIST) + return ZFCP_ERP_SUCCEEDED; + else if (rc) + return ZFCP_ERP_FAILED; + + /* + * We allocated the shost for the first time. Before it was NULL, + * and so we deferred all updates in the xconf- and xport-data + * handlers. We need to make up for that now, and make all the updates + * that would have been done before. + * + * We can be sure that xconf- and xport-data succeeded, because + * otherwise this function is not called. But they might have been + * incomplete. + */ + + spin_lock_irqsave(&config_data->header.access_lock, flags); + zfcp_scsi_shost_update_config_data(adapter, &config_data->data, + !!config_data->header.incomplete); + spin_unlock_irqrestore(&config_data->header.access_lock, flags); + + if (adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT) { + spin_lock_irqsave(&port_data->header.access_lock, flags); + zfcp_scsi_shost_update_port_data(adapter, &port_data->data); + spin_unlock_irqrestore(&port_data->header.access_lock, flags); + } + + /* + * There is a remote possibility that the 'Exchange Port Data' request + * reports a different connectivity status than 'Exchange Config Data'. + * But any change to the connectivity status of the local optic that + * happens after the initial xconf request is expected to be reported + * to us, as soon as we post Status Read Buffers to the FCP channel + * firmware after this function. So any resulting inconsistency will + * only be momentary. + */ + if (config_data->header.incomplete) + zfcp_fsf_fc_host_link_down(adapter); + + return ZFCP_ERP_SUCCEEDED; +} + static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf( struct zfcp_erp_action *act) { @@ -813,6 +867,10 @@ static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf( if (zfcp_erp_adapter_strategy_open_fsf_xport(act) == ZFCP_ERP_FAILED) return ZFCP_ERP_FAILED; + if (zfcp_erp_adapter_strategy_alloc_shost(act->adapter) == + ZFCP_ERP_FAILED) + return ZFCP_ERP_FAILED; + zfcp_erp_adapter_strategy_open_ptp_port(act->adapter); if (mempool_resize(act->adapter->pool.sr_data, diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 5dc9bdc5803f..3ef5d74331c3 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -135,6 +135,7 @@ extern int zfcp_fsf_send_els(struct zfcp_adapter *, u32, struct zfcp_fsf_ct_els *, unsigned int); extern int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *); extern void zfcp_fsf_req_free(struct zfcp_fsf_req *); +extern void zfcp_fsf_fc_host_link_down(struct zfcp_adapter *adapter); extern struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev, u8 tm_flags); extern struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *); diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 8c4b690e329e..c795f22249d8 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -120,7 +120,7 @@ static void zfcp_fsf_status_read_port_closed(struct zfcp_fsf_req *req) read_unlock_irqrestore(&adapter->port_list_lock, flags); } -static void zfcp_fsf_fc_host_link_down(struct zfcp_adapter *adapter) +void zfcp_fsf_fc_host_link_down(struct zfcp_adapter *adapter) { struct Scsi_Host *shost = adapter->scsi_host; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index f98e4015a0ed..d58bf79892f2 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -451,26 +451,39 @@ static struct scsi_host_template zfcp_scsi_host_template = { }; /** - * zfcp_scsi_adapter_register - Register SCSI and FC host with SCSI midlayer + * zfcp_scsi_adapter_register() - Allocate and register SCSI and FC host with + * SCSI midlayer * @adapter: The zfcp adapter to register with the SCSI midlayer + * + * Allocates the SCSI host object for the given adapter, sets basic properties + * (such as the transport template, QDIO limits, ...), and registers it with + * the midlayer. + * + * During registration with the midlayer the corresponding FC host object for + * the referenced transport class is also implicitely allocated. + * + * Upon success adapter->scsi_host is set, and upon failure it remains NULL. If + * adapter->scsi_host is already set, nothing is done. + * + * Return: + * * 0 - Allocation and registration was successful + * * -EEXIST - SCSI and FC host did already exist, nothing was done, nothing + * was changed + * * -EIO - Allocation or registration failed */ int zfcp_scsi_adapter_register(struct zfcp_adapter *adapter) { struct ccw_dev_id dev_id; if (adapter->scsi_host) - return 0; + return -EEXIST; ccw_device_get_id(adapter->ccw_device, &dev_id); /* register adapter as SCSI host with mid layer of SCSI stack */ adapter->scsi_host = scsi_host_alloc(&zfcp_scsi_host_template, sizeof (struct zfcp_adapter *)); - if (!adapter->scsi_host) { - dev_err(&adapter->ccw_device->dev, - "Registering the FCP device with the " - "SCSI stack failed\n"); - return -EIO; - } + if (!adapter->scsi_host) + goto err_out; /* tell the SCSI stack some characteristics of this adapter */ adapter->scsi_host->max_id = 511; @@ -480,14 +493,23 @@ int zfcp_scsi_adapter_register(struct zfcp_adapter *adapter) adapter->scsi_host->max_cmd_len = 16; /* in struct fcp_cmnd */ adapter->scsi_host->transportt = zfcp_scsi_transport_template; + /* make all basic properties known at registration time */ + zfcp_qdio_shost_update(adapter, adapter->qdio); + zfcp_scsi_set_prot(adapter); + adapter->scsi_host->hostdata[0] = (unsigned long) adapter; if (scsi_add_host(adapter->scsi_host, &adapter->ccw_device->dev)) { scsi_host_put(adapter->scsi_host); - return -EIO; + goto err_out; } return 0; +err_out: + adapter->scsi_host = NULL; + dev_err(&adapter->ccw_device->dev, + "Registering the FCP device with the SCSI stack failed\n"); + return -EIO; } /** From 47742bde281b2920aae8bb82ed2d61d890aa4f56 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 9 May 2020 13:04:08 +0300 Subject: [PATCH 302/562] scsi: scsi_debug: Fix an error handling bug in sdeb_zbc_model_str() This test is checking the wrong variable. It should be testing "res". The "sdeb_zbc_model" variable is an enum (unsigned in this situation) and we never assign negative values to it. [mkp: fixed commit desc issue reported by Doug] Link: https://lore.kernel.org/r/20200509100408.GA5555@mwanda Fixes: 9267e0eb41fe ("scsi: scsi_debug: Add ZBC module parameter") Acked-by: Douglas Gilbert Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 105e563d87b4..73847366dc49 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6460,7 +6460,7 @@ static int sdeb_zbc_model_str(const char *cp) res = sysfs_match_string(zbc_model_strs_b, cp); if (res < 0) { res = sysfs_match_string(zbc_model_strs_c, cp); - if (sdeb_zbc_model < 0) + if (res < 0) return -EINVAL; } } From 17793833f81ceb319be599ec09498ec0136d9acf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 4 May 2020 16:25:41 +0300 Subject: [PATCH 303/562] RDMA/ucma: Return stable IB device index as identifier The librdmacm uses node_guid as identifier to correlate between IB devices and CMA devices. However FW resets cause to such "connection" to be lost and require from the user to restart its application. Extend UCMA to return IB device index, which is stable identifier. Link: https://lore.kernel.org/r/20200504132541.355710-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 16 +++++++++------- include/uapi/rdma/rdma_user_cm.h | 4 ++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 16b6cf57fa85..06127c800a49 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -845,7 +845,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, struct sockaddr *addr; int ret = 0; - if (out_len < sizeof(resp)) + if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) @@ -869,6 +869,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, goto out; resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; + resp.ibdev_index = ctx->cm_id->device->index; resp.port_num = ctx->cm_id->port_num; if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) @@ -880,8 +881,8 @@ static ssize_t ucma_query_route(struct ucma_file *file, out: mutex_unlock(&ctx->mutex); - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, + min_t(size_t, out_len, sizeof(resp)))) ret = -EFAULT; ucma_put_ctx(ctx); @@ -895,6 +896,7 @@ static void ucma_query_device_addr(struct rdma_cm_id *cm_id, return; resp->node_guid = (__force __u64) cm_id->device->node_guid; + resp->ibdev_index = cm_id->device->index; resp->port_num = cm_id->port_num; resp->pkey = (__force __u16) cpu_to_be16( ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); @@ -907,7 +909,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx, struct sockaddr *addr; int ret = 0; - if (out_len < sizeof(resp)) + if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) return -ENOSPC; memset(&resp, 0, sizeof resp); @@ -922,7 +924,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx, ucma_query_device_addr(ctx->cm_id, &resp); - if (copy_to_user(response, &resp, sizeof(resp))) + if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) ret = -EFAULT; return ret; @@ -974,7 +976,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx, struct sockaddr_ib *addr; int ret = 0; - if (out_len < sizeof(resp)) + if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) return -ENOSPC; memset(&resp, 0, sizeof resp); @@ -1007,7 +1009,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx, &ctx->cm_id->route.addr.dst_addr); } - if (copy_to_user(response, &resp, sizeof(resp))) + if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) ret = -EFAULT; return ret; diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index e42940a215a3..1bb6e75d254b 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -164,6 +164,8 @@ struct rdma_ucm_query_route_resp { __u32 num_paths; __u8 port_num; __u8 reserved[3]; + __u32 ibdev_index; + __u32 reserved1; }; struct rdma_ucm_query_addr_resp { @@ -175,6 +177,8 @@ struct rdma_ucm_query_addr_resp { __u16 dst_size; struct __kernel_sockaddr_storage src_addr; struct __kernel_sockaddr_storage dst_addr; + __u32 ibdev_index; + __u32 reserved1; }; struct rdma_ucm_query_path_resp { From 30661322b8c32e020c6638dc4317034ec95a1b3f Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Tue, 5 May 2020 18:30:06 +0800 Subject: [PATCH 304/562] RDMA/hns: Extend capability flags for HIP08_C 12 bits is not enough for HIP08_C, so extend a new field in length of 16 bits for it. Link: https://lore.kernel.org/r/1588674607-25337-3-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5cac14d7be90..226df2040fdd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -209,6 +209,8 @@ enum { HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, }; +#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 + enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ad9a11a2cd0d..5aa56b7dc42b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1887,6 +1887,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->flags = roce_get_field(resp_c->cap_flags_num_pds, V2_QUERY_PF_CAPS_C_CAP_FLAGS_M, V2_QUERY_PF_CAPS_C_CAP_FLAGS_S); + caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) << + HNS_ROCE_CAP_FLAGS_EX_SHIFT; + caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs, V2_QUERY_PF_CAPS_C_NUM_CQS_M, V2_QUERY_PF_CAPS_C_NUM_CQS_S); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 05bfe078d537..938b7b522faf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1647,7 +1647,7 @@ struct hns_roce_query_pf_caps_c { struct hns_roce_query_pf_caps_d { __le32 wq_hop_num_max_srqs; __le16 srq_depth; - __le16 rsv; + __le16 cap_flags_ex; __le32 num_ceqs_ceq_depth; __le32 arm_st_aeq_depth; __le32 num_uars_rsv_pds; From 90ae0b57e4a515342fe74ffa21f6972f5145d645 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 5 May 2020 18:30:07 +0800 Subject: [PATCH 305/562] RDMA/hns: Combine enable flags of qp It's easier to understand and maintain enable flags of qp using a single field in type of unsigned long than defining a field for every flags in the structure hns_roce_qp, and we can add new flags for features more conveniently in the future. Link: https://lore.kernel.org/r/1588674607-25337-4-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 7 +++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 22 ++++++++++----------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 226df2040fdd..4fcd608ee55f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -131,8 +131,8 @@ enum { }; enum { - HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, - HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), + HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), }; enum { @@ -623,8 +623,7 @@ struct hns_roce_qp { struct hns_roce_wq rq; struct hns_roce_db rdb; struct hns_roce_db sdb; - u8 rdb_en; - u8 sdb_en; + unsigned long en_flags; u32 doorbell_qpn; u32 sq_signal_bits; struct hns_roce_wq sq; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5aa56b7dc42b..ebe570aa2323 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3619,7 +3619,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0xfff); - if (hr_qp->rdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) roce_set_bit(context->byte_68_rq_db, V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index b5707596148d..dca979d8c345 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -750,8 +750,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, "Failed to map user SQ doorbell\n"); goto err_out; } - hr_qp->sdb_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; + resp->cap_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; } if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { @@ -762,8 +762,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, "Failed to map user RQ doorbell\n"); goto err_sdb; } - hr_qp->rdb_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; + resp->cap_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; } } else { /* QP doorbell register address */ @@ -780,13 +780,13 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, goto err_out; } *hr_qp->rdb.db_record = 0; - hr_qp->rdb_en = 1; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; } } return 0; err_sdb: - if (udata && hr_qp->sdb_en) + if (udata && hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); err_out: return ret; @@ -799,12 +799,12 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, udata, struct hns_roce_ucontext, ibucontext); if (udata) { - if (hr_qp->rdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hns_roce_db_unmap_user(uctx, &hr_qp->rdb); - if (hr_qp->sdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); } else { - if (hr_qp->rdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hns_roce_free_db(hr_dev, &hr_qp->rdb); } } @@ -1178,10 +1178,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ibqp->uobject && (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { - if (hr_qp->sdb_en == 1) { + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) { hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); - if (hr_qp->rdb_en == 1) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { ibdev_warn(&hr_dev->ib_dev, From 55ce24b3bfd75f76696a00f2666caaf806eebea2 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 12 May 2020 19:32:58 +0800 Subject: [PATCH 306/562] scsi: hisi_sas: Display proc_name in sysfs The 'proc_name' entry in sysfs for hisi_sas is 'null' now because it is not initialized in scsi_host_template. It looks like: [root@localhost ~]# cat /sys/class/scsi_host/host2/proc_name (null) While the other driver's entry looks like: linux-vnMQMU:~ # cat /sys/class/scsi_host/host0/proc_name megaraid_sas Link: https://lore.kernel.org/r/20200512113258.30781-1-yanaijie@huawei.com Cc: John Garry Cc: Xiang Chen Acked-by: John Garry Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 1 + drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 1 + drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index fa25766502a2..c205bff20943 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1757,6 +1757,7 @@ static struct device_attribute *host_attrs_v1_hw[] = { static struct scsi_host_template sht_v1_hw = { .name = DRV_NAME, + .proc_name = DRV_NAME, .module = THIS_MODULE, .queuecommand = sas_queuecommand, .target_alloc = sas_target_alloc, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index e05faf315dcd..c725cffe141e 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3533,6 +3533,7 @@ static struct device_attribute *host_attrs_v2_hw[] = { static struct scsi_host_template sht_v2_hw = { .name = DRV_NAME, + .proc_name = DRV_NAME, .module = THIS_MODULE, .queuecommand = sas_queuecommand, .target_alloc = sas_target_alloc, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 374885aa8d77..59b1421607dd 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3071,6 +3071,7 @@ static int debugfs_set_bist_v3_hw(struct hisi_hba *hisi_hba, bool enable) static struct scsi_host_template sht_v3_hw = { .name = DRV_NAME, + .proc_name = DRV_NAME, .module = THIS_MODULE, .queuecommand = sas_queuecommand, .target_alloc = sas_target_alloc, From f8f2a576cb0c574044c049210ea4096e5cb1d7fc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:46:52 +0300 Subject: [PATCH 307/562] RDMA/addr: Mark addr_resolve as might_sleep() Under one path through ib_nl_fetch_ha() this calls nlmsg_new(GFP_KERNEL) which is a sleeping call. This is a very rare path, so mark fetch_ha() and the module external entry point that conditionally calls through to fetch_ha() as might_sleep(). Link: https://lore.kernel.org/r/20200506074701.9775-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1753a9801b70..3a98439bba83 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -371,6 +371,8 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, (const void *)&dst_in6->sin6_addr; sa_family_t family = dst_in->sa_family; + might_sleep(); + /* If we have a gateway in IB mode then it must be an IB network */ if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) return ib_nl_fetch_ha(dev_addr, daddr, seq, family); @@ -727,6 +729,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, struct rdma_dev_addr dev_addr = {}; int ret; + might_sleep(); + if (rec->roce.route_resolved) return 0; From d3552fb65d239be4775dc9b24740efd1bd70b459 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:46:53 +0300 Subject: [PATCH 308/562] RDMA/cm: Remove return code from add_cm_id_to_port_list This cannot happen, all callers pass in one of the two pointers. Use a WARN_ON guard instead. Link: https://lore.kernel.org/r/20200506074701.9775-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 68e1a9bba027..ea3910917a7b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -474,24 +474,19 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, - struct cm_av *av, - struct cm_port *port) +static void add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, + struct cm_av *av, struct cm_port *port) { unsigned long flags; - int ret = 0; spin_lock_irqsave(&cm.lock, flags); - if (&cm_id_priv->av == av) list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); else if (&cm_id_priv->alt_av == av) list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); else - ret = -EINVAL; - + WARN_ON(true); spin_unlock_irqrestore(&cm.lock, flags); - return ret; } static struct cm_port * @@ -572,12 +567,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, return ret; av->timeout = path->packet_life_time + 1; - - ret = add_cm_id_to_port_list(cm_id_priv, av, port); - if (ret) { - rdma_destroy_ah_attr(&new_ah_attr); - return ret; - } + add_cm_id_to_port_list(cm_id_priv, av, port); rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } From 42113eed8f10533ba419df535b6c0ff9141a948c Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 May 2020 10:46:54 +0300 Subject: [PATCH 309/562] RDMA/cm: Remove unused store to ret in cm_rej_handler The 'goto out' label doesn't read ret, so don't set it. Link: https://lore.kernel.org/r/20200506074701.9775-4-leon@kernel.org Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index ea3910917a7b..c12fd673678d 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3056,7 +3056,6 @@ static int cm_rej_handler(struct cm_work *work) __func__, be32_to_cpu(cm_id_priv->id.local_id), cm_id_priv->id.state); spin_unlock_irq(&cm_id_priv->lock); - ret = -EINVAL; goto out; } From e83f195aa45c1ffd73b3a950a887e41c260cf194 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:46:55 +0300 Subject: [PATCH 310/562] RDMA/cm: Pull duplicated code into cm_queue_work_unlock() While unlocking a spinlock held by the caller is a disturbing pattern, this extensively duplicated code is even worse. Pull all the duplicates into a function and explain the purpose of the algorithm. The on creation side call in cm_req_handler() which is different has been micro-optimized on the basis that the work_count == -1 during creation, remove that and just use the normal function. Link: https://lore.kernel.org/r/20200506074701.9775-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 146 +++++++++++------------------------ 1 file changed, 44 insertions(+), 102 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c12fd673678d..f56494de3c77 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -81,8 +81,11 @@ const char *__attribute_const__ ibcm_reject_msg(int reason) EXPORT_SYMBOL(ibcm_reject_msg); struct cm_id_private; +struct cm_work; static int cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); +static void cm_process_work(struct cm_id_private *cm_id_priv, + struct cm_work *work); static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param); static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, @@ -907,6 +910,35 @@ static void cm_free_work(struct cm_work *work) kfree(work); } +static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv, + struct cm_work *work) +{ + bool immediate; + + /* + * To deliver the event to the user callback we have the drop the + * spinlock, however, we need to ensure that the user callback is single + * threaded and receives events in the temporal order. If there are + * already events being processed then thread new events onto a list, + * the thread currently processing will pick them up. + */ + immediate = atomic_inc_and_test(&cm_id_priv->work_count); + if (!immediate) { + list_add_tail(&work->list, &cm_id_priv->work_list); + /* + * This routine always consumes incoming reference. Once queued + * to the work_list then a reference is held by the thread + * currently running cm_process_work() and this reference is not + * needed. + */ + cm_deref_id(cm_id_priv); + } + spin_unlock_irq(&cm_id_priv->lock); + + if (immediate) + cm_process_work(cm_id_priv, work); +} + static inline int cm_convert_to_ms(int iba_time) { /* approximate conversion to ms from 4.096us x 2^iba_time */ @@ -2144,9 +2176,7 @@ static int cm_req_handler(struct cm_work *work) /* Refcount belongs to the event, pairs with cm_process_work() */ refcount_inc(&cm_id_priv->refcount); - atomic_inc(&cm_id_priv->work_count); - spin_unlock_irq(&cm_id_priv->lock); - cm_process_work(cm_id_priv, work); + cm_queue_work_unlock(cm_id_priv, work); /* * Since this ID was just created and was not made visible to other MAD * handlers until the cm_finalize_id() above we know that the @@ -2492,15 +2522,7 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->alt_av.timeout - 1); ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; error: @@ -2511,7 +2533,6 @@ static int cm_rep_handler(struct cm_work *work) static int cm_establish_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; - int ret; /* See comment in cm_establish about lookup. */ cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); @@ -2525,15 +2546,7 @@ static int cm_establish_handler(struct cm_work *work) } ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); @@ -2544,7 +2557,6 @@ static int cm_rtu_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rtu_msg *rtu_msg; - int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id( @@ -2567,15 +2579,7 @@ static int cm_rtu_handler(struct cm_work *work) cm_id_priv->id.state = IB_CM_ESTABLISHED; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); @@ -2768,7 +2772,6 @@ static int cm_dreq_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; - int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id( @@ -2833,15 +2836,7 @@ static int cm_dreq_handler(struct cm_work *work) } cm_id_priv->id.state = IB_CM_DREQ_RCVD; cm_id_priv->tid = dreq_msg->hdr.tid; - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); @@ -2853,7 +2848,6 @@ static int cm_drep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_drep_msg *drep_msg; - int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id( @@ -2874,15 +2868,7 @@ static int cm_drep_handler(struct cm_work *work) cm_enter_timewait(cm_id_priv); ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); @@ -3010,7 +2996,6 @@ static int cm_rej_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rej_msg *rej_msg; - int ret; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_rejected_id(rej_msg); @@ -3059,15 +3044,7 @@ static int cm_rej_handler(struct cm_work *work) goto out; } - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); @@ -3177,7 +3154,7 @@ static int cm_mra_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_mra_msg *mra_msg; - int timeout, ret; + int timeout; mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_mraed_id(mra_msg); @@ -3237,15 +3214,7 @@ static int cm_mra_handler(struct cm_work *work) cm_id_priv->msg->context[1] = (void *) (unsigned long) cm_id_priv->id.state; - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; out: spin_unlock_irq(&cm_id_priv->lock); @@ -3380,15 +3349,7 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); @@ -3400,7 +3361,6 @@ static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_apr_msg *apr_msg; - int ret; /* Currently Alternate path messages are not supported for * RoCE link layer. @@ -3435,16 +3395,7 @@ static int cm_apr_handler(struct cm_work *work) cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_id_priv->msg = NULL; - - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); @@ -3455,7 +3406,6 @@ static int cm_timewait_handler(struct cm_work *work) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; - int ret; timewait_info = container_of(work, struct cm_timewait_info, work); spin_lock_irq(&cm.lock); @@ -3474,15 +3424,7 @@ static int cm_timewait_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); + cm_queue_work_unlock(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); From 9767a27e1aeb462812e9f054c313180eeff0b5c3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:46:56 +0300 Subject: [PATCH 311/562] RDMA/cm: Pass the cm_id_private into cm_cleanup_timewait Also rename it to cm_remove_remote(). This function now removes the tracking of the remote ID/QPN in the redblack trees from a cm_id_private. Replace a open-coded version with a call. The open coded version was deleting only the remote_id, however at this call site the qpn can not have been in the RB tree either, so the cm_remove_remote() will do the same. Link: https://lore.kernel.org/r/20200506074701.9775-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f56494de3c77..6a2a5f6e6d90 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -964,8 +964,10 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) return min(31, ack_timeout); } -static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) +static void cm_remove_remote(struct cm_id_private *cm_id_priv) { + struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info; + if (timewait_info->inserted_remote_id) { rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); timewait_info->inserted_remote_id = 0; @@ -1004,7 +1006,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) return; spin_lock_irqsave(&cm.lock, flags); - cm_cleanup_timewait(cm_id_priv->timewait_info); + cm_remove_remote(cm_id_priv); list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); spin_unlock_irqrestore(&cm.lock, flags); @@ -1035,7 +1037,7 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { spin_lock_irqsave(&cm.lock, flags); - cm_cleanup_timewait(cm_id_priv->timewait_info); + cm_remove_remote(cm_id_priv); spin_unlock_irqrestore(&cm.lock, flags); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; @@ -1136,7 +1138,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) spin_lock(&cm.lock); /* Required for cleanup paths related cm_req_handler() */ if (cm_id_priv->timewait_info) { - cm_cleanup_timewait(cm_id_priv->timewait_info); + cm_remove_remote(cm_id_priv); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } @@ -1971,7 +1973,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, /* Check for stale connections. */ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { - cm_cleanup_timewait(cm_id_priv->timewait_info); + cm_remove_remote(cm_id_priv); cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); @@ -1992,7 +1994,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, cm_id_priv->id.device, cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg))); if (!listen_cm_id_priv) { - cm_cleanup_timewait(cm_id_priv->timewait_info); + cm_remove_remote(cm_id_priv); spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, @@ -2475,9 +2477,7 @@ static int cm_rep_handler(struct cm_work *work) /* Check for a stale connection. */ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { - rb_erase(&cm_id_priv->timewait_info->remote_id_node, - &cm.remote_id_table); - cm_id_priv->timewait_info->inserted_remote_id = 0; + cm_remove_remote(cm_id_priv); cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); From 09fb406a569b2f4b596048e1cfa2c92a35b8fc9b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:46:57 +0300 Subject: [PATCH 312/562] RDMA/cm: Add a note explaining how the timewait is eventually freed The way the cm_timewait_info is converted into a work and then freed is very subtle and surprising, add a note clarifying the lifetime here. Link: https://lore.kernel.org/r/20200506074701.9775-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 6a2a5f6e6d90..74c46b0272b9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1025,6 +1025,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) msecs_to_jiffies(wait_time)); spin_unlock_irqrestore(&cm.lock, flags); + /* + * The timewait_info is converted into a work and gets freed during + * cm_free_work() in cm_timewait_handler(). + */ + BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0); cm_id_priv->timewait_info = NULL; } From cfa68b0d04401b3ae733787b1c72ab1266e852ab Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:46:58 +0300 Subject: [PATCH 313/562] RDMA/cm: Make find_remote_id() return a cm_id_private The only caller doesn't care about the timewait, so acquire and return the cm_id_private from the function. Link: https://lore.kernel.org/r/20200506074701.9775-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 74c46b0272b9..4f77ca2ba8fc 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -738,12 +738,14 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info return NULL; } -static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, - __be32 remote_id) +static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid, + __be32 remote_id) { struct rb_node *node = cm.remote_id_table.rb_node; struct cm_timewait_info *timewait_info; + struct cm_id_private *res = NULL; + spin_lock_irq(&cm.lock); while (node) { timewait_info = rb_entry(node, struct cm_timewait_info, remote_id_node); @@ -755,10 +757,14 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, node = node->rb_left; else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_right; - else - return timewait_info; + else { + res = cm_acquire_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + break; + } } - return NULL; + spin_unlock_irq(&cm.lock); + return res; } static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info @@ -2966,24 +2972,15 @@ static void cm_format_rej_event(struct cm_work *work) static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) { - struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; __be32 remote_id; remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg)); if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) { - spin_lock_irq(&cm.lock); - timewait_info = cm_find_remote_id( + cm_id_priv = cm_find_remote_id( *((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)), remote_id); - if (!timewait_info) { - spin_unlock_irq(&cm.lock); - return NULL; - } - cm_id_priv = - cm_acquire_id(timewait_info->work.local_id, remote_id); - spin_unlock_irq(&cm.lock); } else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) == CM_MSG_RESPONSE_REQ) cm_id_priv = cm_acquire_id( From 1cc44279f2973b413bae69d9a0bf01f051f382b0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:46:59 +0300 Subject: [PATCH 314/562] RDMA/cm: Remove the cm_free_id() wrapper function Just call xa_erase directly during cm_destroy_id() Link: https://lore.kernel.org/r/20200506074701.9775-9-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4f77ca2ba8fc..320fe89af5cb 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -580,11 +580,6 @@ static u32 cm_local_id(__be32 local_id) return (__force u32) (local_id ^ cm.random_id_operand); } -static void cm_free_id(__be32 local_id) -{ - xa_erase_irq(&cm.local_id_table, cm_local_id(local_id)); -} - static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; @@ -1136,7 +1131,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) case IB_CM_TIMEWAIT: /* * The cm_acquire_id in cm_timewait_handler will stop working - * once we do cm_free_id() below, so just move to idle here for + * once we do xa_erase below, so just move to idle here for * consistency. */ cm_id->state = IB_CM_IDLE; @@ -1166,7 +1161,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); - cm_free_id(cm_id->local_id); + xa_erase_irq(&cm.local_id_table, cm_local_id(cm_id->local_id)); cm_deref_id(cm_id_priv); wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) From 51e8463cfc48ca030e532c9127ee1219a95795c3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:47:00 +0300 Subject: [PATCH 315/562] RDMA/cm: Remove needless cm_id variable Just put the expression in the only reader Link: https://lore.kernel.org/r/20200506074701.9775-10-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 320fe89af5cb..15cd5253d2c7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1958,7 +1958,6 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; - struct ib_cm_id *cm_id; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1988,8 +1987,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); if (cur_cm_id_priv) { - cm_id = &cur_cm_id_priv->id; - ib_send_cm_dreq(cm_id, NULL, 0); + ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0); cm_deref_id(cur_cm_id_priv); } return NULL; @@ -2433,7 +2431,6 @@ static int cm_rep_handler(struct cm_work *work) struct cm_rep_msg *rep_msg; int ret; struct cm_id_private *cur_cm_id_priv; - struct ib_cm_id *cm_id; struct cm_timewait_info *timewait_info; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2499,8 +2496,7 @@ static int cm_rep_handler(struct cm_work *work) IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); if (cur_cm_id_priv) { - cm_id = &cur_cm_id_priv->id; - ib_send_cm_dreq(cm_id, NULL, 0); + ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0); cm_deref_id(cur_cm_id_priv); } From a0e46db4e764f56c61f85c235c50bf4578c51a47 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 10:47:01 +0300 Subject: [PATCH 316/562] RDMA/cm: Increment the refcount inside cm_find_listen() All callers need the 'get', so do it in a central place before returning the pointer. Link: https://lore.kernel.org/r/20200506074701.9775-11-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 15cd5253d2c7..fb47cd55ce42 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -686,9 +686,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) + (cm_id_priv->id.device == device)) { + refcount_inc(&cm_id_priv->refcount); return cm_id_priv; - + } if (device < cm_id_priv->id.device) node = node->rb_left; else if (device > cm_id_priv->id.device) @@ -2005,7 +2006,6 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, NULL, 0); return NULL; } - refcount_inc(&listen_cm_id_priv->refcount); spin_unlock_irq(&cm.lock); return listen_cm_id_priv; } @@ -3564,7 +3564,6 @@ static int cm_sidr_req_handler(struct cm_work *work) .status = IB_SIDR_UNSUPPORTED }); goto out; /* No match. */ } - refcount_inc(&listen_cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; From 0cb9e4f9e98af4b969b9140e964c646ad6ed7d40 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:53:42 -0500 Subject: [PATCH 317/562] IB/rdmavt: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Link: https://lore.kernel.org/r/20200507185342.GA14476@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- include/rdma/rdmavt_qp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 5fc10108703a..982bf2340840 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -440,7 +440,7 @@ struct rvt_qp { /* * This sge list MUST be last. Do not add anything below here. */ - struct rvt_sge r_sg_list[0] /* verified SGEs */ + struct rvt_sge r_sg_list[] /* verified SGEs */ ____cacheline_aligned_in_smp; }; From b9019507aa6e3e6a039573eb6743b38bf846771b Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 May 2020 08:30:11 +0300 Subject: [PATCH 318/562] RDMA/mlx5: Refactor DV create flow Move part of the code that get the destinations into function so the code will be more readable. In addition change the variables definition to be in reversed christmas tree. Link: https://lore.kernel.org/r/20200504053012.270689-4-leon@kernel.org Signed-off-by: Maor Gottlieb Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/flow.c | 108 ++++++++++++++++-------------- 1 file changed, 59 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 08fd6a650868..5533b5083c29 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -67,40 +67,18 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { }, }; -#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 -static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct uverbs_attr_bundle *attrs) +static int get_dests(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, + int *dest_type, struct ib_qp **qp) { - struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; - struct mlx5_ib_flow_handler *flow_handler; - struct mlx5_ib_flow_matcher *fs_matcher; - struct ib_uobject **arr_flow_actions; - struct ib_uflow_resources *uflow_res; - struct mlx5_flow_act flow_act = {}; - void *devx_obj; - int dest_id, dest_type; - void *cmd_in; - int inlen; bool dest_devx, dest_qp; - struct ib_qp *qp = NULL; - struct ib_uobject *uobj = - uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); - struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); - int len, ret, i; - u32 counter_id = 0; - u32 *offset_attr; - u32 offset = 0; + void *devx_obj; - if (!capable(CAP_NET_RAW)) - return -EPERM; - - dest_devx = - uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + dest_devx = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); dest_qp = uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - fs_matcher = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCHER); if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS && ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; @@ -114,43 +92,79 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) return -EINVAL; + *qp = NULL; if (dest_devx) { - devx_obj = uverbs_attr_get_obj( - attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); - if (IS_ERR(devx_obj)) - return PTR_ERR(devx_obj); + devx_obj = + uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); /* Verify that the given DEVX object is a flow * steering destination. */ - if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) + if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type)) return -EINVAL; /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && - dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) return -EINVAL; } else if (dest_qp) { struct mlx5_ib_qp *mqp; - qp = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - if (IS_ERR(qp)) - return PTR_ERR(qp); + *qp = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + if (IS_ERR(*qp)) + return PTR_ERR(*qp); - if (qp->qp_type != IB_QPT_RAW_PACKET) + if ((*qp)->qp_type != IB_QPT_RAW_PACKET) return -EINVAL; - mqp = to_mqp(qp); + mqp = to_mqp(*qp); if (mqp->is_rss) - dest_id = mqp->rss_qp.tirn; + *dest_id = mqp->rss_qp.tirn; else - dest_id = mqp->raw_packet_qp.rq.tirn; - dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - } else { - dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; + *dest_id = mqp->raw_packet_qp.rq.tirn; + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } + if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + return -EINVAL; + + return 0; +} + +#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 +static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_flow_context flow_context = {.flow_tag = + MLX5_FS_DEFAULT_FLOW_TAG}; + u32 *offset_attr, offset = 0, counter_id = 0; + int dest_id, dest_type, inlen, len, ret, i; + struct mlx5_ib_flow_handler *flow_handler; + struct mlx5_ib_flow_matcher *fs_matcher; + struct ib_uobject **arr_flow_actions; + struct ib_uflow_resources *uflow_res; + struct mlx5_flow_act flow_act = {}; + struct ib_qp *qp = NULL; + void *devx_obj, *cmd_in; + struct ib_uobject *uobj; + struct mlx5_ib_dev *dev; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); + dev = mlx5_udata_to_mdev(&attrs->driver_udata); + + if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp)) + return -EINVAL; + len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); if (len) { @@ -180,10 +194,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - return -EINVAL; - cmd_in = uverbs_attr_get_alloced_ptr( attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); inlen = uverbs_attr_get_len(attrs, From 8c112a5f29a343f89072bed4b9fa176fea226798 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 May 2020 08:30:12 +0300 Subject: [PATCH 319/562] RDMA/mlx5: Add support in steering default miss User can configure default miss rule in order to skip matching in the user domain and forward the packet to the kernel steering domain. When user requests a default miss rule, we add steering rule to forward the traffic to the next namespace. Link: https://lore.kernel.org/r/20200504053012.270689-5-leon@kernel.org Signed-off-by: Maor Gottlieb Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/flow.c | 32 +++++++++++++++++++----- drivers/infiniband/hw/mlx5/main.c | 9 +++---- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 5 ++++ 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 5533b5083c29..3fa66474afa6 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -69,19 +69,32 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { static int get_dests(struct uverbs_attr_bundle *attrs, struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, - int *dest_type, struct ib_qp **qp) + int *dest_type, struct ib_qp **qp, bool *def_miss) { bool dest_devx, dest_qp; void *devx_obj; + u32 flags; + int err; dest_devx = uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); dest_qp = uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS && - ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) - return -EINVAL; + *def_miss = false; + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS); + if (err) + return err; + *def_miss = flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + if (dest_devx && (dest_qp || *def_miss)) + return -EINVAL; + else if (dest_qp && *def_miss) + return -EINVAL; + } /* Allow only DEVX object as dest when inserting to FDB */ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx) @@ -153,6 +166,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( void *devx_obj, *cmd_in; struct ib_uobject *uobj; struct mlx5_ib_dev *dev; + bool def_miss; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -162,9 +176,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp)) + if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &def_miss)) return -EINVAL; + if (def_miss) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); if (len) { @@ -636,7 +653,10 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL, - UA_ALLOC_AND_COPY)); + UA_ALLOC_AND_COPY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + enum mlx5_ib_create_flow_flags, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3af57dfe7224..38bf3841741c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4200,18 +4200,17 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { dst[dst_num].type = dest_type; - dst[dst_num].tir_num = dest_id; + dst[dst_num++].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst[dst_num].ft_num = dest_id; + dst[dst_num++].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; } - dst_num++; if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 24f3388c3182..07cf54333193 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -241,6 +241,10 @@ enum mlx5_ib_flow_type { MLX5_IB_FLOW_TYPE_MC_DEFAULT, }; +enum mlx5_ib_create_flow_flags { + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS = 1 << 0, +}; + enum mlx5_ib_create_flow_attrs { MLX5_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, @@ -251,6 +255,7 @@ enum mlx5_ib_create_flow_attrs { MLX5_IB_ATTR_CREATE_FLOW_TAG, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS, }; enum mlx5_ib_destoy_flow_attrs { From f29de9eee78253d9ae57cd58a6b21eed021742c8 Mon Sep 17 00:00:00 2001 From: Daria Velikovsky Date: Mon, 4 May 2020 08:42:27 +0300 Subject: [PATCH 320/562] RDMA/mlx5: Add support for drop action in DV steering When drop action is used the matching packet will stop processing in steering and will be dropped. This functionality will allow users to drop matching packets. Link: https://lore.kernel.org/r/20200504054227.271486-1-leon@kernel.org Signed-off-by: Daria Velikovsky Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/flow.c | 35 ++++++++++++++---------- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 3fa66474afa6..6fa1a510c5d7 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -69,11 +69,10 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { static int get_dests(struct uverbs_attr_bundle *attrs, struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, - int *dest_type, struct ib_qp **qp, bool *def_miss) + int *dest_type, struct ib_qp **qp, u32 *flags) { bool dest_devx, dest_qp; void *devx_obj; - u32 flags; int err; dest_devx = uverbs_attr_is_valid(attrs, @@ -81,23 +80,28 @@ static int get_dests(struct uverbs_attr_bundle *attrs, dest_qp = uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - *def_miss = false; - err = uverbs_get_flags32(&flags, attrs, - MLX5_IB_ATTR_CREATE_FLOW_FLAGS, - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS); + *flags = 0; + err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); if (err) return err; - *def_miss = flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS; + + /* Both flags are not allowed */ + if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && + *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + return -EINVAL; if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - if (dest_devx && (dest_qp || *def_miss)) + if (dest_devx && (dest_qp || *flags)) return -EINVAL; - else if (dest_qp && *def_miss) + else if (dest_qp && *flags) return -EINVAL; } - /* Allow only DEVX object as dest when inserting to FDB */ - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx) + /* Allow only DEVX object, drop as dest for FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || + (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) return -EINVAL; /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ @@ -166,7 +170,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( void *devx_obj, *cmd_in; struct ib_uobject *uobj; struct mlx5_ib_dev *dev; - bool def_miss; + u32 flags; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -176,12 +180,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &def_miss)) + if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) return -EINVAL; - if (def_miss) + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); if (len) { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 07cf54333193..8e316ef896b5 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -243,6 +243,7 @@ enum mlx5_ib_flow_type { enum mlx5_ib_create_flow_flags { MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS = 1 << 0, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP = 1 << 1, }; enum mlx5_ib_create_flow_attrs { From 59dde4d19cf8de232c17c79c08e0db67636b022b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 13 May 2020 13:08:09 +0300 Subject: [PATCH 321/562] RDMA/mlx5: Fix query_srq_cmd() function The output buffer used in mlx5_cmd_exec_inout() was wrongly changed from pre-allocated srq_out pointer to an input "out" point. That leads to unpredictable results in the get_srqc() call later. Fixes: 31578defe4eb ("RDMA/mlx5: Update mlx5_ib to use new cmd interface") Link: https://lore.kernel.org/r/20200513100809.246315-1-leon@kernel.org Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/srq_cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index bc50a712bf2e..6f5eadc4d183 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -169,16 +169,16 @@ static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_srq_in, in, opcode, MLX5_CMD_OP_QUERY_SRQ); MLX5_SET(query_srq_in, in, srqn, srq->srqn); - err = mlx5_cmd_exec_inout(dev->mdev, query_srq, in, out); + err = mlx5_cmd_exec_inout(dev->mdev, query_srq, in, srq_out); if (err) goto out; - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); get_srqc(srqc, out); if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) out->flags |= MLX5_SRQ_FLAG_ERR; out: - kvfree(out); + kvfree(srq_out); return err; } From 25c21d20bcfdb97ba869007b284a5dbf5328ffe2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 May 2020 12:37:03 +0300 Subject: [PATCH 322/562] scsi: aacraid: Fix an oops in error handling If the memdup_user() function fails then it results in an Oops in the error handling code when we try to kfree() and error pointer. Link: https://lore.kernel.org/r/20200513093703.GB347693@mwanda Fixes: 8d925b1f00e6 ("scsi: aacraid: Use memdup_user() as a cleanup") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commctrl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 102658bdc15a..34e65dea992e 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -516,6 +516,7 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) user_srbcmd = memdup_user(user_srb, fibsize); if (IS_ERR(user_srbcmd)) { rcode = PTR_ERR(user_srbcmd); + user_srbcmd = NULL; goto cleanup; } From 21d2b76831fdee3754eeee995e15080098603ec5 Mon Sep 17 00:00:00 2001 From: ChenTao Date: Thu, 14 May 2020 09:26:55 +0800 Subject: [PATCH 323/562] scsi: ufs-mediatek: Make ufs_mtk_fixup_dev_quirks static Fix the following warning: drivers/scsi/ufs/ufs-mediatek.c:585:6: warning: symbol 'ufs_mtk_fixup_dev_quirks' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200514012655.127202-1-chentao107@huawei.com Reported-by: Hulk Robot Reviewed-by: Stanley Chu Signed-off-by: ChenTao Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index c543142554d3..73e4a4f9a3a2 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -582,7 +582,7 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) return 0; } -void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba) +static void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba) { struct ufs_dev_info *dev_info = &hba->dev_info; u16 mid = dev_info->wmanufacturerid; From 0bd735df7681e41647a69b1fad0eb5594c60a727 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 14 May 2020 17:19:53 +0900 Subject: [PATCH 324/562] scsi: sd: Signal drive managed SMR disks Print a message indicating that a disk is a drive-managed SMR model when such drive is found using the ZONED field of the Block Device Characteristics VPD page (IDENTIFY data on ATA side). [mkp: typo] Link: https://lore.kernel.org/r/20200514081953.1252087-1-damien.lemoal@wdc.com Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a793cb08d025..8929e178c6f8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2955,6 +2955,9 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) * with partitions as regular block devices. */ q->limits.zoned = BLK_ZONED_NONE; + if (sdkp->zoned == 2 && sdkp->first_scan) + sd_printk(KERN_NOTICE, sdkp, + "Drive-managed SMR disk\n"); } } if (blk_queue_is_zoned(q) && sdkp->first_scan) From 90b8491c0033915ec1b290bc1c0de27935cb132d Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Sat, 9 May 2020 17:37:13 +0800 Subject: [PATCH 325/562] scsi: ufs: Introduce ufs_hba_variant_params to group customizable parameters The UFS driver is growing more and more customizable parameters. Collect them in one place. Link: https://lore.kernel.org/r/20200509093716.21010-2-stanley.chu@mediatek.com Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 4 ++-- drivers/scsi/ufs/ufshcd.c | 38 +++++++++++++-------------------- drivers/scsi/ufs/ufshcd.h | 8 ++++++- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 73e4a4f9a3a2..74baba93f51e 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -79,9 +79,9 @@ static int ufs_mtk_hce_enable_notify(struct ufs_hba *hba, if (status == PRE_CHANGE) { if (host->unipro_lpm) - hba->hba_enable_delay_us = 0; + hba->vps->hba_enable_delay_us = 0; else - hba->hba_enable_delay_us = 600; + hba->vps->hba_enable_delay_us = 600; } return 0; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 426073a518ef..cdacbe6378a1 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1353,23 +1353,6 @@ static int ufshcd_devfreq_get_dev_status(struct device *dev, return 0; } -static struct devfreq_dev_profile ufs_devfreq_profile = { - .polling_ms = 100, - .target = ufshcd_devfreq_target, - .get_dev_status = ufshcd_devfreq_get_dev_status, -}; - -#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) -static struct devfreq_simple_ondemand_data ufs_ondemand_data = { - .upthreshold = 70, - .downdifferential = 5, -}; - -static void *gov_data = &ufs_ondemand_data; -#else -static void *gov_data; /* NULL */ -#endif - static int ufshcd_devfreq_init(struct ufs_hba *hba) { struct list_head *clk_list = &hba->clk_list_head; @@ -1385,12 +1368,12 @@ static int ufshcd_devfreq_init(struct ufs_hba *hba) dev_pm_opp_add(hba->dev, clki->min_freq, 0); dev_pm_opp_add(hba->dev, clki->max_freq, 0); - ufshcd_vops_config_scaling_param(hba, &ufs_devfreq_profile, - gov_data); + ufshcd_vops_config_scaling_param(hba, &hba->vps->devfreq_profile, + &hba->vps->ondemand_data); devfreq = devfreq_add_device(hba->dev, - &ufs_devfreq_profile, + &hba->vps->devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, - gov_data); + &hba->vps->ondemand_data); if (IS_ERR(devfreq)) { ret = PTR_ERR(devfreq); dev_err(hba->dev, "Unable to register with devfreq %d\n", ret); @@ -4314,7 +4297,7 @@ int ufshcd_hba_enable(struct ufs_hba *hba) * instruction might be read back. * This delay can be changed based on the controller. */ - ufshcd_delay_us(hba->hba_enable_delay_us, 100); + ufshcd_delay_us(hba->vps->hba_enable_delay_us, 100); /* wait for the host controller to complete initialization */ retry = 50; @@ -7477,6 +7460,15 @@ static const struct attribute_group *ufshcd_driver_groups[] = { NULL, }; +static struct ufs_hba_variant_params ufs_hba_vps = { + .hba_enable_delay_us = 1000, + .devfreq_profile.polling_ms = 100, + .devfreq_profile.target = ufshcd_devfreq_target, + .devfreq_profile.get_dev_status = ufshcd_devfreq_get_dev_status, + .ondemand_data.upthreshold = 70, + .ondemand_data.downdifferential = 5, +}; + static struct scsi_host_template ufshcd_driver_template = { .module = THIS_MODULE, .name = UFSHCD, @@ -8724,7 +8716,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->mmio_base = mmio_base; hba->irq = irq; - hba->hba_enable_delay_us = 1000; + hba->vps = &ufs_hba_vps; err = ufshcd_hba_init(hba); if (err) diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 23a434c03c2a..f7bdf52ba8b0 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -566,6 +566,12 @@ enum ufshcd_caps { UFSHCD_CAP_WB_EN = 1 << 7, }; +struct ufs_hba_variant_params { + struct devfreq_dev_profile devfreq_profile; + struct devfreq_simple_ondemand_data ondemand_data; + u16 hba_enable_delay_us; +}; + /** * struct ufs_hba - per adapter private structure * @mmio_base: UFSHCI base register address @@ -663,6 +669,7 @@ struct ufs_hba { int nutmrs; u32 ufs_version; const struct ufs_hba_variant_ops *vops; + struct ufs_hba_variant_params *vps; void *priv; unsigned int irq; bool is_irq_enabled; @@ -684,7 +691,6 @@ struct ufs_hba { u32 eh_flags; u32 intr_mask; u16 ee_ctrl_mask; - u16 hba_enable_delay_us; bool is_powered; /* Work Queues */ From d14734ae3ae79d3b0286a2431782054e5066da8f Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Sat, 9 May 2020 17:37:15 +0800 Subject: [PATCH 326/562] scsi: ufs: Customize flush threshold for WriteBooster Allow flush threshold for WriteBooster to be customizable by vendors. To achieve this, make the value a variable in struct ufs_hba_variant_params. Also introduce UFS_WB_BUF_REMAIN_PERCENT() macro to provide a more flexible way to specify WriteBooster available buffer values. Link: https://lore.kernel.org/r/20200509093716.21010-4-stanley.chu@mediatek.com Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 5 +---- drivers/scsi/ufs/ufshcd.c | 7 ++++--- drivers/scsi/ufs/ufshcd.h | 1 + 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index b3135344ab3f..fadba3a3bbcd 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -478,10 +478,7 @@ enum ufs_dev_pwr_mode { UFS_POWERDOWN_PWR_MODE = 3, }; -enum ufs_dev_wb_buf_avail_size { - UFS_WB_10_PERCENT_BUF_REMAIN = 0x1, - UFS_WB_40_PERCENT_BUF_REMAIN = 0x4, -}; +#define UFS_WB_BUF_REMAIN_PERCENT(val) ((val) / 10) /** * struct utp_cmd_rsp - Response UPIU structure diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index cdacbe6378a1..aca50ed39844 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5301,8 +5301,8 @@ static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, cur_buf); return false; } - /* Let it continue to flush when >60% full */ - if (avail_buf < UFS_WB_40_PERCENT_BUF_REMAIN) + /* Let it continue to flush when available buffer exceeds threshold */ + if (avail_buf < hba->vps->wb_flush_threshold) return true; return false; @@ -5336,7 +5336,7 @@ static bool ufshcd_wb_keep_vcc_on(struct ufs_hba *hba) } if (!hba->dev_info.b_presrv_uspc_en) { - if (avail_buf <= UFS_WB_10_PERCENT_BUF_REMAIN) + if (avail_buf <= UFS_WB_BUF_REMAIN_PERCENT(10)) return true; return false; } @@ -7462,6 +7462,7 @@ static const struct attribute_group *ufshcd_driver_groups[] = { static struct ufs_hba_variant_params ufs_hba_vps = { .hba_enable_delay_us = 1000, + .wb_flush_threshold = UFS_WB_BUF_REMAIN_PERCENT(40), .devfreq_profile.polling_ms = 100, .devfreq_profile.target = ufshcd_devfreq_target, .devfreq_profile.get_dev_status = ufshcd_devfreq_get_dev_status, diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index f7bdf52ba8b0..e3dfb48e669e 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -570,6 +570,7 @@ struct ufs_hba_variant_params { struct devfreq_dev_profile devfreq_profile; struct devfreq_simple_ondemand_data ondemand_data; u16 hba_enable_delay_us; + u32 wb_flush_threshold; }; /** From f48b285ae658b50f4ba2d3a267522f572720de6d Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Sat, 9 May 2020 17:37:16 +0800 Subject: [PATCH 327/562] scsi: ufs-mediatek: Customize WriteBooster flush policy Change the WriteBooster policy to keep VCC on during runtime suspend if available WriteBooster buffer is less than 80%. Link: https://lore.kernel.org/r/20200509093716.21010-5-stanley.chu@mediatek.com Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 74baba93f51e..d56ce8d97d4e 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -271,6 +271,7 @@ static int ufs_mtk_init(struct ufs_hba *hba) /* Enable WriteBooster */ hba->caps |= UFSHCD_CAP_WB_EN; + hba->vps->wb_flush_threshold = UFS_WB_BUF_REMAIN_PERCENT(80); /* * ufshcd_vops_init() is invoked after From 582838ea5bea41258d136c82a6c493a95cda652c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 May 2020 12:18:28 +0200 Subject: [PATCH 328/562] gpiolib: Document that GPIO line names are not globally unique gpiochip_set_desc_names() no longer rejects GPIO line name collisions. Hence GPIO line names are not guaranteed to be globally unique. In case of multiple GPIO lines with the same name, gpio_name_to_desc() will return the first match found. Update the comments for gpio_name_to_desc() and gpiochip_set_desc_names() to match reality. Fixes: f881bab038c9667d ("gpio: keep the GPIO line names internal") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200511101828.30046-1-geert+renesas@glider.be Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 48b4f70e2898..fe5f14a417bd 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -296,6 +296,9 @@ static int gpiodev_add_to_list(struct gpio_device *gdev) /* * Convert a GPIO name to its descriptor + * Note that there is no guarantee that GPIO names are globally unique! + * Hence this function will return, if it exists, a reference to the first GPIO + * line found that matches the given name. */ static struct gpio_desc *gpio_name_to_desc(const char * const name) { @@ -329,10 +332,12 @@ static struct gpio_desc *gpio_name_to_desc(const char * const name) } /* - * Takes the names from gc->names and checks if they are all unique. If they - * are, they are assigned to their gpio descriptors. + * Take the names from gc->names and assign them to their GPIO descriptors. + * Warn if a name is already used for a GPIO line on a different GPIO chip. * - * Warning if one of the names is already used for a different GPIO. + * Note that: + * 1. Non-unique names are still accepted, + * 2. Name collisions within the same GPIO chip are not reported. */ static int gpiochip_set_desc_names(struct gpio_chip *gc) { From 9ddacff18b159fd9852734f611a6db6100432635 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:07 +0200 Subject: [PATCH 329/562] sysfs: export sysfs_remove_file_self() Function is going to be used in transport over RDMA module in subsequent patches, so export it to GPL modules. Link: https://lore.kernel.org/r/20200511135131.27580-2-danil.kipnis@cloud.ionos.com Signed-off-by: Roman Pen Acked-by: Tejun Heo Cc: linux-kernel@vger.kernel.org [jwang: extend the commit message] Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- fs/sysfs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 26bbf960e2a2..d81f9f974a35 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -492,6 +492,7 @@ bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) kernfs_put(kn); return ret; } +EXPORT_SYMBOL_GPL(sysfs_remove_file_self); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *ptr) { From b5c27cdb094ed9cce562e98c931f53669b6409f7 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:08 +0200 Subject: [PATCH 330/562] RDMA/rtrs: public interface header to establish RDMA connections Introduce public header which provides set of API functions to establish RDMA connections from client to server machine using RTRS protocol, which manages RDMA connections for each session, does multipathing and load balancing. Main functions for client (active) side: rtrs_clt_open() - Creates set of RDMA connections incapsulated in IBTRS session and returns pointer on RTRS session object. rtrs_clt_close() - Closes RDMA connections associated with RTRS session. rtrs_clt_request() - Requests zero-copy RDMA transfer to/from server. Main functions for server (passive) side: rtrs_srv_open() - Starts listening for RTRS clients on specified port and invokes RTRS callbacks for incoming RDMA requests or link events. rtrs_srv_close() - Closes RTRS server context. Link: https://lore.kernel.org/r/20200511135131.27580-3-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs.h | 195 +++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs.h diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h new file mode 100644 index 000000000000..9879d40467b6 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#ifndef RTRS_H +#define RTRS_H + +#include +#include + +struct rtrs_permit; +struct rtrs_clt; +struct rtrs_srv_ctx; +struct rtrs_srv; +struct rtrs_srv_op; + +/* + * RDMA transport (RTRS) client API + */ + +/** + * enum rtrs_clt_link_ev - Events about connectivity state of a client + * @RTRS_CLT_LINK_EV_RECONNECTED Client was reconnected. + * @RTRS_CLT_LINK_EV_DISCONNECTED Client was disconnected. + */ +enum rtrs_clt_link_ev { + RTRS_CLT_LINK_EV_RECONNECTED, + RTRS_CLT_LINK_EV_DISCONNECTED, +}; + +/** + * Source and destination address of a path to be established + */ +struct rtrs_addr { + struct sockaddr_storage *src; + struct sockaddr_storage *dst; +}; + +/** + * rtrs_clt_ops - it holds the link event callback and private pointer. + * @priv: User supplied private data. + * @link_ev: Event notification callback function for connection state changes + * @priv: User supplied data that was passed to rtrs_clt_open() + * @ev: Occurred event + */ +struct rtrs_clt_ops { + void *priv; + void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev); +}; + +struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, + const char *sessname, + const struct rtrs_addr *paths, + size_t path_cnt, u16 port, + size_t pdu_sz, u8 reconnect_delay_sec, + u16 max_segments, + s16 max_reconnect_attempts); + +void rtrs_clt_close(struct rtrs_clt *sess); + +/** + * rtrs_permit_to_pdu() - converts rtrs_permit to opaque pdu pointer + * @permit: RTRS permit pointer, it associates the memory allocation for future + * RDMA operation. + */ +void *rtrs_permit_to_pdu(struct rtrs_permit *permit); + +enum { + RTRS_PERMIT_NOWAIT = 0, + RTRS_PERMIT_WAIT = 1, +}; + +/** + * enum rtrs_clt_con_type() type of ib connection to use with a given + * rtrs_permit + * @ADMIN_CON - use connection reserved for "service" messages + * @IO_CON - use a connection reserved for IO + */ +enum rtrs_clt_con_type { + RTRS_ADMIN_CON, + RTRS_IO_CON +}; + +struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *sess, + enum rtrs_clt_con_type con_type, + int wait); + +void rtrs_clt_put_permit(struct rtrs_clt *sess, struct rtrs_permit *permit); + +/** + * rtrs_clt_req_ops - it holds the request confirmation callback + * and a private pointer. + * @priv: User supplied private data. + * @conf_fn: callback function to be called as confirmation + * @priv: User provided data, passed back with corresponding + * @(conf) confirmation. + * @errno: error number. + */ +struct rtrs_clt_req_ops { + void *priv; + void (*conf_fn)(void *priv, int errno); +}; + +int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, + struct rtrs_clt *sess, struct rtrs_permit *permit, + const struct kvec *vec, size_t nr, size_t len, + struct scatterlist *sg, unsigned int sg_cnt); + +/** + * rtrs_attrs - RTRS session attributes + */ +struct rtrs_attrs { + u32 queue_depth; + u32 max_io_size; + u8 sessname[NAME_MAX]; + struct kobject *sess_kobj; +}; + +int rtrs_clt_query(struct rtrs_clt *sess, struct rtrs_attrs *attr); + +/* + * Here goes RTRS server API + */ + +/** + * enum rtrs_srv_link_ev - Server link events + * @RTRS_SRV_LINK_EV_CONNECTED: Connection from client established + * @RTRS_SRV_LINK_EV_DISCONNECTED: Connection was disconnected, all + * connection RTRS resources were freed. + */ +enum rtrs_srv_link_ev { + RTRS_SRV_LINK_EV_CONNECTED, + RTRS_SRV_LINK_EV_DISCONNECTED, +}; + +struct rtrs_srv_ops { + /** + * rdma_ev(): Event notification for RDMA operations + * If the callback returns a value != 0, an error + * message for the data transfer will be sent to + * the client. + + * @sess: Session + * @priv: Private data set by rtrs_srv_set_sess_priv() + * @id: internal RTRS operation id + * @dir: READ/WRITE + * @data: Pointer to (bidirectional) rdma memory area: + * - in case of %RTRS_SRV_RDMA_EV_RECV contains + * data sent by the client + * - in case of %RTRS_SRV_RDMA_EV_WRITE_REQ points + * to the memory area where the response is to be + * written to + * @datalen: Size of the memory area in @data + * @usr: The extra user message sent by the client (%vec) + * @usrlen: Size of the user message + */ + int (*rdma_ev)(struct rtrs_srv *sess, void *priv, + struct rtrs_srv_op *id, int dir, + void *data, size_t datalen, const void *usr, + size_t usrlen); + /** + * link_ev(): Events about connectivity state changes + * If the callback returns != 0 and the event + * %RTRS_SRV_LINK_EV_CONNECTED the corresponding + * session will be destroyed. + * @sess: Session + * @ev: event + * @priv: Private data from user if previously set with + * rtrs_srv_set_sess_priv() + */ + int (*link_ev)(struct rtrs_srv *sess, enum rtrs_srv_link_ev ev, + void *priv); +}; + +struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port); + +void rtrs_srv_close(struct rtrs_srv_ctx *ctx); + +bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int errno); + +void rtrs_srv_set_sess_priv(struct rtrs_srv *sess, void *priv); + +int rtrs_srv_get_sess_name(struct rtrs_srv *sess, char *sessname, size_t len); + +int rtrs_srv_get_queue_depth(struct rtrs_srv *sess); + +int rtrs_addr_to_sockaddr(const char *str, size_t len, u16 port, + struct rtrs_addr *addr); + +int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len); +#endif From 91fddedd439c2463762275693f784abc9d9613e2 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:09 +0200 Subject: [PATCH 331/562] RDMA/rtrs: private headers with rtrs protocol structs and helpers These are common private headers with rtrs protocol structures, logging, sysfs and other helper functions, which are used on both client and server sides. Link: https://lore.kernel.org/r/20200511135131.27580-4-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-log.h | 28 ++ drivers/infiniband/ulp/rtrs/rtrs-pri.h | 399 +++++++++++++++++++++++++ 2 files changed, 427 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-log.h create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-pri.h diff --git a/drivers/infiniband/ulp/rtrs/rtrs-log.h b/drivers/infiniband/ulp/rtrs/rtrs-log.h new file mode 100644 index 000000000000..53c785b992f2 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-log.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#ifndef RTRS_LOG_H +#define RTRS_LOG_H + +#define rtrs_log(fn, obj, fmt, ...) \ + fn("<%s>: " fmt, obj->sessname, ##__VA_ARGS__) + +#define rtrs_err(obj, fmt, ...) \ + rtrs_log(pr_err, obj, fmt, ##__VA_ARGS__) +#define rtrs_err_rl(obj, fmt, ...) \ + rtrs_log(pr_err_ratelimited, obj, fmt, ##__VA_ARGS__) +#define rtrs_wrn(obj, fmt, ...) \ + rtrs_log(pr_warn, obj, fmt, ##__VA_ARGS__) +#define rtrs_wrn_rl(obj, fmt, ...) \ + rtrs_log(pr_warn_ratelimited, obj, fmt, ##__VA_ARGS__) +#define rtrs_info(obj, fmt, ...) \ + rtrs_log(pr_info, obj, fmt, ##__VA_ARGS__) +#define rtrs_info_rl(obj, fmt, ...) \ + rtrs_log(pr_info_ratelimited, obj, fmt, ##__VA_ARGS__) + +#endif /* RTRS_LOG_H */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h new file mode 100644 index 000000000000..0a93c87ef92b --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -0,0 +1,399 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#ifndef RTRS_PRI_H +#define RTRS_PRI_H + +#include +#include +#include +#include + +#include "rtrs.h" + +#define RTRS_PROTO_VER_MAJOR 2 +#define RTRS_PROTO_VER_MINOR 0 + +#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \ + __stringify(RTRS_PROTO_VER_MINOR) + +enum rtrs_imm_const { + MAX_IMM_TYPE_BITS = 4, + MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1), + MAX_IMM_PAYL_BITS = 28, + MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1), +}; + +enum rtrs_imm_type { + RTRS_IO_REQ_IMM = 0, /* client to server */ + RTRS_IO_RSP_IMM = 1, /* server to client */ + RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */ + + RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */ + RTRS_HB_ACK_IMM = 9, + + RTRS_LAST_IMM, +}; + +enum { + SERVICE_CON_QUEUE_DEPTH = 512, + + MAX_PATHS_NUM = 128, + + /* + * With the size of struct rtrs_permit allocated on the client, 4K + * is the maximum number of rtrs_permits we can allocate. This number is + * also used on the client to allocate the IU for the user connection + * to receive the RDMA addresses from the server. + */ + MAX_SESS_QUEUE_DEPTH = 4096, + + RTRS_HB_INTERVAL_MS = 5000, + RTRS_HB_MISSED_MAX = 5, + + RTRS_MAGIC = 0x1BBD, + RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR, +}; + +struct rtrs_ib_dev; + +struct rtrs_rdma_dev_pd_ops { + struct rtrs_ib_dev *(*alloc)(void); + void (*free)(struct rtrs_ib_dev *dev); + int (*init)(struct rtrs_ib_dev *dev); + void (*deinit)(struct rtrs_ib_dev *dev); +}; + +struct rtrs_rdma_dev_pd { + struct mutex mutex; + struct list_head list; + enum ib_pd_flags pd_flags; + const struct rtrs_rdma_dev_pd_ops *ops; +}; + +struct rtrs_ib_dev { + struct ib_device *ib_dev; + struct ib_pd *ib_pd; + struct kref ref; + struct list_head entry; + struct rtrs_rdma_dev_pd *pool; +}; + +struct rtrs_con { + struct rtrs_sess *sess; + struct ib_qp *qp; + struct ib_cq *cq; + struct rdma_cm_id *cm_id; + unsigned int cid; +}; + +struct rtrs_sess { + struct list_head entry; + struct sockaddr_storage dst_addr; + struct sockaddr_storage src_addr; + char sessname[NAME_MAX]; + uuid_t uuid; + struct rtrs_con **con; + unsigned int con_num; + unsigned int recon_cnt; + struct rtrs_ib_dev *dev; + int dev_ref; + struct ib_cqe *hb_cqe; + void (*hb_err_handler)(struct rtrs_con *con); + struct workqueue_struct *hb_wq; + struct delayed_work hb_dwork; + unsigned int hb_interval_ms; + unsigned int hb_missed_cnt; + unsigned int hb_missed_max; +}; + +/* rtrs information unit */ +struct rtrs_iu { + struct list_head list; + struct ib_cqe cqe; + dma_addr_t dma_addr; + void *buf; + size_t size; + enum dma_data_direction direction; +}; + +/** + * enum rtrs_msg_types - RTRS message types, see also rtrs/README + * @RTRS_MSG_INFO_REQ: Client additional info request to the server + * @RTRS_MSG_INFO_RSP: Server additional info response to the client + * @RTRS_MSG_WRITE: Client writes data per RDMA to server + * @RTRS_MSG_READ: Client requests data transfer from server + * @RTRS_MSG_RKEY_RSP: Server refreshed rkey for rbuf + */ +enum rtrs_msg_types { + RTRS_MSG_INFO_REQ, + RTRS_MSG_INFO_RSP, + RTRS_MSG_WRITE, + RTRS_MSG_READ, + RTRS_MSG_RKEY_RSP, +}; + +/** + * enum rtrs_msg_flags - RTRS message flags. + * @RTRS_NEED_INVAL: Send invalidation in response. + * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response. + */ +enum rtrs_msg_flags { + RTRS_MSG_NEED_INVAL_F = 1 << 0, + RTRS_MSG_NEW_RKEY_F = 1 << 1, +}; + +/** + * struct rtrs_sg_desc - RDMA-Buffer entry description + * @addr: Address of RDMA destination buffer + * @key: Authorization rkey to write to the buffer + * @len: Size of the buffer + */ +struct rtrs_sg_desc { + __le64 addr; + __le32 key; + __le32 len; +}; + +/** + * struct rtrs_msg_conn_req - Client connection request to the server + * @magic: RTRS magic + * @version: RTRS protocol version + * @cid: Current connection id + * @cid_num: Number of connections per session + * @recon_cnt: Reconnections counter + * @sess_uuid: UUID of a session (path) + * @paths_uuid: UUID of a group of sessions (paths) + * + * NOTE: max size 56 bytes, see man rdma_connect(). + */ +struct rtrs_msg_conn_req { + /* Is set to 0 by cma.c in case of AF_IB, do not touch that. + * see https://www.spinics.net/lists/linux-rdma/msg22397.html + */ + u8 __cma_version; + /* On sender side that should be set to 0, or cma_save_ip_info() + * extract garbage and will fail. + */ + u8 __ip_version; + __le16 magic; + __le16 version; + __le16 cid; + __le16 cid_num; + __le16 recon_cnt; + uuid_t sess_uuid; + uuid_t paths_uuid; + u8 reserved[12]; +}; + +/** + * struct rtrs_msg_conn_rsp - Server connection response to the client + * @magic: RTRS magic + * @version: RTRS protocol version + * @errno: If rdma_accept() then 0, if rdma_reject() indicates error + * @queue_depth: max inflight messages (queue-depth) in this session + * @max_io_size: max io size server supports + * @max_hdr_size: max msg header size server supports + * + * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept(). + */ +struct rtrs_msg_conn_rsp { + __le16 magic; + __le16 version; + __le16 errno; + __le16 queue_depth; + __le32 max_io_size; + __le32 max_hdr_size; + __le32 flags; + u8 reserved[36]; +}; + +/** + * struct rtrs_msg_info_req + * @type: @RTRS_MSG_INFO_REQ + * @sessname: Session name chosen by client + */ +struct rtrs_msg_info_req { + __le16 type; + u8 sessname[NAME_MAX]; + u8 reserved[15]; +}; + +/** + * struct rtrs_msg_info_rsp + * @type: @RTRS_MSG_INFO_RSP + * @sg_cnt: Number of @desc entries + * @desc: RDMA buffers where the client can write to server + */ +struct rtrs_msg_info_rsp { + __le16 type; + __le16 sg_cnt; + u8 reserved[4]; + struct rtrs_sg_desc desc[]; +}; + +/** + * struct rtrs_msg_rkey_rsp + * @type: @RTRS_MSG_RKEY_RSP + * @buf_id: RDMA buf_id of the new rkey + * @rkey: new remote key for RDMA buffers id from server + */ +struct rtrs_msg_rkey_rsp { + __le16 type; + __le16 buf_id; + __le32 rkey; +}; + +/** + * struct rtrs_msg_rdma_read - RDMA data transfer request from client + * @type: always @RTRS_MSG_READ + * @usr_len: length of user payload + * @sg_cnt: number of @desc entries + * @desc: RDMA buffers where the server can write the result to + */ +struct rtrs_msg_rdma_read { + __le16 type; + __le16 usr_len; + __le16 flags; + __le16 sg_cnt; + struct rtrs_sg_desc desc[]; +}; + +/** + * struct_msg_rdma_write - Message transferred to server with RDMA-Write + * @type: always @RTRS_MSG_WRITE + * @usr_len: length of user payload + */ +struct rtrs_msg_rdma_write { + __le16 type; + __le16 usr_len; +}; + +/** + * struct_msg_rdma_hdr - header for read or write request + * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ + */ +struct rtrs_msg_rdma_hdr { + __le16 type; +}; + +/* rtrs.c */ + +struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, + struct ib_device *dev, enum dma_data_direction, + void (*done)(struct ib_cq *cq, struct ib_wc *wc)); +void rtrs_iu_free(struct rtrs_iu *iu, enum dma_data_direction dir, + struct ib_device *dev, u32 queue_size); +int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); +int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, + struct ib_send_wr *head); +int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, + struct ib_sge *sge, unsigned int num_sge, + u32 rkey, u64 rdma_addr, u32 imm_data, + enum ib_send_flags flags, + struct ib_send_wr *head); + +int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe); +int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, + u32 imm_data, enum ib_send_flags flags, + struct ib_send_wr *head); + +int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con, + u32 max_send_sge, int cq_vector, u16 cq_size, + u16 wr_queue_size, enum ib_poll_context poll_ctx); +void rtrs_cq_qp_destroy(struct rtrs_con *con); + +void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, + unsigned int interval_ms, unsigned int missed_max, + void (*err_handler)(struct rtrs_con *con), + struct workqueue_struct *wq); +void rtrs_start_hb(struct rtrs_sess *sess); +void rtrs_stop_hb(struct rtrs_sess *sess); +void rtrs_send_hb_ack(struct rtrs_sess *sess); + +void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags, + struct rtrs_rdma_dev_pd *pool); +void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool); + +struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev, + struct rtrs_rdma_dev_pd *pool); +int rtrs_ib_dev_put(struct rtrs_ib_dev *dev); + +static inline u32 rtrs_to_imm(u32 type, u32 payload) +{ + BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32); + BUILD_BUG_ON(RTRS_LAST_IMM > (1<> MAX_IMM_PAYL_BITS; +} + +static inline u32 rtrs_to_io_req_imm(u32 addr) +{ + return rtrs_to_imm(RTRS_IO_REQ_IMM, addr); +} + +static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval) +{ + enum rtrs_imm_type type; + u32 payload; + + /* 9 bits for errno, 19 bits for msg_id */ + payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff); + type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM; + + return rtrs_to_imm(type, payload); +} + +static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno) +{ + /* 9 bits for errno, 19 bits for msg_id */ + *msg_id = payload & 0x7ffff; + *errno = -(int)((payload >> 19) & 0x1ff); +} + +#define STAT_STORE_FUNC(type, set_value, reset) \ +static ssize_t set_value##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + int ret = -EINVAL; \ + type *stats = container_of(kobj, type, kobj_stats); \ + \ + if (sysfs_streq(buf, "1")) \ + ret = reset(stats, true); \ + else if (sysfs_streq(buf, "0")) \ + ret = reset(stats, false); \ + if (ret) \ + return ret; \ + \ + return count; \ +} + +#define STAT_SHOW_FUNC(type, get_value, print) \ +static ssize_t get_value##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + type *stats = container_of(kobj, type, kobj_stats); \ + \ + return print(stats, page, PAGE_SIZE); \ +} + +#define STAT_ATTR(type, stat, print, reset) \ +STAT_STORE_FUNC(type, stat, reset) \ +STAT_SHOW_FUNC(type, stat, print) \ +static struct kobj_attribute stat##_attr = __ATTR_RW(stat) + +#endif /* RTRS_PRI_H */ From c0894b3ea69d35995bd220020b20570c45a1e6b4 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:10 +0200 Subject: [PATCH 332/562] RDMA/rtrs: core: lib functions shared between client and server modules This is a set of library functions existing as a rtrs-core module, used by client and server modules. Mainly these functions wrap IB and RDMA calls and provide a bit higher abstraction for implementing of RTRS protocol on client or server sides. Link: https://lore.kernel.org/r/20200511135131.27580-5-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs.c | 612 +++++++++++++++++++++++++++++ 1 file changed, 612 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs.c diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c new file mode 100644 index 000000000000..ff1093d6e4bc --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include +#include + +#include "rtrs-pri.h" +#include "rtrs-log.h" + +MODULE_DESCRIPTION("RDMA Transport Core"); +MODULE_LICENSE("GPL"); + +struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, + struct ib_device *dma_dev, + enum dma_data_direction dir, + void (*done)(struct ib_cq *cq, struct ib_wc *wc)) +{ + struct rtrs_iu *ius, *iu; + int i; + + ius = kcalloc(queue_size, sizeof(*ius), gfp_mask); + if (!ius) + return NULL; + for (i = 0; i < queue_size; i++) { + iu = &ius[i]; + iu->buf = kzalloc(size, gfp_mask); + if (!iu->buf) + goto err; + + iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir); + if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) + goto err; + + iu->cqe.done = done; + iu->size = size; + iu->direction = dir; + } + return ius; +err: + rtrs_iu_free(ius, dir, dma_dev, i); + return NULL; +} +EXPORT_SYMBOL_GPL(rtrs_iu_alloc); + +void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, + struct ib_device *ibdev, u32 queue_size) +{ + struct rtrs_iu *iu; + int i; + + if (!ius) + return; + + for (i = 0; i < queue_size; i++) { + iu = &ius[i]; + ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, dir); + kfree(iu->buf); + } + kfree(ius); +} +EXPORT_SYMBOL_GPL(rtrs_iu_free); + +int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu) +{ + struct rtrs_sess *sess = con->sess; + struct ib_recv_wr wr; + struct ib_sge list; + + list.addr = iu->dma_addr; + list.length = iu->size; + list.lkey = sess->dev->ib_pd->local_dma_lkey; + + if (list.length == 0) { + rtrs_wrn(con->sess, + "Posting receive work request failed, sg list is empty\n"); + return -EINVAL; + } + wr = (struct ib_recv_wr) { + .wr_cqe = &iu->cqe, + .sg_list = &list, + .num_sge = 1, + }; + + return ib_post_recv(con->qp, &wr, NULL); +} +EXPORT_SYMBOL_GPL(rtrs_iu_post_recv); + +int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe) +{ + struct ib_recv_wr wr; + + wr = (struct ib_recv_wr) { + .wr_cqe = cqe, + }; + + return ib_post_recv(con->qp, &wr, NULL); +} +EXPORT_SYMBOL_GPL(rtrs_post_recv_empty); + +int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, + struct ib_send_wr *head) +{ + struct rtrs_sess *sess = con->sess; + struct ib_send_wr wr; + struct ib_sge list; + + if (WARN_ON(size == 0)) + return -EINVAL; + + list.addr = iu->dma_addr; + list.length = size; + list.lkey = sess->dev->ib_pd->local_dma_lkey; + + wr = (struct ib_send_wr) { + .wr_cqe = &iu->cqe, + .sg_list = &list, + .num_sge = 1, + .opcode = IB_WR_SEND, + .send_flags = IB_SEND_SIGNALED, + }; + + if (head) { + struct ib_send_wr *tail = head; + + while (tail->next) + tail = tail->next; + tail->next = ≀ + } else { + head = ≀ + } + + return ib_post_send(con->qp, head, NULL); +} +EXPORT_SYMBOL_GPL(rtrs_iu_post_send); + +int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, + struct ib_sge *sge, unsigned int num_sge, + u32 rkey, u64 rdma_addr, u32 imm_data, + enum ib_send_flags flags, + struct ib_send_wr *head) +{ + struct ib_rdma_wr wr; + int i; + + wr = (struct ib_rdma_wr) { + .wr.wr_cqe = &iu->cqe, + .wr.sg_list = sge, + .wr.num_sge = num_sge, + .rkey = rkey, + .remote_addr = rdma_addr, + .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM, + .wr.ex.imm_data = cpu_to_be32(imm_data), + .wr.send_flags = flags, + }; + + /* + * If one of the sges has 0 size, the operation will fail with a + * length error + */ + for (i = 0; i < num_sge; i++) + if (WARN_ON(sge[i].length == 0)) + return -EINVAL; + + if (head) { + struct ib_send_wr *tail = head; + + while (tail->next) + tail = tail->next; + tail->next = &wr.wr; + } else { + head = &wr.wr; + } + + return ib_post_send(con->qp, head, NULL); +} +EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm); + +int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, + u32 imm_data, enum ib_send_flags flags, + struct ib_send_wr *head) +{ + struct ib_send_wr wr; + + wr = (struct ib_send_wr) { + .wr_cqe = cqe, + .send_flags = flags, + .opcode = IB_WR_RDMA_WRITE_WITH_IMM, + .ex.imm_data = cpu_to_be32(imm_data), + }; + + if (head) { + struct ib_send_wr *tail = head; + + while (tail->next) + tail = tail->next; + tail->next = ≀ + } else { + head = ≀ + } + + return ib_post_send(con->qp, head, NULL); +} +EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); + +static void qp_event_handler(struct ib_event *ev, void *ctx) +{ + struct rtrs_con *con = ctx; + + switch (ev->event) { + case IB_EVENT_COMM_EST: + rtrs_info(con->sess, "QP event %s (%d) received\n", + ib_event_msg(ev->event), ev->event); + rdma_notify(con->cm_id, IB_EVENT_COMM_EST); + break; + default: + rtrs_info(con->sess, "Unhandled QP event %s (%d) received\n", + ib_event_msg(ev->event), ev->event); + break; + } +} + +static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size, + enum ib_poll_context poll_ctx) +{ + struct rdma_cm_id *cm_id = con->cm_id; + struct ib_cq *cq; + + cq = ib_alloc_cq(cm_id->device, con, cq_size, + cq_vector, poll_ctx); + if (IS_ERR(cq)) { + rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n", + PTR_ERR(cq)); + return PTR_ERR(cq); + } + con->cq = cq; + + return 0; +} + +static int create_qp(struct rtrs_con *con, struct ib_pd *pd, + u16 wr_queue_size, u32 max_sge) +{ + struct ib_qp_init_attr init_attr = {NULL}; + struct rdma_cm_id *cm_id = con->cm_id; + int ret; + + init_attr.cap.max_send_wr = wr_queue_size; + init_attr.cap.max_recv_wr = wr_queue_size; + init_attr.cap.max_recv_sge = 1; + init_attr.event_handler = qp_event_handler; + init_attr.qp_context = con; + init_attr.cap.max_send_sge = max_sge; + + init_attr.qp_type = IB_QPT_RC; + init_attr.send_cq = con->cq; + init_attr.recv_cq = con->cq; + init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + + ret = rdma_create_qp(cm_id, pd, &init_attr); + if (ret) { + rtrs_err(con->sess, "Creating QP failed, err: %d\n", ret); + return ret; + } + con->qp = cm_id->qp; + + return ret; +} + +int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, + u32 max_send_sge, int cq_vector, u16 cq_size, + u16 wr_queue_size, enum ib_poll_context poll_ctx) +{ + int err; + + err = create_cq(con, cq_vector, cq_size, poll_ctx); + if (err) + return err; + + err = create_qp(con, sess->dev->ib_pd, wr_queue_size, max_send_sge); + if (err) { + ib_free_cq(con->cq); + con->cq = NULL; + return err; + } + con->sess = sess; + + return 0; +} +EXPORT_SYMBOL_GPL(rtrs_cq_qp_create); + +void rtrs_cq_qp_destroy(struct rtrs_con *con) +{ + if (con->qp) { + rdma_destroy_qp(con->cm_id); + con->qp = NULL; + } + if (con->cq) { + ib_free_cq(con->cq); + con->cq = NULL; + } +} +EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy); + +static void schedule_hb(struct rtrs_sess *sess) +{ + queue_delayed_work(sess->hb_wq, &sess->hb_dwork, + msecs_to_jiffies(sess->hb_interval_ms)); +} + +void rtrs_send_hb_ack(struct rtrs_sess *sess) +{ + struct rtrs_con *usr_con = sess->con[0]; + u32 imm; + int err; + + imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0); + err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, + IB_SEND_SIGNALED, NULL); + if (err) { + sess->hb_err_handler(usr_con); + return; + } +} +EXPORT_SYMBOL_GPL(rtrs_send_hb_ack); + +static void hb_work(struct work_struct *work) +{ + struct rtrs_con *usr_con; + struct rtrs_sess *sess; + u32 imm; + int err; + + sess = container_of(to_delayed_work(work), typeof(*sess), hb_dwork); + usr_con = sess->con[0]; + + if (sess->hb_missed_cnt > sess->hb_missed_max) { + sess->hb_err_handler(usr_con); + return; + } + if (sess->hb_missed_cnt++) { + /* Reschedule work without sending hb */ + schedule_hb(sess); + return; + } + imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0); + err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, + IB_SEND_SIGNALED, NULL); + if (err) { + sess->hb_err_handler(usr_con); + return; + } + + schedule_hb(sess); +} + +void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, + unsigned int interval_ms, unsigned int missed_max, + void (*err_handler)(struct rtrs_con *con), + struct workqueue_struct *wq) +{ + sess->hb_cqe = cqe; + sess->hb_interval_ms = interval_ms; + sess->hb_err_handler = err_handler; + sess->hb_wq = wq; + sess->hb_missed_max = missed_max; + sess->hb_missed_cnt = 0; + INIT_DELAYED_WORK(&sess->hb_dwork, hb_work); +} +EXPORT_SYMBOL_GPL(rtrs_init_hb); + +void rtrs_start_hb(struct rtrs_sess *sess) +{ + schedule_hb(sess); +} +EXPORT_SYMBOL_GPL(rtrs_start_hb); + +void rtrs_stop_hb(struct rtrs_sess *sess) +{ + cancel_delayed_work_sync(&sess->hb_dwork); + sess->hb_missed_cnt = 0; + sess->hb_missed_max = 0; +} +EXPORT_SYMBOL_GPL(rtrs_stop_hb); + +static int rtrs_str_gid_to_sockaddr(const char *addr, size_t len, + short port, struct sockaddr_storage *dst) +{ + struct sockaddr_ib *dst_ib = (struct sockaddr_ib *)dst; + int ret; + + /* + * We can use some of the IPv6 functions since GID is a valid + * IPv6 address format + */ + ret = in6_pton(addr, len, dst_ib->sib_addr.sib_raw, '\0', NULL); + if (ret == 0) + return -EINVAL; + + dst_ib->sib_family = AF_IB; + /* + * Use the same TCP server port number as the IB service ID + * on the IB port space range + */ + dst_ib->sib_sid = cpu_to_be64(RDMA_IB_IP_PS_IB | port); + dst_ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); + dst_ib->sib_pkey = cpu_to_be16(0xffff); + + return 0; +} + +/** + * rtrs_str_to_sockaddr() - Convert rtrs address string to sockaddr + * @addr: String representation of an addr (IPv4, IPv6 or IB GID): + * - "ip:192.168.1.1" + * - "ip:fe80::200:5aee:feaa:20a2" + * - "gid:fe80::200:5aee:feaa:20a2" + * @len: String address length + * @port: Destination port + * @dst: Destination sockaddr structure + * + * Returns 0 if conversion successful. Non-zero on error. + */ +static int rtrs_str_to_sockaddr(const char *addr, size_t len, + u16 port, struct sockaddr_storage *dst) +{ + if (strncmp(addr, "gid:", 4) == 0) { + return rtrs_str_gid_to_sockaddr(addr + 4, len - 4, port, dst); + } else if (strncmp(addr, "ip:", 3) == 0) { + char port_str[8]; + char *cpy; + int err; + + snprintf(port_str, sizeof(port_str), "%u", port); + cpy = kstrndup(addr + 3, len - 3, GFP_KERNEL); + err = cpy ? inet_pton_with_scope(&init_net, AF_UNSPEC, + cpy, port_str, dst) : -ENOMEM; + kfree(cpy); + + return err; + } + return -EPROTONOSUPPORT; +} + +/** + * sockaddr_to_str() - convert sockaddr to a string. + * @addr: the sockadddr structure to be converted. + * @buf: string containing socket addr. + * @len: string length. + * + * The return value is the number of characters written into buf not + * including the trailing '\0'. If len is == 0 the function returns 0.. + */ +int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len) +{ + + switch (addr->sa_family) { + case AF_IB: + return scnprintf(buf, len, "gid:%pI6", + &((struct sockaddr_ib *)addr)->sib_addr.sib_raw); + case AF_INET: + return scnprintf(buf, len, "ip:%pI4", + &((struct sockaddr_in *)addr)->sin_addr); + case AF_INET6: + return scnprintf(buf, len, "ip:%pI6c", + &((struct sockaddr_in6 *)addr)->sin6_addr); + } + return scnprintf(buf, len, ""); +} +EXPORT_SYMBOL(sockaddr_to_str); + +/** + * rtrs_addr_to_sockaddr() - convert path string "src,dst" or "src@dst" + * to sockaddreses + * @str: string containing source and destination addr of a path + * separated by ',' or '@' I.e. "ip:1.1.1.1,ip:1.1.1.2" or + * "ip:1.1.1.1@ip:1.1.1.2". If str contains only one address it's + * considered to be destination. + * @len: string length + * @port: Destination port number. + * @addr: will be set to the source/destination address or to NULL + * if str doesn't contain any source address. + * + * Returns zero if conversion successful. Non-zero otherwise. + */ +int rtrs_addr_to_sockaddr(const char *str, size_t len, u16 port, + struct rtrs_addr *addr) +{ + const char *d; + + d = strchr(str, ','); + if (!d) + d = strchr(str, '@'); + if (d) { + if (rtrs_str_to_sockaddr(str, d - str, 0, addr->src)) + return -EINVAL; + d += 1; + len -= d - str; + str = d; + + } else { + addr->src = NULL; + } + return rtrs_str_to_sockaddr(str, len, port, addr->dst); +} +EXPORT_SYMBOL(rtrs_addr_to_sockaddr); + +void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags, + struct rtrs_rdma_dev_pd *pool) +{ + WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free)); + INIT_LIST_HEAD(&pool->list); + mutex_init(&pool->mutex); + pool->pd_flags = pd_flags; +} +EXPORT_SYMBOL(rtrs_rdma_dev_pd_init); + +void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool) +{ + mutex_destroy(&pool->mutex); + WARN_ON(!list_empty(&pool->list)); +} +EXPORT_SYMBOL(rtrs_rdma_dev_pd_deinit); + +static void dev_free(struct kref *ref) +{ + struct rtrs_rdma_dev_pd *pool; + struct rtrs_ib_dev *dev; + + dev = container_of(ref, typeof(*dev), ref); + pool = dev->pool; + + mutex_lock(&pool->mutex); + list_del(&dev->entry); + mutex_unlock(&pool->mutex); + + if (pool->ops && pool->ops->deinit) + pool->ops->deinit(dev); + + ib_dealloc_pd(dev->ib_pd); + + if (pool->ops && pool->ops->free) + pool->ops->free(dev); + else + kfree(dev); +} + +int rtrs_ib_dev_put(struct rtrs_ib_dev *dev) +{ + return kref_put(&dev->ref, dev_free); +} +EXPORT_SYMBOL(rtrs_ib_dev_put); + +static int rtrs_ib_dev_get(struct rtrs_ib_dev *dev) +{ + return kref_get_unless_zero(&dev->ref); +} + +struct rtrs_ib_dev * +rtrs_ib_dev_find_or_add(struct ib_device *ib_dev, + struct rtrs_rdma_dev_pd *pool) +{ + struct rtrs_ib_dev *dev; + + mutex_lock(&pool->mutex); + list_for_each_entry(dev, &pool->list, entry) { + if (dev->ib_dev->node_guid == ib_dev->node_guid && + rtrs_ib_dev_get(dev)) + goto out_unlock; + } + mutex_unlock(&pool->mutex); + if (pool->ops && pool->ops->alloc) + dev = pool->ops->alloc(); + else + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (IS_ERR_OR_NULL(dev)) + goto out_err; + + kref_init(&dev->ref); + dev->pool = pool; + dev->ib_dev = ib_dev; + dev->ib_pd = ib_alloc_pd(ib_dev, pool->pd_flags); + if (IS_ERR(dev->ib_pd)) + goto out_free_dev; + + if (pool->ops && pool->ops->init && pool->ops->init(dev)) + goto out_free_pd; + + mutex_lock(&pool->mutex); + list_add(&dev->entry, &pool->list); +out_unlock: + mutex_unlock(&pool->mutex); + return dev; + +out_free_pd: + ib_dealloc_pd(dev->ib_pd); +out_free_dev: + if (pool->ops && pool->ops->free) + pool->ops->free(dev); + else + kfree(dev); +out_err: + return NULL; +} +EXPORT_SYMBOL(rtrs_ib_dev_find_or_add); From cb80329c9434c64493789e7ea5b1f2957021ce61 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:11 +0200 Subject: [PATCH 333/562] RDMA/rtrs: client: private header with client structs and functions This header describes main structs and functions used by rtrs-client module, mainly for managing rtrs sessions, creating/destroying sysfs entries, accounting statistics on client side. Link: https://lore.kernel.org/r/20200511135131.27580-6-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.h | 251 +++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-clt.h diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h new file mode 100644 index 000000000000..039a2ebba2f9 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#ifndef RTRS_CLT_H +#define RTRS_CLT_H + +#include +#include "rtrs-pri.h" + +/** + * enum rtrs_clt_state - Client states. + */ +enum rtrs_clt_state { + RTRS_CLT_CONNECTING, + RTRS_CLT_CONNECTING_ERR, + RTRS_CLT_RECONNECTING, + RTRS_CLT_CONNECTED, + RTRS_CLT_CLOSING, + RTRS_CLT_CLOSED, + RTRS_CLT_DEAD, +}; + +enum rtrs_mp_policy { + MP_POLICY_RR, + MP_POLICY_MIN_INFLIGHT, +}; + +/* see Documentation/ABI/testing/sysfs-class-rtrs-client for details */ +struct rtrs_clt_stats_reconnects { + int successful_cnt; + int fail_cnt; +}; + +/* see Documentation/ABI/testing/sysfs-class-rtrs-client for details */ +struct rtrs_clt_stats_cpu_migr { + atomic_t from; + int to; +}; + +/* stats for Read and write operation. + * see Documentation/ABI/testing/sysfs-class-rtrs-client for details + */ +struct rtrs_clt_stats_rdma { + struct { + u64 cnt; + u64 size_total; + } dir[2]; + + u64 failover_cnt; +}; + +struct rtrs_clt_stats_pcpu { + struct rtrs_clt_stats_cpu_migr cpu_migr; + struct rtrs_clt_stats_rdma rdma; +}; + +struct rtrs_clt_stats { + struct kobject kobj_stats; + struct rtrs_clt_stats_pcpu __percpu *pcpu_stats; + struct rtrs_clt_stats_reconnects reconnects; + atomic_t inflight; +}; + +struct rtrs_clt_con { + struct rtrs_con c; + struct rtrs_iu *rsp_ius; + u32 queue_size; + unsigned int cpu; + atomic_t io_cnt; + int cm_err; +}; + +/** + * rtrs_permit - permits the memory allocation for future RDMA operation. + * Combine with irq pinning to keep IO on same CPU. + */ +struct rtrs_permit { + enum rtrs_clt_con_type con_type; + unsigned int cpu_id; + unsigned int mem_id; + unsigned int mem_off; +}; + +/** + * rtrs_clt_io_req - describes one inflight IO request + */ +struct rtrs_clt_io_req { + struct list_head list; + struct rtrs_iu *iu; + struct scatterlist *sglist; /* list holding user data */ + unsigned int sg_cnt; + unsigned int sg_size; + unsigned int data_len; + unsigned int usr_len; + void *priv; + bool in_use; + struct rtrs_clt_con *con; + struct rtrs_sg_desc *desc; + struct ib_sge *sge; + struct rtrs_permit *permit; + enum dma_data_direction dir; + void (*conf)(void *priv, int errno); + unsigned long start_jiffies; + + struct ib_mr *mr; + struct ib_cqe inv_cqe; + struct completion inv_comp; + int inv_errno; + bool need_inv_comp; + bool need_inv; +}; + +struct rtrs_rbuf { + u64 addr; + u32 rkey; +}; + +struct rtrs_clt_sess { + struct rtrs_sess s; + struct rtrs_clt *clt; + wait_queue_head_t state_wq; + enum rtrs_clt_state state; + atomic_t connected_cnt; + struct mutex init_mutex; + struct rtrs_clt_io_req *reqs; + struct delayed_work reconnect_dwork; + struct work_struct close_work; + unsigned int reconnect_attempts; + bool established; + struct rtrs_rbuf *rbufs; + size_t max_io_size; + u32 max_hdr_size; + u32 chunk_size; + size_t queue_depth; + u32 max_pages_per_mr; + int max_send_sge; + u32 flags; + struct kobject kobj; + struct rtrs_clt_stats *stats; + /* cache hca_port and hca_name to display in sysfs */ + u8 hca_port; + char hca_name[IB_DEVICE_NAME_MAX]; + struct list_head __percpu + *mp_skip_entry; +}; + +struct rtrs_clt { + struct list_head paths_list; /* rcu protected list */ + size_t paths_num; + struct rtrs_clt_sess + __rcu * __percpu *pcpu_path; + uuid_t paths_uuid; + int paths_up; + struct mutex paths_mutex; + struct mutex paths_ev_mutex; + char sessname[NAME_MAX]; + u16 port; + unsigned int max_reconnect_attempts; + unsigned int reconnect_delay_sec; + unsigned int max_segments; + void *permits; + unsigned long *permits_map; + size_t queue_depth; + size_t max_io_size; + wait_queue_head_t permits_wait; + size_t pdu_sz; + void *priv; + void (*link_ev)(void *priv, + enum rtrs_clt_link_ev ev); + struct device dev; + struct kobject *kobj_paths; + enum rtrs_mp_policy mp_policy; +}; + +static inline struct rtrs_clt_con *to_clt_con(struct rtrs_con *c) +{ + return container_of(c, struct rtrs_clt_con, c); +} + +static inline struct rtrs_clt_sess *to_clt_sess(struct rtrs_sess *s) +{ + return container_of(s, struct rtrs_clt_sess, s); +} + +static inline int permit_size(struct rtrs_clt *clt) +{ + return sizeof(struct rtrs_permit) + clt->pdu_sz; +} + +static inline struct rtrs_permit *get_permit(struct rtrs_clt *clt, int idx) +{ + return (struct rtrs_permit *)(clt->permits + permit_size(clt) * idx); +} + +int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess); +int rtrs_clt_disconnect_from_sysfs(struct rtrs_clt_sess *sess); +int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, + struct rtrs_addr *addr); +int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, + const struct attribute *sysfs_self); + +void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value); +int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt); +void free_sess(struct rtrs_clt_sess *sess); + +/* rtrs-clt-stats.c */ + +int rtrs_clt_init_stats(struct rtrs_clt_stats *stats); + +void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *s); + +void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con); +void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir); + +int rtrs_clt_reset_rdma_lat_distr_stats(struct rtrs_clt_stats *stats, + bool enable); +ssize_t rtrs_clt_stats_rdma_lat_distr_to_str(struct rtrs_clt_stats *stats, + char *page, size_t len); +int rtrs_clt_reset_cpu_migr_stats(struct rtrs_clt_stats *stats, bool enable); +int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf, + size_t len); +int rtrs_clt_reset_reconnects_stat(struct rtrs_clt_stats *stats, bool enable); +int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, + size_t len); +int rtrs_clt_reset_wc_comp_stats(struct rtrs_clt_stats *stats, bool enable); +int rtrs_clt_stats_wc_completion_to_str(struct rtrs_clt_stats *stats, char *buf, + size_t len); +int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable); +ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, + char *page, size_t len); +int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *stats, bool enable); +ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, + char *page, size_t len); + +/* rtrs-clt-sysfs.c */ + +int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt); +void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt); +void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt); + +int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess); +void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess, + const struct attribute *sysfs_self); + +#endif /* RTRS_CLT_H */ From 6a98d71daea186247005099758af549e6afdd244 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:12 +0200 Subject: [PATCH 334/562] RDMA/rtrs: client: main functionality This is main functionality of rtrs-client module, which manages set of RDMA connections for each rtrs session, does multipathing, load balancing and failover of RDMA requests. Link: https://lore.kernel.org/r/20200511135131.27580-7-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2994 ++++++++++++++++++++++++ 1 file changed, 2994 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-clt.c diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c new file mode 100644 index 000000000000..468fdd0d8713 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -0,0 +1,2994 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include +#include +#include /* for BLK_MAX_SEGMENT_SIZE */ + +#include "rtrs-clt.h" +#include "rtrs-log.h" + +#define RTRS_CONNECT_TIMEOUT_MS 30000 +/* + * Wait a bit before trying to reconnect after a failure + * in order to give server time to finish clean up which + * leads to "false positives" failed reconnect attempts + */ +#define RTRS_RECONNECT_BACKOFF 1000 + +MODULE_DESCRIPTION("RDMA Transport Client"); +MODULE_LICENSE("GPL"); + +static const struct rtrs_rdma_dev_pd_ops dev_pd_ops; +static struct rtrs_rdma_dev_pd dev_pd = { + .ops = &dev_pd_ops +}; + +static struct workqueue_struct *rtrs_wq; +static struct class *rtrs_clt_dev_class; + +static inline bool rtrs_clt_is_connected(const struct rtrs_clt *clt) +{ + struct rtrs_clt_sess *sess; + bool connected = false; + + rcu_read_lock(); + list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) + connected |= READ_ONCE(sess->state) == RTRS_CLT_CONNECTED; + rcu_read_unlock(); + + return connected; +} + +static struct rtrs_permit * +__rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type) +{ + size_t max_depth = clt->queue_depth; + struct rtrs_permit *permit; + int bit; + + /* + * Adapted from null_blk get_tag(). Callers from different cpus may + * grab the same bit, since find_first_zero_bit is not atomic. + * But then the test_and_set_bit_lock will fail for all the + * callers but one, so that they will loop again. + * This way an explicit spinlock is not required. + */ + do { + bit = find_first_zero_bit(clt->permits_map, max_depth); + if (unlikely(bit >= max_depth)) + return NULL; + } while (unlikely(test_and_set_bit_lock(bit, clt->permits_map))); + + permit = get_permit(clt, bit); + WARN_ON(permit->mem_id != bit); + permit->cpu_id = raw_smp_processor_id(); + permit->con_type = con_type; + + return permit; +} + +static inline void __rtrs_put_permit(struct rtrs_clt *clt, + struct rtrs_permit *permit) +{ + clear_bit_unlock(permit->mem_id, clt->permits_map); +} + +/** + * rtrs_clt_get_permit() - allocates permit for future RDMA operation + * @clt: Current session + * @con_type: Type of connection to use with the permit + * @can_wait: Wait type + * + * Description: + * Allocates permit for the following RDMA operation. Permit is used + * to preallocate all resources and to propagate memory pressure + * up earlier. + * + * Context: + * Can sleep if @wait == RTRS_TAG_WAIT + */ +struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt, + enum rtrs_clt_con_type con_type, + int can_wait) +{ + struct rtrs_permit *permit; + DEFINE_WAIT(wait); + + permit = __rtrs_get_permit(clt, con_type); + if (likely(permit) || !can_wait) + return permit; + + do { + prepare_to_wait(&clt->permits_wait, &wait, + TASK_UNINTERRUPTIBLE); + permit = __rtrs_get_permit(clt, con_type); + if (likely(permit)) + break; + + io_schedule(); + } while (1); + + finish_wait(&clt->permits_wait, &wait); + + return permit; +} +EXPORT_SYMBOL(rtrs_clt_get_permit); + +/** + * rtrs_clt_put_permit() - puts allocated permit + * @clt: Current session + * @permit: Permit to be freed + * + * Context: + * Does not matter + */ +void rtrs_clt_put_permit(struct rtrs_clt *clt, struct rtrs_permit *permit) +{ + if (WARN_ON(!test_bit(permit->mem_id, clt->permits_map))) + return; + + __rtrs_put_permit(clt, permit); + + /* + * rtrs_clt_get_permit() adds itself to the &clt->permits_wait list + * before calling schedule(). So if rtrs_clt_get_permit() is sleeping + * it must have added itself to &clt->permits_wait before + * __rtrs_put_permit() finished. + * Hence it is safe to guard wake_up() with a waitqueue_active() test. + */ + if (waitqueue_active(&clt->permits_wait)) + wake_up(&clt->permits_wait); +} +EXPORT_SYMBOL(rtrs_clt_put_permit); + +void *rtrs_permit_to_pdu(struct rtrs_permit *permit) +{ + return permit + 1; +} +EXPORT_SYMBOL(rtrs_permit_to_pdu); + +/** + * rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit + * @sess: client session pointer + * @permit: permit for the allocation of the RDMA buffer + * Note: + * IO connection starts from 1. + * 0 connection is for user messages. + */ +static +struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, + struct rtrs_permit *permit) +{ + int id = 0; + + if (likely(permit->con_type == RTRS_IO_CON)) + id = (permit->cpu_id % (sess->s.con_num - 1)) + 1; + + return to_clt_con(sess->s.con[id]); +} + +/** + * __rtrs_clt_change_state() - change the session state through session state + * machine. + * + * @sess: client session to change the state of. + * @new_state: state to change to. + * + * returns true if successful, false if the requested state can not be set. + * + * Locks: + * state_wq lock must be hold. + */ +static bool __rtrs_clt_change_state(struct rtrs_clt_sess *sess, + enum rtrs_clt_state new_state) +{ + enum rtrs_clt_state old_state; + bool changed = false; + + lockdep_assert_held(&sess->state_wq.lock); + + old_state = sess->state; + switch (new_state) { + case RTRS_CLT_CONNECTING: + switch (old_state) { + case RTRS_CLT_RECONNECTING: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_CLT_RECONNECTING: + switch (old_state) { + case RTRS_CLT_CONNECTED: + case RTRS_CLT_CONNECTING_ERR: + case RTRS_CLT_CLOSED: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_CLT_CONNECTED: + switch (old_state) { + case RTRS_CLT_CONNECTING: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_CLT_CONNECTING_ERR: + switch (old_state) { + case RTRS_CLT_CONNECTING: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_CLT_CLOSING: + switch (old_state) { + case RTRS_CLT_CONNECTING: + case RTRS_CLT_CONNECTING_ERR: + case RTRS_CLT_RECONNECTING: + case RTRS_CLT_CONNECTED: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_CLT_CLOSED: + switch (old_state) { + case RTRS_CLT_CLOSING: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_CLT_DEAD: + switch (old_state) { + case RTRS_CLT_CLOSED: + changed = true; + fallthrough; + default: + break; + } + break; + default: + break; + } + if (changed) { + sess->state = new_state; + wake_up_locked(&sess->state_wq); + } + + return changed; +} + +static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess, + enum rtrs_clt_state old_state, + enum rtrs_clt_state new_state) +{ + bool changed = false; + + spin_lock_irq(&sess->state_wq.lock); + if (sess->state == old_state) + changed = __rtrs_clt_change_state(sess, new_state); + spin_unlock_irq(&sess->state_wq.lock); + + return changed; +} + +static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + + if (rtrs_clt_change_state_from_to(sess, + RTRS_CLT_CONNECTED, + RTRS_CLT_RECONNECTING)) { + struct rtrs_clt *clt = sess->clt; + unsigned int delay_ms; + + /* + * Normal scenario, reconnect if we were successfully connected + */ + delay_ms = clt->reconnect_delay_sec * 1000; + queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, + msecs_to_jiffies(delay_ms)); + } else { + /* + * Error can happen just on establishing new connection, + * so notify waiter with error state, waiter is responsible + * for cleaning the rest and reconnect if needed. + */ + rtrs_clt_change_state_from_to(sess, + RTRS_CLT_CONNECTING, + RTRS_CLT_CONNECTING_ERR); + } +} + +static void rtrs_clt_fast_reg_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_clt_con *con = cq->cq_context; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(con->c.sess, "Failed IB_WR_REG_MR: %s\n", + ib_wc_status_msg(wc->status)); + rtrs_rdma_error_recovery(con); + } +} + +static struct ib_cqe fast_reg_cqe = { + .done = rtrs_clt_fast_reg_done +}; + +static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, + bool notify, bool can_wait); + +static void rtrs_clt_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_clt_io_req *req = + container_of(wc->wr_cqe, typeof(*req), inv_cqe); + struct rtrs_clt_con *con = cq->cq_context; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(con->c.sess, "Failed IB_WR_LOCAL_INV: %s\n", + ib_wc_status_msg(wc->status)); + rtrs_rdma_error_recovery(con); + } + req->need_inv = false; + if (likely(req->need_inv_comp)) + complete(&req->inv_comp); + else + /* Complete request from INV callback */ + complete_rdma_req(req, req->inv_errno, true, false); +} + +static int rtrs_inv_rkey(struct rtrs_clt_io_req *req) +{ + struct rtrs_clt_con *con = req->con; + struct ib_send_wr wr = { + .opcode = IB_WR_LOCAL_INV, + .wr_cqe = &req->inv_cqe, + .send_flags = IB_SEND_SIGNALED, + .ex.invalidate_rkey = req->mr->rkey, + }; + req->inv_cqe.done = rtrs_clt_inv_rkey_done; + + return ib_post_send(con->c.qp, &wr, NULL); +} + +static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, + bool notify, bool can_wait) +{ + struct rtrs_clt_con *con = req->con; + struct rtrs_clt_sess *sess; + int err; + + if (WARN_ON(!req->in_use)) + return; + if (WARN_ON(!req->con)) + return; + sess = to_clt_sess(con->c.sess); + + if (req->sg_cnt) { + if (unlikely(req->dir == DMA_FROM_DEVICE && req->need_inv)) { + /* + * We are here to invalidate read requests + * ourselves. In normal scenario server should + * send INV for all read requests, but + * we are here, thus two things could happen: + * + * 1. this is failover, when errno != 0 + * and can_wait == 1, + * + * 2. something totally bad happened and + * server forgot to send INV, so we + * should do that ourselves. + */ + + if (likely(can_wait)) { + req->need_inv_comp = true; + } else { + /* This should be IO path, so always notify */ + WARN_ON(!notify); + /* Save errno for INV callback */ + req->inv_errno = errno; + } + + err = rtrs_inv_rkey(req); + if (unlikely(err)) { + rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n", + req->mr->rkey, err); + } else if (likely(can_wait)) { + wait_for_completion(&req->inv_comp); + } else { + /* + * Something went wrong, so request will be + * completed from INV callback. + */ + WARN_ON_ONCE(1); + + return; + } + } + ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + req->sg_cnt, req->dir); + } + if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) + atomic_dec(&sess->stats->inflight); + + req->in_use = false; + req->con = NULL; + + if (notify) + req->conf(req->priv, errno); +} + +static int rtrs_post_send_rdma(struct rtrs_clt_con *con, + struct rtrs_clt_io_req *req, + struct rtrs_rbuf *rbuf, u32 off, + u32 imm, struct ib_send_wr *wr) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + enum ib_send_flags flags; + struct ib_sge sge; + + if (unlikely(!req->sg_size)) { + rtrs_wrn(con->c.sess, + "Doing RDMA Write failed, no data supplied\n"); + return -EINVAL; + } + + /* user data and user message in the first list element */ + sge.addr = req->iu->dma_addr; + sge.length = req->sg_size; + sge.lkey = sess->s.dev->ib_pd->local_dma_lkey; + + /* + * From time to time we have to post signalled sends, + * or send queue will fill up and only QP reset can help. + */ + flags = atomic_inc_return(&con->io_cnt) % sess->queue_depth ? + 0 : IB_SEND_SIGNALED; + + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, + req->sg_size, DMA_TO_DEVICE); + + return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, &sge, 1, + rbuf->rkey, rbuf->addr + off, + imm, flags, wr); +} + +static void process_io_rsp(struct rtrs_clt_sess *sess, u32 msg_id, + s16 errno, bool w_inval) +{ + struct rtrs_clt_io_req *req; + + if (WARN_ON(msg_id >= sess->queue_depth)) + return; + + req = &sess->reqs[msg_id]; + /* Drop need_inv if server responded with send with invalidation */ + req->need_inv &= !w_inval; + complete_rdma_req(req, errno, true, false); +} + +static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc) +{ + struct rtrs_iu *iu; + int err; + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + + WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + iu = container_of(wc->wr_cqe, struct rtrs_iu, + cqe); + err = rtrs_iu_post_recv(&con->c, iu); + if (unlikely(err)) { + rtrs_err(con->c.sess, "post iu failed %d\n", err); + rtrs_rdma_error_recovery(con); + } +} + +static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_msg_rkey_rsp *msg; + u32 imm_type, imm_payload; + bool w_inval = false; + struct rtrs_iu *iu; + u32 buf_id; + int err; + + WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + + iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); + + if (unlikely(wc->byte_len < sizeof(*msg))) { + rtrs_err(con->c.sess, "rkey response is malformed: size %d\n", + wc->byte_len); + goto out; + } + ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + iu->size, DMA_FROM_DEVICE); + msg = iu->buf; + if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP)) { + rtrs_err(sess->clt, "rkey response is malformed: type %d\n", + le16_to_cpu(msg->type)); + goto out; + } + buf_id = le16_to_cpu(msg->buf_id); + if (WARN_ON(buf_id >= sess->queue_depth)) + goto out; + + rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload); + if (likely(imm_type == RTRS_IO_RSP_IMM || + imm_type == RTRS_IO_RSP_W_INV_IMM)) { + u32 msg_id; + + w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM); + rtrs_from_io_rsp_imm(imm_payload, &msg_id, &err); + + if (WARN_ON(buf_id != msg_id)) + goto out; + sess->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey); + process_io_rsp(sess, msg_id, err, w_inval); + } + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, iu->dma_addr, + iu->size, DMA_FROM_DEVICE); + return rtrs_clt_recv_done(con, wc); +out: + rtrs_rdma_error_recovery(con); +} + +static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc); + +static struct ib_cqe io_comp_cqe = { + .done = rtrs_clt_rdma_done +}; + +/* + * Post x2 empty WRs: first is for this RDMA with IMM, + * second is for RECV with INV, which happened earlier. + */ +static int rtrs_post_recv_empty_x2(struct rtrs_con *con, struct ib_cqe *cqe) +{ + struct ib_recv_wr wr_arr[2], *wr; + int i; + + memset(wr_arr, 0, sizeof(wr_arr)); + for (i = 0; i < ARRAY_SIZE(wr_arr); i++) { + wr = &wr_arr[i]; + wr->wr_cqe = cqe; + if (i) + /* Chain backwards */ + wr->next = &wr_arr[i - 1]; + } + + return ib_post_recv(con->qp, wr, NULL); +} + +static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_clt_con *con = cq->cq_context; + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + u32 imm_type, imm_payload; + bool w_inval = false; + int err; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_WR_FLUSH_ERR) { + rtrs_err(sess->clt, "RDMA failed: %s\n", + ib_wc_status_msg(wc->status)); + rtrs_rdma_error_recovery(con); + } + return; + } + rtrs_clt_update_wc_stats(con); + + switch (wc->opcode) { + case IB_WC_RECV_RDMA_WITH_IMM: + /* + * post_recv() RDMA write completions of IO reqs (read/write) + * and hb + */ + if (WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done)) + return; + rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), + &imm_type, &imm_payload); + if (likely(imm_type == RTRS_IO_RSP_IMM || + imm_type == RTRS_IO_RSP_W_INV_IMM)) { + u32 msg_id; + + w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM); + rtrs_from_io_rsp_imm(imm_payload, &msg_id, &err); + + process_io_rsp(sess, msg_id, err, w_inval); + } else if (imm_type == RTRS_HB_MSG_IMM) { + WARN_ON(con->c.cid); + rtrs_send_hb_ack(&sess->s); + if (sess->flags == RTRS_MSG_NEW_RKEY_F) + return rtrs_clt_recv_done(con, wc); + } else if (imm_type == RTRS_HB_ACK_IMM) { + WARN_ON(con->c.cid); + sess->s.hb_missed_cnt = 0; + if (sess->flags == RTRS_MSG_NEW_RKEY_F) + return rtrs_clt_recv_done(con, wc); + } else { + rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", + imm_type); + } + if (w_inval) + /* + * Post x2 empty WRs: first is for this RDMA with IMM, + * second is for RECV with INV, which happened earlier. + */ + err = rtrs_post_recv_empty_x2(&con->c, &io_comp_cqe); + else + err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); + if (unlikely(err)) { + rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n", + err); + rtrs_rdma_error_recovery(con); + break; + } + break; + case IB_WC_RECV: + /* + * Key invalidations from server side + */ + WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || + wc->wc_flags & IB_WC_WITH_IMM)); + WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); + if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (wc->wc_flags & IB_WC_WITH_INVALIDATE) + return rtrs_clt_recv_done(con, wc); + + return rtrs_clt_rkey_rsp_done(con, wc); + } + break; + case IB_WC_RDMA_WRITE: + /* + * post_send() RDMA write completions of IO reqs (read/write) + * and hb + */ + break; + + default: + rtrs_wrn(sess->clt, "Unexpected WC type: %d\n", wc->opcode); + return; + } +} + +static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) +{ + int err, i; + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + + for (i = 0; i < q_size; i++) { + if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + struct rtrs_iu *iu = &con->rsp_ius[i]; + + err = rtrs_iu_post_recv(&con->c, iu); + } else { + err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); + } + if (unlikely(err)) + return err; + } + + return 0; +} + +static int post_recv_sess(struct rtrs_clt_sess *sess) +{ + size_t q_size = 0; + int err, cid; + + for (cid = 0; cid < sess->s.con_num; cid++) { + if (cid == 0) + q_size = SERVICE_CON_QUEUE_DEPTH; + else + q_size = sess->queue_depth; + + /* + * x2 for RDMA read responses + FR key invalidations, + * RDMA writes do not require any FR registrations. + */ + q_size *= 2; + + err = post_recv_io(to_clt_con(sess->s.con[cid]), q_size); + if (unlikely(err)) { + rtrs_err(sess->clt, "post_recv_io(), err: %d\n", err); + return err; + } + } + + return 0; +} + +struct path_it { + int i; + struct list_head skip_list; + struct rtrs_clt *clt; + struct rtrs_clt_sess *(*next_path)(struct path_it *it); +}; + +#define do_each_path(path, clt, it) { \ + path_it_init(it, clt); \ + rcu_read_lock(); \ + for ((it)->i = 0; ((path) = ((it)->next_path)(it)) && \ + (it)->i < (it)->clt->paths_num; \ + (it)->i++) + +#define while_each_path(it) \ + path_it_deinit(it); \ + rcu_read_unlock(); \ + } + +/** + * list_next_or_null_rr_rcu - get next list element in round-robin fashion. + * @head: the head for the list. + * @ptr: the list head to take the next element from. + * @type: the type of the struct this is embedded in. + * @memb: the name of the list_head within the struct. + * + * Next element returned in round-robin fashion, i.e. head will be skipped, + * but if list is observed as empty, NULL will be returned. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_next_or_null_rr_rcu(head, ptr, type, memb) \ +({ \ + list_next_or_null_rcu(head, ptr, type, memb) ?: \ + list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \ + type, memb); \ +}) + +/** + * get_next_path_rr() - Returns path in round-robin fashion. + * @it: the path pointer + * + * Related to @MP_POLICY_RR + * + * Locks: + * rcu_read_lock() must be hold. + */ +static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it) +{ + struct rtrs_clt_sess __rcu **ppcpu_path; + struct rtrs_clt_sess *path; + struct rtrs_clt *clt; + + clt = it->clt; + + /* + * Here we use two RCU objects: @paths_list and @pcpu_path + * pointer. See rtrs_clt_remove_path_from_arr() for details + * how that is handled. + */ + + ppcpu_path = this_cpu_ptr(clt->pcpu_path); + path = rcu_dereference(*ppcpu_path); + if (unlikely(!path)) + path = list_first_or_null_rcu(&clt->paths_list, + typeof(*path), s.entry); + else + path = list_next_or_null_rr_rcu(&clt->paths_list, + &path->s.entry, + typeof(*path), + s.entry); + rcu_assign_pointer(*ppcpu_path, path); + + return path; +} + +/** + * get_next_path_min_inflight() - Returns path with minimal inflight count. + * @it: the path pointer + * + * Related to @MP_POLICY_MIN_INFLIGHT + * + * Locks: + * rcu_read_lock() must be hold. + */ +static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it) +{ + struct rtrs_clt_sess *min_path = NULL; + struct rtrs_clt *clt = it->clt; + struct rtrs_clt_sess *sess; + int min_inflight = INT_MAX; + int inflight; + + list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { + if (unlikely(!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))) + continue; + + inflight = atomic_read(&sess->stats->inflight); + + if (inflight < min_inflight) { + min_inflight = inflight; + min_path = sess; + } + } + + /* + * add the path to the skip list, so that next time we can get + * a different one + */ + if (min_path) + list_add(raw_cpu_ptr(min_path->mp_skip_entry), &it->skip_list); + + return min_path; +} + +static inline void path_it_init(struct path_it *it, struct rtrs_clt *clt) +{ + INIT_LIST_HEAD(&it->skip_list); + it->clt = clt; + it->i = 0; + + if (clt->mp_policy == MP_POLICY_RR) + it->next_path = get_next_path_rr; + else + it->next_path = get_next_path_min_inflight; +} + +static inline void path_it_deinit(struct path_it *it) +{ + struct list_head *skip, *tmp; + /* + * The skip_list is used only for the MIN_INFLIGHT policy. + * We need to remove paths from it, so that next IO can insert + * paths (->mp_skip_entry) into a skip_list again. + */ + list_for_each_safe(skip, tmp, &it->skip_list) + list_del_init(skip); +} + +/** + * rtrs_clt_init_req() Initialize an rtrs_clt_io_req holding information + * about an inflight IO. + * The user buffer holding user control message (not data) is copied into + * the corresponding buffer of rtrs_iu (req->iu->buf), which later on will + * also hold the control message of rtrs. + * @req: an io request holding information about IO. + * @sess: client session + * @conf: conformation callback function to notify upper layer. + * @permit: permit for allocation of RDMA remote buffer + * @priv: private pointer + * @vec: kernel vector containing control message + * @usr_len: length of the user message + * @sg: scater list for IO data + * @sg_cnt: number of scater list entries + * @data_len: length of the IO data + * @dir: direction of the IO. + */ +static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, + struct rtrs_clt_sess *sess, + void (*conf)(void *priv, int errno), + struct rtrs_permit *permit, void *priv, + const struct kvec *vec, size_t usr_len, + struct scatterlist *sg, size_t sg_cnt, + size_t data_len, int dir) +{ + struct iov_iter iter; + size_t len; + + req->permit = permit; + req->in_use = true; + req->usr_len = usr_len; + req->data_len = data_len; + req->sglist = sg; + req->sg_cnt = sg_cnt; + req->priv = priv; + req->dir = dir; + req->con = rtrs_permit_to_clt_con(sess, permit); + req->conf = conf; + req->need_inv = false; + req->need_inv_comp = false; + req->inv_errno = 0; + + iov_iter_kvec(&iter, READ, vec, 1, usr_len); + len = _copy_from_iter(req->iu->buf, usr_len, &iter); + WARN_ON(len != usr_len); + + reinit_completion(&req->inv_comp); +} + +static struct rtrs_clt_io_req * +rtrs_clt_get_req(struct rtrs_clt_sess *sess, + void (*conf)(void *priv, int errno), + struct rtrs_permit *permit, void *priv, + const struct kvec *vec, size_t usr_len, + struct scatterlist *sg, size_t sg_cnt, + size_t data_len, int dir) +{ + struct rtrs_clt_io_req *req; + + req = &sess->reqs[permit->mem_id]; + rtrs_clt_init_req(req, sess, conf, permit, priv, vec, usr_len, + sg, sg_cnt, data_len, dir); + return req; +} + +static struct rtrs_clt_io_req * +rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, + struct rtrs_clt_io_req *fail_req) +{ + struct rtrs_clt_io_req *req; + struct kvec vec = { + .iov_base = fail_req->iu->buf, + .iov_len = fail_req->usr_len + }; + + req = &alive_sess->reqs[fail_req->permit->mem_id]; + rtrs_clt_init_req(req, alive_sess, fail_req->conf, fail_req->permit, + fail_req->priv, &vec, fail_req->usr_len, + fail_req->sglist, fail_req->sg_cnt, + fail_req->data_len, fail_req->dir); + return req; +} + +static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, + struct rtrs_clt_io_req *req, + struct rtrs_rbuf *rbuf, + u32 size, u32 imm) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct ib_sge *sge = req->sge; + enum ib_send_flags flags; + struct scatterlist *sg; + size_t num_sge; + int i; + + for_each_sg(req->sglist, sg, req->sg_cnt, i) { + sge[i].addr = sg_dma_address(sg); + sge[i].length = sg_dma_len(sg); + sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + } + sge[i].addr = req->iu->dma_addr; + sge[i].length = size; + sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + + num_sge = 1 + req->sg_cnt; + + /* + * From time to time we have to post signalled sends, + * or send queue will fill up and only QP reset can help. + */ + flags = atomic_inc_return(&con->io_cnt) % sess->queue_depth ? + 0 : IB_SEND_SIGNALED; + + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, + size, DMA_TO_DEVICE); + + return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, sge, num_sge, + rbuf->rkey, rbuf->addr, imm, + flags, NULL); +} + +static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) +{ + struct rtrs_clt_con *con = req->con; + struct rtrs_sess *s = con->c.sess; + struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_msg_rdma_write *msg; + + struct rtrs_rbuf *rbuf; + int ret, count = 0; + u32 imm, buf_id; + + const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; + + if (unlikely(tsize > sess->chunk_size)) { + rtrs_wrn(s, "Write request failed, size too big %zu > %d\n", + tsize, sess->chunk_size); + return -EMSGSIZE; + } + if (req->sg_cnt) { + count = ib_dma_map_sg(sess->s.dev->ib_dev, req->sglist, + req->sg_cnt, req->dir); + if (unlikely(!count)) { + rtrs_wrn(s, "Write request failed, map failed\n"); + return -EINVAL; + } + } + /* put rtrs msg after sg and user message */ + msg = req->iu->buf + req->usr_len; + msg->type = cpu_to_le16(RTRS_MSG_WRITE); + msg->usr_len = cpu_to_le16(req->usr_len); + + /* rtrs message on server side will be after user data and message */ + imm = req->permit->mem_off + req->data_len + req->usr_len; + imm = rtrs_to_io_req_imm(imm); + buf_id = req->permit->mem_id; + req->sg_size = tsize; + rbuf = &sess->rbufs[buf_id]; + + /* + * Update stats now, after request is successfully sent it is not + * safe anymore to touch it. + */ + rtrs_clt_update_all_stats(req, WRITE); + + ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, + req->usr_len + sizeof(*msg), + imm); + if (unlikely(ret)) { + rtrs_err(s, "Write request failed: %d\n", ret); + if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) + atomic_dec(&sess->stats->inflight); + if (req->sg_cnt) + ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + req->sg_cnt, req->dir); + } + + return ret; +} + +static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) +{ + int nr; + + /* Align the MR to a 4K page size to match the block virt boundary */ + nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); + if (unlikely(nr < req->sg_cnt)) { + if (nr < 0) + return nr; + return -EINVAL; + } + ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); + + return nr; +} + +static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) +{ + struct rtrs_clt_con *con = req->con; + struct rtrs_sess *s = con->c.sess; + struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_msg_rdma_read *msg; + struct rtrs_ib_dev *dev; + + struct ib_reg_wr rwr; + struct ib_send_wr *wr = NULL; + + int ret, count = 0; + u32 imm, buf_id; + + const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; + + s = &sess->s; + dev = sess->s.dev; + + if (unlikely(tsize > sess->chunk_size)) { + rtrs_wrn(s, + "Read request failed, message size is %zu, bigger than CHUNK_SIZE %d\n", + tsize, sess->chunk_size); + return -EMSGSIZE; + } + + if (req->sg_cnt) { + count = ib_dma_map_sg(dev->ib_dev, req->sglist, req->sg_cnt, + req->dir); + if (unlikely(!count)) { + rtrs_wrn(s, + "Read request failed, dma map failed\n"); + return -EINVAL; + } + } + /* put our message into req->buf after user message*/ + msg = req->iu->buf + req->usr_len; + msg->type = cpu_to_le16(RTRS_MSG_READ); + msg->usr_len = cpu_to_le16(req->usr_len); + + if (count) { + ret = rtrs_map_sg_fr(req, count); + if (ret < 0) { + rtrs_err_rl(s, + "Read request failed, failed to map fast reg. data, err: %d\n", + ret); + ib_dma_unmap_sg(dev->ib_dev, req->sglist, req->sg_cnt, + req->dir); + return ret; + } + rwr = (struct ib_reg_wr) { + .wr.opcode = IB_WR_REG_MR, + .wr.wr_cqe = &fast_reg_cqe, + .mr = req->mr, + .key = req->mr->rkey, + .access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE), + }; + wr = &rwr.wr; + + msg->sg_cnt = cpu_to_le16(1); + msg->flags = cpu_to_le16(RTRS_MSG_NEED_INVAL_F); + + msg->desc[0].addr = cpu_to_le64(req->mr->iova); + msg->desc[0].key = cpu_to_le32(req->mr->rkey); + msg->desc[0].len = cpu_to_le32(req->mr->length); + + /* Further invalidation is required */ + req->need_inv = !!RTRS_MSG_NEED_INVAL_F; + + } else { + msg->sg_cnt = 0; + msg->flags = 0; + } + /* + * rtrs message will be after the space reserved for disk data and + * user message + */ + imm = req->permit->mem_off + req->data_len + req->usr_len; + imm = rtrs_to_io_req_imm(imm); + buf_id = req->permit->mem_id; + + req->sg_size = sizeof(*msg); + req->sg_size += le16_to_cpu(msg->sg_cnt) * sizeof(struct rtrs_sg_desc); + req->sg_size += req->usr_len; + + /* + * Update stats now, after request is successfully sent it is not + * safe anymore to touch it. + */ + rtrs_clt_update_all_stats(req, READ); + + ret = rtrs_post_send_rdma(req->con, req, &sess->rbufs[buf_id], + req->data_len, imm, wr); + if (unlikely(ret)) { + rtrs_err(s, "Read request failed: %d\n", ret); + if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) + atomic_dec(&sess->stats->inflight); + req->need_inv = false; + if (req->sg_cnt) + ib_dma_unmap_sg(dev->ib_dev, req->sglist, + req->sg_cnt, req->dir); + } + + return ret; +} + +/** + * rtrs_clt_failover_req() Try to find an active path for a failed request + * @clt: clt context + * @fail_req: a failed io request. + */ +static int rtrs_clt_failover_req(struct rtrs_clt *clt, + struct rtrs_clt_io_req *fail_req) +{ + struct rtrs_clt_sess *alive_sess; + struct rtrs_clt_io_req *req; + int err = -ECONNABORTED; + struct path_it it; + + do_each_path(alive_sess, clt, &it) { + if (unlikely(READ_ONCE(alive_sess->state) != + RTRS_CLT_CONNECTED)) + continue; + req = rtrs_clt_get_copy_req(alive_sess, fail_req); + if (req->dir == DMA_TO_DEVICE) + err = rtrs_clt_write_req(req); + else + err = rtrs_clt_read_req(req); + if (unlikely(err)) { + req->in_use = false; + continue; + } + /* Success path */ + rtrs_clt_inc_failover_cnt(alive_sess->stats); + break; + } while_each_path(&it); + + return err; +} + +static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_io_req *req; + int i, err; + + if (!sess->reqs) + return; + for (i = 0; i < sess->queue_depth; ++i) { + req = &sess->reqs[i]; + if (!req->in_use) + continue; + + /* + * Safely (without notification) complete failed request. + * After completion this request is still useble and can + * be failovered to another path. + */ + complete_rdma_req(req, -ECONNABORTED, false, true); + + err = rtrs_clt_failover_req(clt, req); + if (unlikely(err)) + /* Failover failed, notify anyway */ + req->conf(req->priv, err); + } +} + +static void free_sess_reqs(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt_io_req *req; + int i; + + if (!sess->reqs) + return; + for (i = 0; i < sess->queue_depth; ++i) { + req = &sess->reqs[i]; + if (req->mr) + ib_dereg_mr(req->mr); + kfree(req->sge); + rtrs_iu_free(req->iu, DMA_TO_DEVICE, + sess->s.dev->ib_dev, 1); + } + kfree(sess->reqs); + sess->reqs = NULL; +} + +static int alloc_sess_reqs(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt_io_req *req; + struct rtrs_clt *clt = sess->clt; + int i, err = -ENOMEM; + + sess->reqs = kcalloc(sess->queue_depth, sizeof(*sess->reqs), + GFP_KERNEL); + if (!sess->reqs) + return -ENOMEM; + + for (i = 0; i < sess->queue_depth; ++i) { + req = &sess->reqs[i]; + req->iu = rtrs_iu_alloc(1, sess->max_hdr_size, GFP_KERNEL, + sess->s.dev->ib_dev, + DMA_TO_DEVICE, + rtrs_clt_rdma_done); + if (!req->iu) + goto out; + + req->sge = kmalloc_array(clt->max_segments + 1, + sizeof(*req->sge), GFP_KERNEL); + if (!req->sge) + goto out; + + req->mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, + sess->max_pages_per_mr); + if (IS_ERR(req->mr)) { + err = PTR_ERR(req->mr); + req->mr = NULL; + pr_err("Failed to alloc sess->max_pages_per_mr %d\n", + sess->max_pages_per_mr); + goto out; + } + + init_completion(&req->inv_comp); + } + + return 0; + +out: + free_sess_reqs(sess); + + return err; +} + +static int alloc_permits(struct rtrs_clt *clt) +{ + unsigned int chunk_bits; + int err, i; + + clt->permits_map = kcalloc(BITS_TO_LONGS(clt->queue_depth), + sizeof(long), GFP_KERNEL); + if (!clt->permits_map) { + err = -ENOMEM; + goto out_err; + } + clt->permits = kcalloc(clt->queue_depth, permit_size(clt), GFP_KERNEL); + if (!clt->permits) { + err = -ENOMEM; + goto err_map; + } + chunk_bits = ilog2(clt->queue_depth - 1) + 1; + for (i = 0; i < clt->queue_depth; i++) { + struct rtrs_permit *permit; + + permit = get_permit(clt, i); + permit->mem_id = i; + permit->mem_off = i << (MAX_IMM_PAYL_BITS - chunk_bits); + } + + return 0; + +err_map: + kfree(clt->permits_map); + clt->permits_map = NULL; +out_err: + return err; +} + +static void free_permits(struct rtrs_clt *clt) +{ + kfree(clt->permits_map); + clt->permits_map = NULL; + kfree(clt->permits); + clt->permits = NULL; +} + +static void query_fast_reg_mode(struct rtrs_clt_sess *sess) +{ + struct ib_device *ib_dev; + u64 max_pages_per_mr; + int mr_page_shift; + + ib_dev = sess->s.dev->ib_dev; + + /* + * Use the smallest page size supported by the HCA, down to a + * minimum of 4096 bytes. We're unlikely to build large sglists + * out of smaller entries. + */ + mr_page_shift = max(12, ffs(ib_dev->attrs.page_size_cap) - 1); + max_pages_per_mr = ib_dev->attrs.max_mr_size; + do_div(max_pages_per_mr, (1ull << mr_page_shift)); + sess->max_pages_per_mr = + min3(sess->max_pages_per_mr, (u32)max_pages_per_mr, + ib_dev->attrs.max_fast_reg_page_list_len); + sess->max_send_sge = ib_dev->attrs.max_send_sge; +} + +static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, + enum rtrs_clt_state new_state, + enum rtrs_clt_state *old_state) +{ + bool changed; + + spin_lock_irq(&sess->state_wq.lock); + *old_state = sess->state; + changed = __rtrs_clt_change_state(sess, new_state); + spin_unlock_irq(&sess->state_wq.lock); + + return changed; +} + +static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, + enum rtrs_clt_state new_state) +{ + enum rtrs_clt_state old_state; + + return rtrs_clt_change_state_get_old(sess, new_state, &old_state); +} + +static void rtrs_clt_hb_err_handler(struct rtrs_con *c) +{ + struct rtrs_clt_con *con = container_of(c, typeof(*con), c); + + rtrs_rdma_error_recovery(con); +} + +static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess) +{ + rtrs_init_hb(&sess->s, &io_comp_cqe, + RTRS_HB_INTERVAL_MS, + RTRS_HB_MISSED_MAX, + rtrs_clt_hb_err_handler, + rtrs_wq); +} + +static void rtrs_clt_start_hb(struct rtrs_clt_sess *sess) +{ + rtrs_start_hb(&sess->s); +} + +static void rtrs_clt_stop_hb(struct rtrs_clt_sess *sess) +{ + rtrs_stop_hb(&sess->s); +} + +static void rtrs_clt_reconnect_work(struct work_struct *work); +static void rtrs_clt_close_work(struct work_struct *work); + +static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, + const struct rtrs_addr *path, + size_t con_num, u16 max_segments) +{ + struct rtrs_clt_sess *sess; + int err = -ENOMEM; + int cpu; + + sess = kzalloc(sizeof(*sess), GFP_KERNEL); + if (!sess) + goto err; + + /* Extra connection for user messages */ + con_num += 1; + + sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL); + if (!sess->s.con) + goto err_free_sess; + + sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); + if (!sess->stats) + goto err_free_con; + + mutex_init(&sess->init_mutex); + uuid_gen(&sess->s.uuid); + memcpy(&sess->s.dst_addr, path->dst, + rdma_addr_size((struct sockaddr *)path->dst)); + + /* + * rdma_resolve_addr() passes src_addr to cma_bind_addr, which + * checks the sa_family to be non-zero. If user passed src_addr=NULL + * the sess->src_addr will contain only zeros, which is then fine. + */ + if (path->src) + memcpy(&sess->s.src_addr, path->src, + rdma_addr_size((struct sockaddr *)path->src)); + strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); + sess->s.con_num = con_num; + sess->clt = clt; + sess->max_pages_per_mr = max_segments * BLK_MAX_SEGMENT_SIZE >> 12; + init_waitqueue_head(&sess->state_wq); + sess->state = RTRS_CLT_CONNECTING; + atomic_set(&sess->connected_cnt, 0); + INIT_WORK(&sess->close_work, rtrs_clt_close_work); + INIT_DELAYED_WORK(&sess->reconnect_dwork, rtrs_clt_reconnect_work); + rtrs_clt_init_hb(sess); + + sess->mp_skip_entry = alloc_percpu(typeof(*sess->mp_skip_entry)); + if (!sess->mp_skip_entry) + goto err_free_stats; + + for_each_possible_cpu(cpu) + INIT_LIST_HEAD(per_cpu_ptr(sess->mp_skip_entry, cpu)); + + err = rtrs_clt_init_stats(sess->stats); + if (err) + goto err_free_percpu; + + return sess; + +err_free_percpu: + free_percpu(sess->mp_skip_entry); +err_free_stats: + kfree(sess->stats); +err_free_con: + kfree(sess->s.con); +err_free_sess: + kfree(sess); +err: + return ERR_PTR(err); +} + +void free_sess(struct rtrs_clt_sess *sess) +{ + free_percpu(sess->mp_skip_entry); + mutex_destroy(&sess->init_mutex); + kfree(sess->s.con); + kfree(sess->rbufs); + kfree(sess); +} + +static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) +{ + struct rtrs_clt_con *con; + + con = kzalloc(sizeof(*con), GFP_KERNEL); + if (!con) + return -ENOMEM; + + /* Map first two connections to the first CPU */ + con->cpu = (cid ? cid - 1 : 0) % nr_cpu_ids; + con->c.cid = cid; + con->c.sess = &sess->s; + atomic_set(&con->io_cnt, 0); + + sess->s.con[cid] = &con->c; + + return 0; +} + +static void destroy_con(struct rtrs_clt_con *con) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + + sess->s.con[con->c.cid] = NULL; + kfree(con); +} + +static int create_con_cq_qp(struct rtrs_clt_con *con) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + u16 wr_queue_size; + int err, cq_vector; + struct rtrs_msg_rkey_rsp *rsp; + + /* + * This function can fail, but still destroy_con_cq_qp() should + * be called, this is because create_con_cq_qp() is called on cm + * event path, thus caller/waiter never knows: have we failed before + * create_con_cq_qp() or after. To solve this dilemma without + * creating any additional flags just allow destroy_con_cq_qp() be + * called many times. + */ + + if (con->c.cid == 0) { + /* + * One completion for each receive and two for each send + * (send request + registration) + * + 2 for drain and heartbeat + * in case qp gets into error state + */ + wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; + /* We must be the first here */ + if (WARN_ON(sess->s.dev)) + return -EINVAL; + + /* + * The whole session uses device from user connection. + * Be careful not to close user connection before ib dev + * is gracefully put. + */ + sess->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device, + &dev_pd); + if (!sess->s.dev) { + rtrs_wrn(sess->clt, + "rtrs_ib_dev_find_get_or_add(): no memory\n"); + return -ENOMEM; + } + sess->s.dev_ref = 1; + query_fast_reg_mode(sess); + } else { + /* + * Here we assume that session members are correctly set. + * This is always true if user connection (cid == 0) is + * established first. + */ + if (WARN_ON(!sess->s.dev)) + return -EINVAL; + if (WARN_ON(!sess->queue_depth)) + return -EINVAL; + + /* Shared between connections */ + sess->s.dev_ref++; + wr_queue_size = + min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, + /* QD * (REQ + RSP + FR REGS or INVS) + drain */ + sess->queue_depth * 3 + 1); + } + /* alloc iu to recv new rkey reply when server reports flags set */ + if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { + con->rsp_ius = rtrs_iu_alloc(wr_queue_size, sizeof(*rsp), + GFP_KERNEL, sess->s.dev->ib_dev, + DMA_FROM_DEVICE, + rtrs_clt_rdma_done); + if (!con->rsp_ius) + return -ENOMEM; + con->queue_size = wr_queue_size; + } + cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; + err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, + cq_vector, wr_queue_size, wr_queue_size, + IB_POLL_SOFTIRQ); + /* + * In case of error we do not bother to clean previous allocations, + * since destroy_con_cq_qp() must be called. + */ + + if (err) + return err; + return err; +} + +static void destroy_con_cq_qp(struct rtrs_clt_con *con) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + + /* + * Be careful here: destroy_con_cq_qp() can be called even + * create_con_cq_qp() failed, see comments there. + */ + + rtrs_cq_qp_destroy(&con->c); + if (con->rsp_ius) { + rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE, + sess->s.dev->ib_dev, con->queue_size); + con->rsp_ius = NULL; + con->queue_size = 0; + } + if (sess->s.dev_ref && !--sess->s.dev_ref) { + rtrs_ib_dev_put(sess->s.dev); + sess->s.dev = NULL; + } +} + +static void stop_cm(struct rtrs_clt_con *con) +{ + rdma_disconnect(con->c.cm_id); + if (con->c.qp) + ib_drain_qp(con->c.qp); +} + +static void destroy_cm(struct rtrs_clt_con *con) +{ + rdma_destroy_id(con->c.cm_id); + con->c.cm_id = NULL; +} + +static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) +{ + struct rtrs_sess *s = con->c.sess; + int err; + + err = create_con_cq_qp(con); + if (err) { + rtrs_err(s, "create_con_cq_qp(), err: %d\n", err); + return err; + } + err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); + if (err) { + rtrs_err(s, "Resolving route failed, err: %d\n", err); + destroy_con_cq_qp(con); + } + + return err; +} + +static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt *clt = sess->clt; + struct rtrs_msg_conn_req msg; + struct rdma_conn_param param; + + int err; + + param = (struct rdma_conn_param) { + .retry_count = 7, + .rnr_retry_count = 7, + .private_data = &msg, + .private_data_len = sizeof(msg), + }; + + msg = (struct rtrs_msg_conn_req) { + .magic = cpu_to_le16(RTRS_MAGIC), + .version = cpu_to_le16(RTRS_PROTO_VER), + .cid = cpu_to_le16(con->c.cid), + .cid_num = cpu_to_le16(sess->s.con_num), + .recon_cnt = cpu_to_le16(sess->s.recon_cnt), + }; + uuid_copy(&msg.sess_uuid, &sess->s.uuid); + uuid_copy(&msg.paths_uuid, &clt->paths_uuid); + + err = rdma_connect(con->c.cm_id, ¶m); + if (err) + rtrs_err(clt, "rdma_connect(): %d\n", err); + + return err; +} + +static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, + struct rdma_cm_event *ev) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt *clt = sess->clt; + const struct rtrs_msg_conn_rsp *msg; + u16 version, queue_depth; + int errno; + u8 len; + + msg = ev->param.conn.private_data; + len = ev->param.conn.private_data_len; + if (len < sizeof(*msg)) { + rtrs_err(clt, "Invalid RTRS connection response\n"); + return -ECONNRESET; + } + if (le16_to_cpu(msg->magic) != RTRS_MAGIC) { + rtrs_err(clt, "Invalid RTRS magic\n"); + return -ECONNRESET; + } + version = le16_to_cpu(msg->version); + if (version >> 8 != RTRS_PROTO_VER_MAJOR) { + rtrs_err(clt, "Unsupported major RTRS version: %d, expected %d\n", + version >> 8, RTRS_PROTO_VER_MAJOR); + return -ECONNRESET; + } + errno = le16_to_cpu(msg->errno); + if (errno) { + rtrs_err(clt, "Invalid RTRS message: errno %d\n", + errno); + return -ECONNRESET; + } + if (con->c.cid == 0) { + queue_depth = le16_to_cpu(msg->queue_depth); + + if (queue_depth > MAX_SESS_QUEUE_DEPTH) { + rtrs_err(clt, "Invalid RTRS message: queue=%d\n", + queue_depth); + return -ECONNRESET; + } + if (!sess->rbufs || sess->queue_depth < queue_depth) { + kfree(sess->rbufs); + sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs), + GFP_KERNEL); + if (!sess->rbufs) + return -ENOMEM; + } + sess->queue_depth = queue_depth; + sess->max_hdr_size = le32_to_cpu(msg->max_hdr_size); + sess->max_io_size = le32_to_cpu(msg->max_io_size); + sess->flags = le32_to_cpu(msg->flags); + sess->chunk_size = sess->max_io_size + sess->max_hdr_size; + + /* + * Global queue depth and IO size is always a minimum. + * If while a reconnection server sends us a value a bit + * higher - client does not care and uses cached minimum. + * + * Since we can have several sessions (paths) restablishing + * connections in parallel, use lock. + */ + mutex_lock(&clt->paths_mutex); + clt->queue_depth = min_not_zero(sess->queue_depth, + clt->queue_depth); + clt->max_io_size = min_not_zero(sess->max_io_size, + clt->max_io_size); + mutex_unlock(&clt->paths_mutex); + + /* + * Cache the hca_port and hca_name for sysfs + */ + sess->hca_port = con->c.cm_id->port_num; + scnprintf(sess->hca_name, sizeof(sess->hca_name), + sess->s.dev->ib_dev->name); + sess->s.src_addr = con->c.cm_id->route.addr.src_addr; + } + + return 0; +} + +static inline void flag_success_on_conn(struct rtrs_clt_con *con) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + + atomic_inc(&sess->connected_cnt); + con->cm_err = 1; +} + +static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, + struct rdma_cm_event *ev) +{ + struct rtrs_sess *s = con->c.sess; + const struct rtrs_msg_conn_rsp *msg; + const char *rej_msg; + int status, errno; + u8 data_len; + + status = ev->status; + rej_msg = rdma_reject_msg(con->c.cm_id, status); + msg = rdma_consumer_reject_data(con->c.cm_id, ev, &data_len); + + if (msg && data_len >= sizeof(*msg)) { + errno = (int16_t)le16_to_cpu(msg->errno); + if (errno == -EBUSY) + rtrs_err(s, + "Previous session is still exists on the server, please reconnect later\n"); + else + rtrs_err(s, + "Connect rejected: status %d (%s), rtrs errno %d\n", + status, rej_msg, errno); + } else { + rtrs_err(s, + "Connect rejected but with malformed message: status %d (%s)\n", + status, rej_msg); + } + + return -ECONNRESET; +} + +static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) +{ + if (rtrs_clt_change_state(sess, RTRS_CLT_CLOSING)) + queue_work(rtrs_wq, &sess->close_work); + if (wait) + flush_work(&sess->close_work); +} + +static inline void flag_error_on_conn(struct rtrs_clt_con *con, int cm_err) +{ + if (con->cm_err == 1) { + struct rtrs_clt_sess *sess; + + sess = to_clt_sess(con->c.sess); + if (atomic_dec_and_test(&sess->connected_cnt)) + + wake_up(&sess->state_wq); + } + con->cm_err = cm_err; +} + +static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *ev) +{ + struct rtrs_clt_con *con = cm_id->context; + struct rtrs_sess *s = con->c.sess; + struct rtrs_clt_sess *sess = to_clt_sess(s); + int cm_err = 0; + + switch (ev->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + cm_err = rtrs_rdma_addr_resolved(con); + break; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + cm_err = rtrs_rdma_route_resolved(con); + break; + case RDMA_CM_EVENT_ESTABLISHED: + con->cm_err = rtrs_rdma_conn_established(con, ev); + if (likely(!con->cm_err)) { + /* + * Report success and wake up. Here we abuse state_wq, + * i.e. wake up without state change, but we set cm_err. + */ + flag_success_on_conn(con); + wake_up(&sess->state_wq); + return 0; + } + break; + case RDMA_CM_EVENT_REJECTED: + cm_err = rtrs_rdma_conn_rejected(con, ev); + break; + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + rtrs_wrn(s, "CM error event %d\n", ev->event); + cm_err = -ECONNRESET; + break; + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + cm_err = -EHOSTUNREACH; + break; + case RDMA_CM_EVENT_DISCONNECTED: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + cm_err = -ECONNRESET; + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + /* + * Device removal is a special case. Queue close and return 0. + */ + rtrs_clt_close_conns(sess, false); + return 0; + default: + rtrs_err(s, "Unexpected RDMA CM event (%d)\n", ev->event); + cm_err = -ECONNRESET; + break; + } + + if (cm_err) { + /* + * cm error makes sense only on connection establishing, + * in other cases we rely on normal procedure of reconnecting. + */ + flag_error_on_conn(con, cm_err); + rtrs_rdma_error_recovery(con); + } + + return 0; +} + +static int create_cm(struct rtrs_clt_con *con) +{ + struct rtrs_sess *s = con->c.sess; + struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rdma_cm_id *cm_id; + int err; + + cm_id = rdma_create_id(&init_net, rtrs_clt_rdma_cm_handler, con, + sess->s.dst_addr.ss_family == AF_IB ? + RDMA_PS_IB : RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(cm_id)) { + err = PTR_ERR(cm_id); + rtrs_err(s, "Failed to create CM ID, err: %d\n", err); + + return err; + } + con->c.cm_id = cm_id; + con->cm_err = 0; + /* allow the port to be reused */ + err = rdma_set_reuseaddr(cm_id, 1); + if (err != 0) { + rtrs_err(s, "Set address reuse failed, err: %d\n", err); + goto destroy_cm; + } + err = rdma_resolve_addr(cm_id, (struct sockaddr *)&sess->s.src_addr, + (struct sockaddr *)&sess->s.dst_addr, + RTRS_CONNECT_TIMEOUT_MS); + if (err) { + rtrs_err(s, "Failed to resolve address, err: %d\n", err); + goto destroy_cm; + } + /* + * Combine connection status and session events. This is needed + * for waiting two possible cases: cm_err has something meaningful + * or session state was really changed to error by device removal. + */ + err = wait_event_interruptible_timeout( + sess->state_wq, + con->cm_err || sess->state != RTRS_CLT_CONNECTING, + msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS)); + if (err == 0 || err == -ERESTARTSYS) { + if (err == 0) + err = -ETIMEDOUT; + /* Timedout or interrupted */ + goto errr; + } + if (con->cm_err < 0) { + err = con->cm_err; + goto errr; + } + if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTING) { + /* Device removal */ + err = -ECONNABORTED; + goto errr; + } + + return 0; + +errr: + stop_cm(con); + /* Is safe to call destroy if cq_qp is not inited */ + destroy_con_cq_qp(con); +destroy_cm: + destroy_cm(con); + + return err; +} + +static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt *clt = sess->clt; + int up; + + /* + * We can fire RECONNECTED event only when all paths were + * connected on rtrs_clt_open(), then each was disconnected + * and the first one connected again. That's why this nasty + * game with counter value. + */ + + mutex_lock(&clt->paths_ev_mutex); + up = ++clt->paths_up; + /* + * Here it is safe to access paths num directly since up counter + * is greater than MAX_PATHS_NUM only while rtrs_clt_open() is + * in progress, thus paths removals are impossible. + */ + if (up > MAX_PATHS_NUM && up == MAX_PATHS_NUM + clt->paths_num) + clt->paths_up = clt->paths_num; + else if (up == 1) + clt->link_ev(clt->priv, RTRS_CLT_LINK_EV_RECONNECTED); + mutex_unlock(&clt->paths_ev_mutex); + + /* Mark session as established */ + sess->established = true; + sess->reconnect_attempts = 0; + sess->stats->reconnects.successful_cnt++; +} + +static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt *clt = sess->clt; + + if (!sess->established) + return; + + sess->established = false; + mutex_lock(&clt->paths_ev_mutex); + WARN_ON(!clt->paths_up); + if (--clt->paths_up == 0) + clt->link_ev(clt->priv, RTRS_CLT_LINK_EV_DISCONNECTED); + mutex_unlock(&clt->paths_ev_mutex); +} + +static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt_con *con; + unsigned int cid; + + WARN_ON(READ_ONCE(sess->state) == RTRS_CLT_CONNECTED); + + /* + * Possible race with rtrs_clt_open(), when DEVICE_REMOVAL comes + * exactly in between. Start destroying after it finishes. + */ + mutex_lock(&sess->init_mutex); + mutex_unlock(&sess->init_mutex); + + /* + * All IO paths must observe !CONNECTED state before we + * free everything. + */ + synchronize_rcu(); + + rtrs_clt_stop_hb(sess); + + /* + * The order it utterly crucial: firstly disconnect and complete all + * rdma requests with error (thus set in_use=false for requests), + * then fail outstanding requests checking in_use for each, and + * eventually notify upper layer about session disconnection. + */ + + for (cid = 0; cid < sess->s.con_num; cid++) { + if (!sess->s.con[cid]) + break; + con = to_clt_con(sess->s.con[cid]); + stop_cm(con); + } + fail_all_outstanding_reqs(sess); + free_sess_reqs(sess); + rtrs_clt_sess_down(sess); + + /* + * Wait for graceful shutdown, namely when peer side invokes + * rdma_disconnect(). 'connected_cnt' is decremented only on + * CM events, thus if other side had crashed and hb has detected + * something is wrong, here we will stuck for exactly timeout ms, + * since CM does not fire anything. That is fine, we are not in + * hurry. + */ + wait_event_timeout(sess->state_wq, !atomic_read(&sess->connected_cnt), + msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS)); + + for (cid = 0; cid < sess->s.con_num; cid++) { + if (!sess->s.con[cid]) + break; + con = to_clt_con(sess->s.con[cid]); + destroy_con_cq_qp(con); + destroy_cm(con); + destroy_con(con); + } +} + +static inline bool xchg_sessions(struct rtrs_clt_sess __rcu **rcu_ppcpu_path, + struct rtrs_clt_sess *sess, + struct rtrs_clt_sess *next) +{ + struct rtrs_clt_sess **ppcpu_path; + + /* Call cmpxchg() without sparse warnings */ + ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path; + return sess == cmpxchg(ppcpu_path, sess, next); +} + +static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *next; + bool wait_for_grace = false; + int cpu; + + mutex_lock(&clt->paths_mutex); + list_del_rcu(&sess->s.entry); + + /* Make sure everybody observes path removal. */ + synchronize_rcu(); + + /* + * At this point nobody sees @sess in the list, but still we have + * dangling pointer @pcpu_path which _can_ point to @sess. Since + * nobody can observe @sess in the list, we guarantee that IO path + * will not assign @sess to @pcpu_path, i.e. @pcpu_path can be equal + * to @sess, but can never again become @sess. + */ + + /* + * Decrement paths number only after grace period, because + * caller of do_each_path() must firstly observe list without + * path and only then decremented paths number. + * + * Otherwise there can be the following situation: + * o Two paths exist and IO is coming. + * o One path is removed: + * CPU#0 CPU#1 + * do_each_path(): rtrs_clt_remove_path_from_arr(): + * path = get_next_path() + * ^^^ list_del_rcu(path) + * [!CONNECTED path] clt->paths_num-- + * ^^^^^^^^^ + * load clt->paths_num from 2 to 1 + * ^^^^^^^^^ + * sees 1 + * + * path is observed as !CONNECTED, but do_each_path() loop + * ends, because expression i < clt->paths_num is false. + */ + clt->paths_num--; + + /* + * Get @next connection from current @sess which is going to be + * removed. If @sess is the last element, then @next is NULL. + */ + rcu_read_lock(); + next = list_next_or_null_rr_rcu(&clt->paths_list, &sess->s.entry, + typeof(*next), s.entry); + rcu_read_unlock(); + + /* + * @pcpu paths can still point to the path which is going to be + * removed, so change the pointer manually. + */ + for_each_possible_cpu(cpu) { + struct rtrs_clt_sess __rcu **ppcpu_path; + + ppcpu_path = per_cpu_ptr(clt->pcpu_path, cpu); + if (rcu_dereference_protected(*ppcpu_path, + lockdep_is_held(&clt->paths_mutex)) != sess) + /* + * synchronize_rcu() was called just after deleting + * entry from the list, thus IO code path cannot + * change pointer back to the pointer which is going + * to be removed, we are safe here. + */ + continue; + + /* + * We race with IO code path, which also changes pointer, + * thus we have to be careful not to overwrite it. + */ + if (xchg_sessions(ppcpu_path, sess, next)) + /* + * @ppcpu_path was successfully replaced with @next, + * that means that someone could also pick up the + * @sess and dereferencing it right now, so wait for + * a grace period is required. + */ + wait_for_grace = true; + } + if (wait_for_grace) + synchronize_rcu(); + + mutex_unlock(&clt->paths_mutex); +} + +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess, + struct rtrs_addr *addr) +{ + struct rtrs_clt *clt = sess->clt; + + mutex_lock(&clt->paths_mutex); + clt->paths_num++; + + list_add_tail_rcu(&sess->s.entry, &clt->paths_list); + mutex_unlock(&clt->paths_mutex); +} + +static void rtrs_clt_close_work(struct work_struct *work) +{ + struct rtrs_clt_sess *sess; + + sess = container_of(work, struct rtrs_clt_sess, close_work); + + cancel_delayed_work_sync(&sess->reconnect_dwork); + rtrs_clt_stop_and_destroy_conns(sess); + rtrs_clt_change_state(sess, RTRS_CLT_CLOSED); +} + +static int init_conns(struct rtrs_clt_sess *sess) +{ + unsigned int cid; + int err; + + /* + * On every new session connections increase reconnect counter + * to avoid clashes with previous sessions not yet closed + * sessions on a server side. + */ + sess->s.recon_cnt++; + + /* Establish all RDMA connections */ + for (cid = 0; cid < sess->s.con_num; cid++) { + err = create_con(sess, cid); + if (err) + goto destroy; + + err = create_cm(to_clt_con(sess->s.con[cid])); + if (err) { + destroy_con(to_clt_con(sess->s.con[cid])); + goto destroy; + } + } + err = alloc_sess_reqs(sess); + if (err) + goto destroy; + + rtrs_clt_start_hb(sess); + + return 0; + +destroy: + while (cid--) { + struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); + + stop_cm(con); + destroy_con_cq_qp(con); + destroy_cm(con); + destroy_con(con); + } + /* + * If we've never taken async path and got an error, say, + * doing rdma_resolve_addr(), switch to CONNECTION_ERR state + * manually to keep reconnecting. + */ + rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + + return err; +} + +static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_clt_con *con = cq->cq_context; + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_iu *iu; + + iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); + rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(sess->clt, "Sess info request send failed: %s\n", + ib_wc_status_msg(wc->status)); + rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + return; + } + + rtrs_clt_update_wc_stats(con); +} + +static int process_info_rsp(struct rtrs_clt_sess *sess, + const struct rtrs_msg_info_rsp *msg) +{ + unsigned int sg_cnt, total_len; + int i, sgi; + + sg_cnt = le16_to_cpu(msg->sg_cnt); + if (unlikely(!sg_cnt)) + return -EINVAL; + /* + * Check if IB immediate data size is enough to hold the mem_id and + * the offset inside the memory chunk. + */ + if (unlikely((ilog2(sg_cnt - 1) + 1) + + (ilog2(sess->chunk_size - 1) + 1) > + MAX_IMM_PAYL_BITS)) { + rtrs_err(sess->clt, + "RDMA immediate size (%db) not enough to encode %d buffers of size %dB\n", + MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); + return -EINVAL; + } + if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { + rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", + sg_cnt); + return -EINVAL; + } + total_len = 0; + for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { + const struct rtrs_sg_desc *desc = &msg->desc[sgi]; + u32 len, rkey; + u64 addr; + + addr = le64_to_cpu(desc->addr); + rkey = le32_to_cpu(desc->key); + len = le32_to_cpu(desc->len); + + total_len += len; + + if (unlikely(!len || (len % sess->chunk_size))) { + rtrs_err(sess->clt, "Incorrect [%d].len %d\n", sgi, + len); + return -EINVAL; + } + for ( ; len && i < sess->queue_depth; i++) { + sess->rbufs[i].addr = addr; + sess->rbufs[i].rkey = rkey; + + len -= sess->chunk_size; + addr += sess->chunk_size; + } + } + /* Sanity check */ + if (unlikely(sgi != sg_cnt || i != sess->queue_depth)) { + rtrs_err(sess->clt, "Incorrect sg vector, not fully mapped\n"); + return -EINVAL; + } + if (unlikely(total_len != sess->chunk_size * sess->queue_depth)) { + rtrs_err(sess->clt, "Incorrect total_len %d\n", total_len); + return -EINVAL; + } + + return 0; +} + +static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_clt_con *con = cq->cq_context; + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_msg_info_rsp *msg; + enum rtrs_clt_state state; + struct rtrs_iu *iu; + size_t rx_sz; + int err; + + state = RTRS_CLT_CONNECTING_ERR; + + WARN_ON(con->c.cid); + iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(sess->clt, "Sess info response recv failed: %s\n", + ib_wc_status_msg(wc->status)); + goto out; + } + WARN_ON(wc->opcode != IB_WC_RECV); + + if (unlikely(wc->byte_len < sizeof(*msg))) { + rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", + wc->byte_len); + goto out; + } + ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + iu->size, DMA_FROM_DEVICE); + msg = iu->buf; + if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP)) { + rtrs_err(sess->clt, "Sess info response is malformed: type %d\n", + le16_to_cpu(msg->type)); + goto out; + } + rx_sz = sizeof(*msg); + rx_sz += sizeof(msg->desc[0]) * le16_to_cpu(msg->sg_cnt); + if (unlikely(wc->byte_len < rx_sz)) { + rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", + wc->byte_len); + goto out; + } + err = process_info_rsp(sess, msg); + if (unlikely(err)) + goto out; + + err = post_recv_sess(sess); + if (unlikely(err)) + goto out; + + state = RTRS_CLT_CONNECTED; + +out: + rtrs_clt_update_wc_stats(con); + rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_clt_change_state(sess, state); +} + +static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt_con *usr_con = to_clt_con(sess->s.con[0]); + struct rtrs_msg_info_req *msg; + struct rtrs_iu *tx_iu, *rx_iu; + size_t rx_sz; + int err; + + rx_sz = sizeof(struct rtrs_msg_info_rsp); + rx_sz += sizeof(u64) * MAX_SESS_QUEUE_DEPTH; + + tx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), GFP_KERNEL, + sess->s.dev->ib_dev, DMA_TO_DEVICE, + rtrs_clt_info_req_done); + rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, sess->s.dev->ib_dev, + DMA_FROM_DEVICE, rtrs_clt_info_rsp_done); + if (unlikely(!tx_iu || !rx_iu)) { + err = -ENOMEM; + goto out; + } + /* Prepare for getting info response */ + err = rtrs_iu_post_recv(&usr_con->c, rx_iu); + if (unlikely(err)) { + rtrs_err(sess->clt, "rtrs_iu_post_recv(), err: %d\n", err); + goto out; + } + rx_iu = NULL; + + msg = tx_iu->buf; + msg->type = cpu_to_le16(RTRS_MSG_INFO_REQ); + memcpy(msg->sessname, sess->s.sessname, sizeof(msg->sessname)); + + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, + tx_iu->size, DMA_TO_DEVICE); + + /* Send info request */ + err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL); + if (unlikely(err)) { + rtrs_err(sess->clt, "rtrs_iu_post_send(), err: %d\n", err); + goto out; + } + tx_iu = NULL; + + /* Wait for state change */ + wait_event_interruptible_timeout(sess->state_wq, + sess->state != RTRS_CLT_CONNECTING, + msecs_to_jiffies( + RTRS_CONNECT_TIMEOUT_MS)); + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) { + if (READ_ONCE(sess->state) == RTRS_CLT_CONNECTING_ERR) + err = -ECONNRESET; + else + err = -ETIMEDOUT; + goto out; + } + +out: + if (tx_iu) + rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + if (rx_iu) + rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + if (unlikely(err)) + /* If we've never taken async path because of malloc problems */ + rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + + return err; +} + +/** + * init_sess() - establishes all session connections and does handshake + * @sess: client session. + * In case of error full close or reconnect procedure should be taken, + * because reconnect or close async works can be started. + */ +static int init_sess(struct rtrs_clt_sess *sess) +{ + int err; + + mutex_lock(&sess->init_mutex); + err = init_conns(sess); + if (err) { + rtrs_err(sess->clt, "init_conns(), err: %d\n", err); + goto out; + } + err = rtrs_send_sess_info(sess); + if (err) { + rtrs_err(sess->clt, "rtrs_send_sess_info(), err: %d\n", err); + goto out; + } + rtrs_clt_sess_up(sess); +out: + mutex_unlock(&sess->init_mutex); + + return err; +} + +static void rtrs_clt_reconnect_work(struct work_struct *work) +{ + struct rtrs_clt_sess *sess; + struct rtrs_clt *clt; + unsigned int delay_ms; + int err; + + sess = container_of(to_delayed_work(work), struct rtrs_clt_sess, + reconnect_dwork); + clt = sess->clt; + + if (READ_ONCE(sess->state) != RTRS_CLT_RECONNECTING) + return; + + if (sess->reconnect_attempts >= clt->max_reconnect_attempts) { + /* Close a session completely if max attempts is reached */ + rtrs_clt_close_conns(sess, false); + return; + } + sess->reconnect_attempts++; + + /* Stop everything */ + rtrs_clt_stop_and_destroy_conns(sess); + msleep(RTRS_RECONNECT_BACKOFF); + if (rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING)) { + err = init_sess(sess); + if (err) + goto reconnect_again; + } + + return; + +reconnect_again: + if (rtrs_clt_change_state(sess, RTRS_CLT_RECONNECTING)) { + sess->stats->reconnects.fail_cnt++; + delay_ms = clt->reconnect_delay_sec * 1000; + queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, + msecs_to_jiffies(delay_ms)); + } +} + +static void rtrs_clt_dev_release(struct device *dev) +{ + struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + + kfree(clt); +} + +static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, + u16 port, size_t pdu_sz, void *priv, + void (*link_ev)(void *priv, + enum rtrs_clt_link_ev ev), + unsigned int max_segments, + unsigned int reconnect_delay_sec, + unsigned int max_reconnect_attempts) +{ + struct rtrs_clt *clt; + int err; + + if (!paths_num || paths_num > MAX_PATHS_NUM) + return ERR_PTR(-EINVAL); + + if (strlen(sessname) >= sizeof(clt->sessname)) + return ERR_PTR(-EINVAL); + + clt = kzalloc(sizeof(*clt), GFP_KERNEL); + if (!clt) + return ERR_PTR(-ENOMEM); + + clt->pcpu_path = alloc_percpu(typeof(*clt->pcpu_path)); + if (!clt->pcpu_path) { + kfree(clt); + return ERR_PTR(-ENOMEM); + } + + uuid_gen(&clt->paths_uuid); + INIT_LIST_HEAD_RCU(&clt->paths_list); + clt->paths_num = paths_num; + clt->paths_up = MAX_PATHS_NUM; + clt->port = port; + clt->pdu_sz = pdu_sz; + clt->max_segments = max_segments; + clt->reconnect_delay_sec = reconnect_delay_sec; + clt->max_reconnect_attempts = max_reconnect_attempts; + clt->priv = priv; + clt->link_ev = link_ev; + clt->mp_policy = MP_POLICY_MIN_INFLIGHT; + strlcpy(clt->sessname, sessname, sizeof(clt->sessname)); + init_waitqueue_head(&clt->permits_wait); + mutex_init(&clt->paths_ev_mutex); + mutex_init(&clt->paths_mutex); + + clt->dev.class = rtrs_clt_dev_class; + clt->dev.release = rtrs_clt_dev_release; + err = dev_set_name(&clt->dev, "%s", sessname); + if (err) { + free_percpu(clt->pcpu_path); + kfree(clt); + return ERR_PTR(err); + } + /* + * Suppress user space notification until + * sysfs files are created + */ + dev_set_uevent_suppress(&clt->dev, true); + err = device_register(&clt->dev); + if (err) { + free_percpu(clt->pcpu_path); + put_device(&clt->dev); + return ERR_PTR(err); + } + + clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); + if (!clt->kobj_paths) { + free_percpu(clt->pcpu_path); + device_unregister(&clt->dev); + return NULL; + } + err = rtrs_clt_create_sysfs_root_files(clt); + if (err) { + free_percpu(clt->pcpu_path); + kobject_del(clt->kobj_paths); + kobject_put(clt->kobj_paths); + device_unregister(&clt->dev); + return ERR_PTR(err); + } + dev_set_uevent_suppress(&clt->dev, false); + kobject_uevent(&clt->dev.kobj, KOBJ_ADD); + + return clt; +} + +static void wait_for_inflight_permits(struct rtrs_clt *clt) +{ + if (clt->permits_map) { + size_t sz = clt->queue_depth; + + wait_event(clt->permits_wait, + find_first_bit(clt->permits_map, sz) >= sz); + } +} + +static void free_clt(struct rtrs_clt *clt) +{ + wait_for_inflight_permits(clt); + free_permits(clt); + free_percpu(clt->pcpu_path); + mutex_destroy(&clt->paths_ev_mutex); + mutex_destroy(&clt->paths_mutex); + /* release callback will free clt in last put */ + device_unregister(&clt->dev); +} + +/** + * rtrs_clt_open() - Open a session to an RTRS server + * @ops: holds the link event callback and the private pointer. + * @sessname: name of the session + * @paths: Paths to be established defined by their src and dst addresses + * @paths_num: Number of elements in the @paths array + * @port: port to be used by the RTRS session + * @pdu_sz: Size of extra payload which can be accessed after permit allocation. + * @reconnect_delay_sec: time between reconnect tries + * @max_segments: Max. number of segments per IO request + * @max_reconnect_attempts: Number of times to reconnect on error before giving + * up, 0 for * disabled, -1 for forever + * + * Starts session establishment with the rtrs_server. The function can block + * up to ~2000ms before it returns. + * + * Return a valid pointer on success otherwise PTR_ERR. + */ +struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, + const char *sessname, + const struct rtrs_addr *paths, + size_t paths_num, u16 port, + size_t pdu_sz, u8 reconnect_delay_sec, + u16 max_segments, + s16 max_reconnect_attempts) +{ + struct rtrs_clt_sess *sess, *tmp; + struct rtrs_clt *clt; + int err, i; + + clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, + ops->link_ev, + max_segments, reconnect_delay_sec, + max_reconnect_attempts); + if (IS_ERR(clt)) { + err = PTR_ERR(clt); + goto out; + } + for (i = 0; i < paths_num; i++) { + struct rtrs_clt_sess *sess; + + sess = alloc_sess(clt, &paths[i], nr_cpu_ids, + max_segments); + if (IS_ERR(sess)) { + err = PTR_ERR(sess); + goto close_all_sess; + } + list_add_tail_rcu(&sess->s.entry, &clt->paths_list); + + err = init_sess(sess); + if (err) { + list_del_rcu(&sess->s.entry); + rtrs_clt_close_conns(sess, true); + free_sess(sess); + goto close_all_sess; + } + + err = rtrs_clt_create_sess_files(sess); + if (err) { + list_del_rcu(&sess->s.entry); + rtrs_clt_close_conns(sess, true); + free_sess(sess); + goto close_all_sess; + } + } + err = alloc_permits(clt); + if (err) + goto close_all_sess; + + return clt; + +close_all_sess: + list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { + rtrs_clt_destroy_sess_files(sess, NULL); + rtrs_clt_close_conns(sess, true); + kobject_put(&sess->kobj); + } + rtrs_clt_destroy_sysfs_root_files(clt); + rtrs_clt_destroy_sysfs_root_folders(clt); + free_clt(clt); + +out: + return ERR_PTR(err); +} +EXPORT_SYMBOL(rtrs_clt_open); + +/** + * rtrs_clt_close() - Close a session + * @clt: Session handle. Session is freed upon return. + */ +void rtrs_clt_close(struct rtrs_clt *clt) +{ + struct rtrs_clt_sess *sess, *tmp; + + /* Firstly forbid sysfs access */ + rtrs_clt_destroy_sysfs_root_files(clt); + rtrs_clt_destroy_sysfs_root_folders(clt); + + /* Now it is safe to iterate over all paths without locks */ + list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { + rtrs_clt_destroy_sess_files(sess, NULL); + rtrs_clt_close_conns(sess, true); + kobject_put(&sess->kobj); + } + free_clt(clt); +} +EXPORT_SYMBOL(rtrs_clt_close); + +int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess) +{ + enum rtrs_clt_state old_state; + int err = -EBUSY; + bool changed; + + changed = rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, + &old_state); + if (changed) { + sess->reconnect_attempts = 0; + queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 0); + } + if (changed || old_state == RTRS_CLT_RECONNECTING) { + /* + * flush_delayed_work() queues pending work for immediate + * execution, so do the flush if we have queued something + * right now or work is pending. + */ + flush_delayed_work(&sess->reconnect_dwork); + err = (READ_ONCE(sess->state) == + RTRS_CLT_CONNECTED ? 0 : -ENOTCONN); + } + + return err; +} + +int rtrs_clt_disconnect_from_sysfs(struct rtrs_clt_sess *sess) +{ + rtrs_clt_close_conns(sess, true); + + return 0; +} + +int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, + const struct attribute *sysfs_self) +{ + enum rtrs_clt_state old_state; + bool changed; + + /* + * Continue stopping path till state was changed to DEAD or + * state was observed as DEAD: + * 1. State was changed to DEAD - we were fast and nobody + * invoked rtrs_clt_reconnect(), which can again start + * reconnecting. + * 2. State was observed as DEAD - we have someone in parallel + * removing the path. + */ + do { + rtrs_clt_close_conns(sess, true); + changed = rtrs_clt_change_state_get_old(sess, + RTRS_CLT_DEAD, + &old_state); + } while (!changed && old_state != RTRS_CLT_DEAD); + + if (likely(changed)) { + rtrs_clt_destroy_sess_files(sess, sysfs_self); + rtrs_clt_remove_path_from_arr(sess); + kobject_put(&sess->kobj); + } + + return 0; +} + +void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value) +{ + clt->max_reconnect_attempts = (unsigned int)value; +} + +int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt) +{ + return (int)clt->max_reconnect_attempts; +} + +/** + * rtrs_clt_request() - Request data transfer to/from server via RDMA. + * + * @dir: READ/WRITE + * @ops: callback function to be called as confirmation, and the pointer. + * @clt: Session + * @permit: Preallocated permit + * @vec: Message that is sent to server together with the request. + * Sum of len of all @vec elements limited to <= IO_MSG_SIZE. + * Since the msg is copied internally it can be allocated on stack. + * @nr: Number of elements in @vec. + * @data_len: length of data sent to/from server + * @sg: Pages to be sent/received to/from server. + * @sg_cnt: Number of elements in the @sg + * + * Return: + * 0: Success + * <0: Error + * + * On dir=READ rtrs client will request a data transfer from Server to client. + * The data that the server will respond with will be stored in @sg when + * the user receives an %RTRS_CLT_RDMA_EV_RDMA_REQUEST_WRITE_COMPL event. + * On dir=WRITE rtrs client will rdma write data in sg to server side. + */ +int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, + struct rtrs_clt *clt, struct rtrs_permit *permit, + const struct kvec *vec, size_t nr, size_t data_len, + struct scatterlist *sg, unsigned int sg_cnt) +{ + struct rtrs_clt_io_req *req; + struct rtrs_clt_sess *sess; + + enum dma_data_direction dma_dir; + int err = -ECONNABORTED, i; + size_t usr_len, hdr_len; + struct path_it it; + + /* Get kvec length */ + for (i = 0, usr_len = 0; i < nr; i++) + usr_len += vec[i].iov_len; + + if (dir == READ) { + hdr_len = sizeof(struct rtrs_msg_rdma_read) + + sg_cnt * sizeof(struct rtrs_sg_desc); + dma_dir = DMA_FROM_DEVICE; + } else { + hdr_len = sizeof(struct rtrs_msg_rdma_write); + dma_dir = DMA_TO_DEVICE; + } + + do_each_path(sess, clt, &it) { + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) + continue; + + if (unlikely(usr_len + hdr_len > sess->max_hdr_size)) { + rtrs_wrn_rl(sess->clt, + "%s request failed, user message size is %zu and header length %zu, but max size is %u\n", + dir == READ ? "Read" : "Write", + usr_len, hdr_len, sess->max_hdr_size); + err = -EMSGSIZE; + break; + } + req = rtrs_clt_get_req(sess, ops->conf_fn, permit, ops->priv, + vec, usr_len, sg, sg_cnt, data_len, + dma_dir); + if (dir == READ) + err = rtrs_clt_read_req(req); + else + err = rtrs_clt_write_req(req); + if (unlikely(err)) { + req->in_use = false; + continue; + } + /* Success path */ + break; + } while_each_path(&it); + + return err; +} +EXPORT_SYMBOL(rtrs_clt_request); + +/** + * rtrs_clt_query() - queries RTRS session attributes + *@clt: session pointer + *@attr: query results for session attributes. + * Returns: + * 0 on success + * -ECOMM no connection to the server + */ +int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr) +{ + if (!rtrs_clt_is_connected(clt)) + return -ECOMM; + + attr->queue_depth = clt->queue_depth; + attr->max_io_size = clt->max_io_size; + attr->sess_kobj = &clt->dev.kobj; + strlcpy(attr->sessname, clt->sessname, sizeof(attr->sessname)); + + return 0; +} +EXPORT_SYMBOL(rtrs_clt_query); + +int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, + struct rtrs_addr *addr) +{ + struct rtrs_clt_sess *sess; + int err; + + sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments); + if (IS_ERR(sess)) + return PTR_ERR(sess); + + /* + * It is totally safe to add path in CONNECTING state: coming + * IO will never grab it. Also it is very important to add + * path before init, since init fires LINK_CONNECTED event. + */ + rtrs_clt_add_path_to_arr(sess, addr); + + err = init_sess(sess); + if (err) + goto close_sess; + + err = rtrs_clt_create_sess_files(sess); + if (err) + goto close_sess; + + return 0; + +close_sess: + rtrs_clt_remove_path_from_arr(sess); + rtrs_clt_close_conns(sess, true); + free_sess(sess); + + return err; +} + +static int rtrs_clt_ib_dev_init(struct rtrs_ib_dev *dev) +{ + if (!(dev->ib_dev->attrs.device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS)) { + pr_err("Memory registrations not supported.\n"); + return -ENOTSUPP; + } + + return 0; +} + +static const struct rtrs_rdma_dev_pd_ops dev_pd_ops = { + .init = rtrs_clt_ib_dev_init +}; + +static int __init rtrs_client_init(void) +{ + rtrs_rdma_dev_pd_init(0, &dev_pd); + + rtrs_clt_dev_class = class_create(THIS_MODULE, "rtrs-client"); + if (IS_ERR(rtrs_clt_dev_class)) { + pr_err("Failed to create rtrs-client dev class\n"); + return PTR_ERR(rtrs_clt_dev_class); + } + rtrs_wq = alloc_workqueue("rtrs_client_wq", WQ_MEM_RECLAIM, 0); + if (!rtrs_wq) { + class_destroy(rtrs_clt_dev_class); + return -ENOMEM; + } + + return 0; +} + +static void __exit rtrs_client_exit(void) +{ + destroy_workqueue(rtrs_wq); + class_destroy(rtrs_clt_dev_class); + rtrs_rdma_dev_pd_deinit(&dev_pd); +} + +module_init(rtrs_client_init); +module_exit(rtrs_client_exit); From 89dd4c3bdc46688b1af53298890161d22f7314cb Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:13 +0200 Subject: [PATCH 335/562] RDMA/rtrs: client: statistics functions This introduces set of functions used on client side to account statistics of RDMA data sent/received, amount of IOs inflight, latency, cpu migrations, etc. Almost all statistics are collected using percpu variables. Link: https://lore.kernel.org/r/20200511135131.27580-8-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 200 +++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c new file mode 100644 index 000000000000..26bbe5d6dff5 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include "rtrs-clt.h" + +void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con) +{ + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_stats *stats = sess->stats; + struct rtrs_clt_stats_pcpu *s; + int cpu; + + cpu = raw_smp_processor_id(); + s = this_cpu_ptr(stats->pcpu_stats); + if (unlikely(con->cpu != cpu)) { + s->cpu_migr.to++; + + /* Careful here, override s pointer */ + s = per_cpu_ptr(stats->pcpu_stats, con->cpu); + atomic_inc(&s->cpu_migr.from); + } +} + +void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats) +{ + struct rtrs_clt_stats_pcpu *s; + + s = this_cpu_ptr(stats->pcpu_stats); + s->rdma.failover_cnt++; +} + +int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, + char *buf, size_t len) +{ + struct rtrs_clt_stats_pcpu *s; + + size_t used; + int cpu; + + used = scnprintf(buf, len, " "); + for_each_possible_cpu(cpu) + used += scnprintf(buf + used, len - used, " CPU%u", cpu); + + used += scnprintf(buf + used, len - used, "\nfrom:"); + for_each_possible_cpu(cpu) { + s = per_cpu_ptr(stats->pcpu_stats, cpu); + used += scnprintf(buf + used, len - used, " %d", + atomic_read(&s->cpu_migr.from)); + } + + used += scnprintf(buf + used, len - used, "\nto :"); + for_each_possible_cpu(cpu) { + s = per_cpu_ptr(stats->pcpu_stats, cpu); + used += scnprintf(buf + used, len - used, " %d", + s->cpu_migr.to); + } + used += scnprintf(buf + used, len - used, "\n"); + + return used; +} + +int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, + size_t len) +{ + return scnprintf(buf, len, "%d %d\n", + stats->reconnects.successful_cnt, + stats->reconnects.fail_cnt); +} + +ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, + char *page, size_t len) +{ + struct rtrs_clt_stats_rdma sum; + struct rtrs_clt_stats_rdma *r; + int cpu; + + memset(&sum, 0, sizeof(sum)); + + for_each_possible_cpu(cpu) { + r = &per_cpu_ptr(stats->pcpu_stats, cpu)->rdma; + + sum.dir[READ].cnt += r->dir[READ].cnt; + sum.dir[READ].size_total += r->dir[READ].size_total; + sum.dir[WRITE].cnt += r->dir[WRITE].cnt; + sum.dir[WRITE].size_total += r->dir[WRITE].size_total; + sum.failover_cnt += r->failover_cnt; + } + + return scnprintf(page, len, "%llu %llu %llu %llu %u %llu\n", + sum.dir[READ].cnt, sum.dir[READ].size_total, + sum.dir[WRITE].cnt, sum.dir[WRITE].size_total, + atomic_read(&stats->inflight), sum.failover_cnt); +} + +ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, + char *page, size_t len) +{ + return scnprintf(page, len, "echo 1 to reset all statistics\n"); +} + +int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable) +{ + struct rtrs_clt_stats_pcpu *s; + int cpu; + + if (!enable) + return -EINVAL; + + for_each_possible_cpu(cpu) { + s = per_cpu_ptr(stats->pcpu_stats, cpu); + memset(&s->rdma, 0, sizeof(s->rdma)); + } + + return 0; +} + +int rtrs_clt_reset_cpu_migr_stats(struct rtrs_clt_stats *stats, bool enable) +{ + struct rtrs_clt_stats_pcpu *s; + int cpu; + + if (!enable) + return -EINVAL; + + for_each_possible_cpu(cpu) { + s = per_cpu_ptr(stats->pcpu_stats, cpu); + memset(&s->cpu_migr, 0, sizeof(s->cpu_migr)); + } + + return 0; +} + +int rtrs_clt_reset_reconnects_stat(struct rtrs_clt_stats *stats, bool enable) +{ + if (!enable) + return -EINVAL; + + memset(&stats->reconnects, 0, sizeof(stats->reconnects)); + + return 0; +} + +int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *s, bool enable) +{ + if (enable) { + rtrs_clt_reset_rdma_stats(s, enable); + rtrs_clt_reset_cpu_migr_stats(s, enable); + rtrs_clt_reset_reconnects_stat(s, enable); + atomic_set(&s->inflight, 0); + return 0; + } + + return -EINVAL; +} + +static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats, + size_t size, int d) +{ + struct rtrs_clt_stats_pcpu *s; + + s = this_cpu_ptr(stats->pcpu_stats); + s->rdma.dir[d].cnt++; + s->rdma.dir[d].size_total += size; +} + +void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir) +{ + struct rtrs_clt_con *con = req->con; + struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_stats *stats = sess->stats; + unsigned int len; + + len = req->usr_len + req->data_len; + rtrs_clt_update_rdma_stats(stats, len, dir); + if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) + atomic_inc(&stats->inflight); +} + +int rtrs_clt_init_stats(struct rtrs_clt_stats *stats) +{ + stats->pcpu_stats = alloc_percpu(typeof(*stats->pcpu_stats)); + if (!stats->pcpu_stats) + return -ENOMEM; + + /* + * successful_cnt will be set to 0 after session + * is established for the first time + */ + stats->reconnects.successful_cnt = -1; + + return 0; +} From 215378b838df0019097a5266ebec1269ebd27f89 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:14 +0200 Subject: [PATCH 336/562] RDMA/rtrs: client: sysfs interface functions This is the sysfs interface to rtrs sessions on client side: /sys/class/rtrs-client// *** rtrs session created by rtrs_clt_open() API call | |- max_reconnect_attempts | *** number of reconnect attempts for session | |- add_path | *** adds another connection path into rtrs session | |- paths// *** established paths to server in a session | |- disconnect | *** disconnect path | |- reconnect | *** reconnect path | |- remove_path | *** remove current path | |- state | *** retrieve current path state | |- hca_port | *** HCA port number | |- hca_name | *** HCA name | |- stats/ *** current path statistics | |- cpu_migration |- rdma |- reconnects |- reset_all Link: https://lore.kernel.org/r/20200511135131.27580-9-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 483 +++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c new file mode 100644 index 000000000000..298b747d0330 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include "rtrs-pri.h" +#include "rtrs-clt.h" +#include "rtrs-log.h" + +#define MIN_MAX_RECONN_ATT -1 +#define MAX_MAX_RECONN_ATT 9999 + +static void rtrs_clt_sess_release(struct kobject *kobj) +{ + struct rtrs_clt_sess *sess; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + + free_sess(sess); +} + +static struct kobj_type ktype_sess = { + .sysfs_ops = &kobj_sysfs_ops, + .release = rtrs_clt_sess_release +}; + +static void rtrs_clt_sess_stats_release(struct kobject *kobj) +{ + struct rtrs_clt_stats *stats; + + stats = container_of(kobj, struct rtrs_clt_stats, kobj_stats); + + free_percpu(stats->pcpu_stats); + + kfree(stats); +} + +static struct kobj_type ktype_stats = { + .sysfs_ops = &kobj_sysfs_ops, + .release = rtrs_clt_sess_stats_release, +}; + +static ssize_t max_reconnect_attempts_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + + return sprintf(page, "%d\n", rtrs_clt_get_max_reconnect_attempts(clt)); +} + +static ssize_t max_reconnect_attempts_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int value; + int ret; + struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + + ret = kstrtoint(buf, 10, &value); + if (ret) { + rtrs_err(clt, "%s: failed to convert string '%s' to int\n", + attr->attr.name, buf); + return ret; + } + if (value > MAX_MAX_RECONN_ATT || + value < MIN_MAX_RECONN_ATT) { + rtrs_err(clt, + "%s: invalid range (provided: '%s', accepted: min: %d, max: %d)\n", + attr->attr.name, buf, MIN_MAX_RECONN_ATT, + MAX_MAX_RECONN_ATT); + return -EINVAL; + } + rtrs_clt_set_max_reconnect_attempts(clt, value); + + return count; +} + +static DEVICE_ATTR_RW(max_reconnect_attempts); + +static ssize_t mpath_policy_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct rtrs_clt *clt; + + clt = container_of(dev, struct rtrs_clt, dev); + + switch (clt->mp_policy) { + case MP_POLICY_RR: + return sprintf(page, "round-robin (RR: %d)\n", clt->mp_policy); + case MP_POLICY_MIN_INFLIGHT: + return sprintf(page, "min-inflight (MI: %d)\n", clt->mp_policy); + default: + return sprintf(page, "Unknown (%d)\n", clt->mp_policy); + } +} + +static ssize_t mpath_policy_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct rtrs_clt *clt; + int value; + int ret; + + clt = container_of(dev, struct rtrs_clt, dev); + + ret = kstrtoint(buf, 10, &value); + if (!ret && (value == MP_POLICY_RR || + value == MP_POLICY_MIN_INFLIGHT)) { + clt->mp_policy = value; + return count; + } + + if (!strncasecmp(buf, "round-robin", 11) || + !strncasecmp(buf, "rr", 2)) + clt->mp_policy = MP_POLICY_RR; + else if (!strncasecmp(buf, "min-inflight", 12) || + !strncasecmp(buf, "mi", 2)) + clt->mp_policy = MP_POLICY_MIN_INFLIGHT; + else + return -EINVAL; + + return count; +} + +static DEVICE_ATTR_RW(mpath_policy); + +static ssize_t add_path_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return scnprintf(page, PAGE_SIZE, + "Usage: echo [@] > %s\n\n*addr ::= [ ip: | gid: ]\n", + attr->attr.name); +} + +static ssize_t add_path_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sockaddr_storage srcaddr, dstaddr; + struct rtrs_addr addr = { + .src = &srcaddr, + .dst = &dstaddr + }; + struct rtrs_clt *clt; + const char *nl; + size_t len; + int err; + + clt = container_of(dev, struct rtrs_clt, dev); + + nl = strchr(buf, '\n'); + if (nl) + len = nl - buf; + else + len = count; + err = rtrs_addr_to_sockaddr(buf, len, clt->port, &addr); + if (err) + return -EINVAL; + + err = rtrs_clt_create_path_from_sysfs(clt, &addr); + if (err) + return err; + + return count; +} + +static DEVICE_ATTR_RW(add_path); + +static ssize_t rtrs_clt_state_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rtrs_clt_sess *sess; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + if (sess->state == RTRS_CLT_CONNECTED) + return sprintf(page, "connected\n"); + + return sprintf(page, "disconnected\n"); +} + +static struct kobj_attribute rtrs_clt_state_attr = + __ATTR(state, 0444, rtrs_clt_state_show, NULL); + +static ssize_t rtrs_clt_reconnect_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", + attr->attr.name); +} + +static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rtrs_clt_sess *sess; + int ret; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + if (!sysfs_streq(buf, "1")) { + rtrs_err(sess->clt, "%s: unknown value: '%s'\n", + attr->attr.name, buf); + return -EINVAL; + } + ret = rtrs_clt_reconnect_from_sysfs(sess); + if (ret) + return ret; + + return count; +} + +static struct kobj_attribute rtrs_clt_reconnect_attr = + __ATTR(reconnect, 0644, rtrs_clt_reconnect_show, + rtrs_clt_reconnect_store); + +static ssize_t rtrs_clt_disconnect_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", + attr->attr.name); +} + +static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rtrs_clt_sess *sess; + int ret; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + if (!sysfs_streq(buf, "1")) { + rtrs_err(sess->clt, "%s: unknown value: '%s'\n", + attr->attr.name, buf); + return -EINVAL; + } + ret = rtrs_clt_disconnect_from_sysfs(sess); + if (ret) + return ret; + + return count; +} + +static struct kobj_attribute rtrs_clt_disconnect_attr = + __ATTR(disconnect, 0644, rtrs_clt_disconnect_show, + rtrs_clt_disconnect_store); + +static ssize_t rtrs_clt_remove_path_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", + attr->attr.name); +} + +static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rtrs_clt_sess *sess; + int ret; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + if (!sysfs_streq(buf, "1")) { + rtrs_err(sess->clt, "%s: unknown value: '%s'\n", + attr->attr.name, buf); + return -EINVAL; + } + ret = rtrs_clt_remove_path_from_sysfs(sess, &attr->attr); + if (ret) + return ret; + + return count; +} + +static struct kobj_attribute rtrs_clt_remove_path_attr = + __ATTR(remove_path, 0644, rtrs_clt_remove_path_show, + rtrs_clt_remove_path_store); + +STAT_ATTR(struct rtrs_clt_stats, cpu_migration, + rtrs_clt_stats_migration_cnt_to_str, + rtrs_clt_reset_cpu_migr_stats); + +STAT_ATTR(struct rtrs_clt_stats, reconnects, + rtrs_clt_stats_reconnects_to_str, + rtrs_clt_reset_reconnects_stat); + +STAT_ATTR(struct rtrs_clt_stats, rdma, + rtrs_clt_stats_rdma_to_str, + rtrs_clt_reset_rdma_stats); + +STAT_ATTR(struct rtrs_clt_stats, reset_all, + rtrs_clt_reset_all_help, + rtrs_clt_reset_all_stats); + +static struct attribute *rtrs_clt_stats_attrs[] = { + &cpu_migration_attr.attr, + &reconnects_attr.attr, + &rdma_attr.attr, + &reset_all_attr.attr, + NULL +}; + +static struct attribute_group rtrs_clt_stats_attr_group = { + .attrs = rtrs_clt_stats_attrs, +}; + +static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_clt_sess *sess; + + sess = container_of(kobj, typeof(*sess), kobj); + + return scnprintf(page, PAGE_SIZE, "%u\n", sess->hca_port); +} + +static struct kobj_attribute rtrs_clt_hca_port_attr = + __ATTR(hca_port, 0444, rtrs_clt_hca_port_show, NULL); + +static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_clt_sess *sess; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", sess->hca_name); +} + +static struct kobj_attribute rtrs_clt_hca_name_attr = + __ATTR(hca_name, 0444, rtrs_clt_hca_name_show, NULL); + +static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_clt_sess *sess; + int cnt; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, + page, PAGE_SIZE); + return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); +} + +static struct kobj_attribute rtrs_clt_src_addr_attr = + __ATTR(src_addr, 0444, rtrs_clt_src_addr_show, NULL); + +static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_clt_sess *sess; + int cnt; + + sess = container_of(kobj, struct rtrs_clt_sess, kobj); + cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, + page, PAGE_SIZE); + return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); +} + +static struct kobj_attribute rtrs_clt_dst_addr_attr = + __ATTR(dst_addr, 0444, rtrs_clt_dst_addr_show, NULL); + +static struct attribute *rtrs_clt_sess_attrs[] = { + &rtrs_clt_hca_name_attr.attr, + &rtrs_clt_hca_port_attr.attr, + &rtrs_clt_src_addr_attr.attr, + &rtrs_clt_dst_addr_attr.attr, + &rtrs_clt_state_attr.attr, + &rtrs_clt_reconnect_attr.attr, + &rtrs_clt_disconnect_attr.attr, + &rtrs_clt_remove_path_attr.attr, + NULL, +}; + +static struct attribute_group rtrs_clt_sess_attr_group = { + .attrs = rtrs_clt_sess_attrs, +}; + +int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) +{ + struct rtrs_clt *clt = sess->clt; + char str[NAME_MAX]; + int err, cnt; + + cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, + str, sizeof(str)); + cnt += scnprintf(str + cnt, sizeof(str) - cnt, "@"); + sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, + str + cnt, sizeof(str) - cnt); + + err = kobject_init_and_add(&sess->kobj, &ktype_sess, clt->kobj_paths, + "%s", str); + if (err) { + pr_err("kobject_init_and_add: %d\n", err); + return err; + } + err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group); + if (err) { + pr_err("sysfs_create_group(): %d\n", err); + goto put_kobj; + } + err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats, + &sess->kobj, "stats"); + if (err) { + pr_err("kobject_init_and_add: %d\n", err); + goto remove_group; + } + + err = sysfs_create_group(&sess->stats->kobj_stats, + &rtrs_clt_stats_attr_group); + if (err) { + pr_err("failed to create stats sysfs group, err: %d\n", err); + goto put_kobj_stats; + } + + return 0; + +put_kobj_stats: + kobject_del(&sess->stats->kobj_stats); + kobject_put(&sess->stats->kobj_stats); +remove_group: + sysfs_remove_group(&sess->kobj, &rtrs_clt_sess_attr_group); +put_kobj: + kobject_del(&sess->kobj); + kobject_put(&sess->kobj); + + return err; +} + +void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess, + const struct attribute *sysfs_self) +{ + kobject_del(&sess->stats->kobj_stats); + kobject_put(&sess->stats->kobj_stats); + if (sysfs_self) + sysfs_remove_file_self(&sess->kobj, sysfs_self); + kobject_del(&sess->kobj); +} + +static struct attribute *rtrs_clt_attrs[] = { + &dev_attr_max_reconnect_attempts.attr, + &dev_attr_mpath_policy.attr, + &dev_attr_add_path.attr, + NULL, +}; + +static struct attribute_group rtrs_clt_attr_group = { + .attrs = rtrs_clt_attrs, +}; + +int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt) +{ + return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group); +} + +void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt) +{ + if (clt->kobj_paths) { + kobject_del(clt->kobj_paths); + kobject_put(clt->kobj_paths); + } +} + +void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt) +{ + sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); +} From 787f78a6b075ac7678123d5d0cac2c57d98c63e1 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:15 +0200 Subject: [PATCH 337/562] RDMA/rtrs: server: private header with server structs and functions This header describes main structs and functions used by rtrs-server module, mainly for accepting rtrs sessions, creating/destroying sysfs entries, accounting statistics on server side. Link: https://lore.kernel.org/r/20200511135131.27580-10-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.h | 148 +++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-srv.h diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h new file mode 100644 index 000000000000..dc95b0932f0d --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#ifndef RTRS_SRV_H +#define RTRS_SRV_H + +#include +#include +#include "rtrs-pri.h" + +/* + * enum rtrs_srv_state - Server states. + */ +enum rtrs_srv_state { + RTRS_SRV_CONNECTING, + RTRS_SRV_CONNECTED, + RTRS_SRV_CLOSING, + RTRS_SRV_CLOSED, +}; + +/* stats for Read and write operation. + * see Documentation/ABI/testing/sysfs-class-rtrs-server for details + */ +struct rtrs_srv_stats_rdma_stats { + struct { + atomic64_t cnt; + atomic64_t size_total; + } dir[2]; +}; + +struct rtrs_srv_stats { + struct kobject kobj_stats; + struct rtrs_srv_stats_rdma_stats rdma_stats; + struct rtrs_srv_sess *sess; +}; + +struct rtrs_srv_con { + struct rtrs_con c; + atomic_t wr_cnt; + atomic_t sq_wr_avail; + struct list_head rsp_wr_wait_list; + spinlock_t rsp_wr_wait_lock; +}; + +/* IO context in rtrs_srv, each io has one */ +struct rtrs_srv_op { + struct rtrs_srv_con *con; + u32 msg_id; + u8 dir; + struct rtrs_msg_rdma_read *rd_msg; + struct ib_rdma_wr tx_wr; + struct ib_sge tx_sg; + struct list_head wait_list; + int status; +}; + +/* + * server side memory region context, when always_invalidate=Y, we need + * queue_depth of memory regrion to invalidate each memory region. + */ +struct rtrs_srv_mr { + struct ib_mr *mr; + struct sg_table sgt; + struct ib_cqe inv_cqe; /* only for always_invalidate=true */ + u32 msg_id; /* only for always_invalidate=true */ + u32 msg_off; /* only for always_invalidate=true */ + struct rtrs_iu *iu; /* send buffer for new rkey msg */ +}; + +struct rtrs_srv_sess { + struct rtrs_sess s; + struct rtrs_srv *srv; + struct work_struct close_work; + enum rtrs_srv_state state; + spinlock_t state_lock; + int cur_cq_vector; + struct rtrs_srv_op **ops_ids; + atomic_t ids_inflight; + wait_queue_head_t ids_waitq; + struct rtrs_srv_mr *mrs; + unsigned int mrs_num; + dma_addr_t *dma_addr; + bool established; + unsigned int mem_bits; + struct kobject kobj; + struct rtrs_srv_stats *stats; +}; + +struct rtrs_srv { + struct list_head paths_list; + int paths_up; + struct mutex paths_ev_mutex; + size_t paths_num; + struct mutex paths_mutex; + uuid_t paths_uuid; + refcount_t refcount; + struct rtrs_srv_ctx *ctx; + struct list_head ctx_list; + void *priv; + size_t queue_depth; + struct page **chunks; + struct device dev; + unsigned int dev_ref; + struct kobject *kobj_paths; +}; + +struct rtrs_srv_ctx { + struct rtrs_srv_ops ops; + struct rdma_cm_id *cm_id_ip; + struct rdma_cm_id *cm_id_ib; + struct mutex srv_mutex; + struct list_head srv_list; +}; + +extern struct class *rtrs_dev_class; + +void close_sess(struct rtrs_srv_sess *sess); + +static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s, + size_t size, int d) +{ + atomic64_inc(&s->rdma_stats.dir[d].cnt); + atomic64_add(size, &s->rdma_stats.dir[d].size_total); +} + +/* functions which are implemented in rtrs-srv-stats.c */ +int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable); +ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, + char *page, size_t len); +int rtrs_srv_reset_wc_completion_stats(struct rtrs_srv_stats *stats, + bool enable); +int rtrs_srv_stats_wc_completion_to_str(struct rtrs_srv_stats *stats, char *buf, + size_t len); +int rtrs_srv_reset_all_stats(struct rtrs_srv_stats *stats, bool enable); +ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats, + char *page, size_t len); + +/* functions which are implemented in rtrs-srv-sysfs.c */ +int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess); +void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess); + +#endif /* RTRS_SRV_H */ From 9cb837480424e78ed585376f944088246685aec3 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:16 +0200 Subject: [PATCH 338/562] RDMA/rtrs: server: main functionality This is main functionality of rtrs-server module, which accepts set of RDMA connections (so called rtrs session), creates/destroys sysfs entries associated with rtrs session and notifies upper layer (user of RTRS API) about RDMA requests or link events. Link: https://lore.kernel.org/r/20200511135131.27580-11-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2175 ++++++++++++++++++++++++ 1 file changed, 2175 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-srv.c diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c new file mode 100644 index 000000000000..ba8ab33b94a2 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -0,0 +1,2175 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include +#include + +#include "rtrs-srv.h" +#include "rtrs-log.h" + +MODULE_DESCRIPTION("RDMA Transport Server"); +MODULE_LICENSE("GPL"); + +/* Must be power of 2, see mask from mr->page_size in ib_sg_to_pages() */ +#define DEFAULT_MAX_CHUNK_SIZE (128 << 10) +#define DEFAULT_SESS_QUEUE_DEPTH 512 +#define MAX_HDR_SIZE PAGE_SIZE + +/* We guarantee to serve 10 paths at least */ +#define CHUNK_POOL_SZ 10 + +static struct rtrs_rdma_dev_pd dev_pd; +static mempool_t *chunk_pool; +struct class *rtrs_dev_class; + +static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE; +static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH; + +static bool always_invalidate = true; +module_param(always_invalidate, bool, 0444); +MODULE_PARM_DESC(always_invalidate, + "Invalidate memory registration for contiguous memory regions before accessing."); + +module_param_named(max_chunk_size, max_chunk_size, int, 0444); +MODULE_PARM_DESC(max_chunk_size, + "Max size for each IO request, when change the unit is in byte (default: " + __stringify(DEFAULT_MAX_CHUNK_SIZE) "KB)"); + +module_param_named(sess_queue_depth, sess_queue_depth, int, 0444); +MODULE_PARM_DESC(sess_queue_depth, + "Number of buffers for pending I/O requests to allocate per session. Maximum: " + __stringify(MAX_SESS_QUEUE_DEPTH) " (default: " + __stringify(DEFAULT_SESS_QUEUE_DEPTH) ")"); + +static cpumask_t cq_affinity_mask = { CPU_BITS_ALL }; + +static struct workqueue_struct *rtrs_wq; + +static inline struct rtrs_srv_con *to_srv_con(struct rtrs_con *c) +{ + return container_of(c, struct rtrs_srv_con, c); +} + +static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s) +{ + return container_of(s, struct rtrs_srv_sess, s); +} + +static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) +{ + enum rtrs_srv_state old_state; + bool changed = false; + + lockdep_assert_held(&sess->state_lock); + old_state = sess->state; + switch (new_state) { + case RTRS_SRV_CONNECTED: + switch (old_state) { + case RTRS_SRV_CONNECTING: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_SRV_CLOSING: + switch (old_state) { + case RTRS_SRV_CONNECTING: + case RTRS_SRV_CONNECTED: + changed = true; + fallthrough; + default: + break; + } + break; + case RTRS_SRV_CLOSED: + switch (old_state) { + case RTRS_SRV_CLOSING: + changed = true; + fallthrough; + default: + break; + } + break; + default: + break; + } + if (changed) + sess->state = new_state; + + return changed; +} + +static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state, + enum rtrs_srv_state *old_state) +{ + bool changed; + + spin_lock_irq(&sess->state_lock); + *old_state = sess->state; + changed = __rtrs_srv_change_state(sess, new_state); + spin_unlock_irq(&sess->state_lock); + + return changed; +} + +static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) +{ + enum rtrs_srv_state old_state; + + return rtrs_srv_change_state_get_old(sess, new_state, &old_state); +} + +static void free_id(struct rtrs_srv_op *id) +{ + if (!id) + return; + kfree(id); +} + +static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + int i; + + WARN_ON(atomic_read(&sess->ids_inflight)); + if (sess->ops_ids) { + for (i = 0; i < srv->queue_depth; i++) + free_id(sess->ops_ids[i]); + kfree(sess->ops_ids); + sess->ops_ids = NULL; + } +} + +static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc); + +static struct ib_cqe io_comp_cqe = { + .done = rtrs_srv_rdma_done +}; + +static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_op *id; + int i; + + sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids), + GFP_KERNEL); + if (!sess->ops_ids) + goto err; + + for (i = 0; i < srv->queue_depth; ++i) { + id = kzalloc(sizeof(*id), GFP_KERNEL); + if (!id) + goto err; + + sess->ops_ids[i] = id; + } + init_waitqueue_head(&sess->ids_waitq); + atomic_set(&sess->ids_inflight, 0); + + return 0; + +err: + rtrs_srv_free_ops_ids(sess); + return -ENOMEM; +} + +static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess) +{ + atomic_inc(&sess->ids_inflight); +} + +static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess) +{ + if (atomic_dec_and_test(&sess->ids_inflight)) + wake_up(&sess->ids_waitq); +} + +static void rtrs_srv_wait_ops_ids(struct rtrs_srv_sess *sess) +{ + wait_event(sess->ids_waitq, !atomic_read(&sess->ids_inflight)); +} + + +static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_srv_con *con = cq->cq_context; + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(s, "REG MR failed: %s\n", + ib_wc_status_msg(wc->status)); + close_sess(sess); + return; + } +} + +static struct ib_cqe local_reg_cqe = { + .done = rtrs_srv_reg_mr_done +}; + +static int rdma_write_sg(struct rtrs_srv_op *id) +{ + struct rtrs_sess *s = id->con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; + struct rtrs_srv_mr *srv_mr; + struct rtrs_srv *srv = sess->srv; + struct ib_send_wr inv_wr, imm_wr; + struct ib_rdma_wr *wr = NULL; + enum ib_send_flags flags; + size_t sg_cnt; + int err, offset; + bool need_inval; + u32 rkey = 0; + struct ib_reg_wr rwr; + struct ib_sge *plist; + struct ib_sge list; + + sg_cnt = le16_to_cpu(id->rd_msg->sg_cnt); + need_inval = le16_to_cpu(id->rd_msg->flags) & RTRS_MSG_NEED_INVAL_F; + if (unlikely(sg_cnt != 1)) + return -EINVAL; + + offset = 0; + + wr = &id->tx_wr; + plist = &id->tx_sg; + plist->addr = dma_addr + offset; + plist->length = le32_to_cpu(id->rd_msg->desc[0].len); + + /* WR will fail with length error + * if this is 0 + */ + if (unlikely(plist->length == 0)) { + rtrs_err(s, "Invalid RDMA-Write sg list length 0\n"); + return -EINVAL; + } + + plist->lkey = sess->s.dev->ib_pd->local_dma_lkey; + offset += plist->length; + + wr->wr.sg_list = plist; + wr->wr.num_sge = 1; + wr->remote_addr = le64_to_cpu(id->rd_msg->desc[0].addr); + wr->rkey = le32_to_cpu(id->rd_msg->desc[0].key); + if (rkey == 0) + rkey = wr->rkey; + else + /* Only one key is actually used */ + WARN_ON_ONCE(rkey != wr->rkey); + + wr->wr.opcode = IB_WR_RDMA_WRITE; + wr->wr.ex.imm_data = 0; + wr->wr.send_flags = 0; + + if (need_inval && always_invalidate) { + wr->wr.next = &rwr.wr; + rwr.wr.next = &inv_wr; + inv_wr.next = &imm_wr; + } else if (always_invalidate) { + wr->wr.next = &rwr.wr; + rwr.wr.next = &imm_wr; + } else if (need_inval) { + wr->wr.next = &inv_wr; + inv_wr.next = &imm_wr; + } else { + wr->wr.next = &imm_wr; + } + /* + * From time to time we have to post signaled sends, + * or send queue will fill up and only QP reset can help. + */ + flags = (atomic_inc_return(&id->con->wr_cnt) % srv->queue_depth) ? + 0 : IB_SEND_SIGNALED; + + if (need_inval) { + inv_wr.sg_list = NULL; + inv_wr.num_sge = 0; + inv_wr.opcode = IB_WR_SEND_WITH_INV; + inv_wr.send_flags = 0; + inv_wr.ex.invalidate_rkey = rkey; + } + + imm_wr.next = NULL; + if (always_invalidate) { + struct rtrs_msg_rkey_rsp *msg; + + srv_mr = &sess->mrs[id->msg_id]; + rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.num_sge = 0; + rwr.mr = srv_mr->mr; + rwr.wr.send_flags = 0; + rwr.key = srv_mr->mr->rkey; + rwr.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + msg = srv_mr->iu->buf; + msg->buf_id = cpu_to_le16(id->msg_id); + msg->type = cpu_to_le16(RTRS_MSG_RKEY_RSP); + msg->rkey = cpu_to_le32(srv_mr->mr->rkey); + + list.addr = srv_mr->iu->dma_addr; + list.length = sizeof(*msg); + list.lkey = sess->s.dev->ib_pd->local_dma_lkey; + imm_wr.sg_list = &list; + imm_wr.num_sge = 1; + imm_wr.opcode = IB_WR_SEND_WITH_IMM; + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, + srv_mr->iu->dma_addr, + srv_mr->iu->size, DMA_TO_DEVICE); + } else { + imm_wr.sg_list = NULL; + imm_wr.num_sge = 0; + imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + } + imm_wr.send_flags = flags; + imm_wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, + 0, need_inval)); + + imm_wr.wr_cqe = &io_comp_cqe; + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, + offset, DMA_BIDIRECTIONAL); + + err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL); + if (unlikely(err)) + rtrs_err(s, + "Posting RDMA-Write-Request to QP failed, err: %d\n", + err); + + return err; +} + +/** + * send_io_resp_imm() - respond to client with empty IMM on failed READ/WRITE + * requests or on successful WRITE request. + * @con: the connection to send back result + * @id: the id associated with the IO + * @errno: the error number of the IO. + * + * Return 0 on success, errno otherwise. + */ +static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, + int errno) +{ + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct ib_send_wr inv_wr, imm_wr, *wr = NULL; + struct ib_reg_wr rwr; + struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_mr *srv_mr; + bool need_inval = false; + enum ib_send_flags flags; + u32 imm; + int err; + + if (id->dir == READ) { + struct rtrs_msg_rdma_read *rd_msg = id->rd_msg; + size_t sg_cnt; + + need_inval = le16_to_cpu(rd_msg->flags) & + RTRS_MSG_NEED_INVAL_F; + sg_cnt = le16_to_cpu(rd_msg->sg_cnt); + + if (need_inval) { + if (likely(sg_cnt)) { + inv_wr.sg_list = NULL; + inv_wr.num_sge = 0; + inv_wr.opcode = IB_WR_SEND_WITH_INV; + inv_wr.send_flags = 0; + /* Only one key is actually used */ + inv_wr.ex.invalidate_rkey = + le32_to_cpu(rd_msg->desc[0].key); + } else { + WARN_ON_ONCE(1); + need_inval = false; + } + } + } + + if (need_inval && always_invalidate) { + wr = &inv_wr; + inv_wr.next = &rwr.wr; + rwr.wr.next = &imm_wr; + } else if (always_invalidate) { + wr = &rwr.wr; + rwr.wr.next = &imm_wr; + } else if (need_inval) { + wr = &inv_wr; + inv_wr.next = &imm_wr; + } else { + wr = &imm_wr; + } + /* + * From time to time we have to post signalled sends, + * or send queue will fill up and only QP reset can help. + */ + flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ? + 0 : IB_SEND_SIGNALED; + imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); + imm_wr.next = NULL; + if (always_invalidate) { + struct ib_sge list; + struct rtrs_msg_rkey_rsp *msg; + + srv_mr = &sess->mrs[id->msg_id]; + rwr.wr.next = &imm_wr; + rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.num_sge = 0; + rwr.wr.send_flags = 0; + rwr.mr = srv_mr->mr; + rwr.key = srv_mr->mr->rkey; + rwr.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + msg = srv_mr->iu->buf; + msg->buf_id = cpu_to_le16(id->msg_id); + msg->type = cpu_to_le16(RTRS_MSG_RKEY_RSP); + msg->rkey = cpu_to_le32(srv_mr->mr->rkey); + + list.addr = srv_mr->iu->dma_addr; + list.length = sizeof(*msg); + list.lkey = sess->s.dev->ib_pd->local_dma_lkey; + imm_wr.sg_list = &list; + imm_wr.num_sge = 1; + imm_wr.opcode = IB_WR_SEND_WITH_IMM; + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, + srv_mr->iu->dma_addr, + srv_mr->iu->size, DMA_TO_DEVICE); + } else { + imm_wr.sg_list = NULL; + imm_wr.num_sge = 0; + imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + } + imm_wr.send_flags = flags; + imm_wr.wr_cqe = &io_comp_cqe; + + imm_wr.ex.imm_data = cpu_to_be32(imm); + + err = ib_post_send(id->con->c.qp, wr, NULL); + if (unlikely(err)) + rtrs_err_rl(s, "Posting RDMA-Reply to QP failed, err: %d\n", + err); + + return err; +} + +void close_sess(struct rtrs_srv_sess *sess) +{ + enum rtrs_srv_state old_state; + + if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING, + &old_state)) + queue_work(rtrs_wq, &sess->close_work); + WARN_ON(sess->state != RTRS_SRV_CLOSING); +} + +static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state) +{ + switch (state) { + case RTRS_SRV_CONNECTING: + return "RTRS_SRV_CONNECTING"; + case RTRS_SRV_CONNECTED: + return "RTRS_SRV_CONNECTED"; + case RTRS_SRV_CLOSING: + return "RTRS_SRV_CLOSING"; + case RTRS_SRV_CLOSED: + return "RTRS_SRV_CLOSED"; + default: + return "UNKNOWN"; + } +} + +/** + * rtrs_srv_resp_rdma() - Finish an RDMA request + * + * @id: Internal RTRS operation identifier + * @status: Response Code sent to the other side for this operation. + * 0 = success, <=0 error + * Context: any + * + * Finish a RDMA operation. A message is sent to the client and the + * corresponding memory areas will be released. + */ +bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status) +{ + struct rtrs_srv_sess *sess; + struct rtrs_srv_con *con; + struct rtrs_sess *s; + int err; + + if (WARN_ON(!id)) + return true; + + con = id->con; + s = con->c.sess; + sess = to_srv_sess(s); + + id->status = status; + + if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { + rtrs_err_rl(s, + "Sending I/O response failed, session is disconnected, sess state %s\n", + rtrs_srv_state_str(sess->state)); + goto out; + } + if (always_invalidate) { + struct rtrs_srv_mr *mr = &sess->mrs[id->msg_id]; + + ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); + } + if (unlikely(atomic_sub_return(1, + &con->sq_wr_avail) < 0)) { + pr_err("IB send queue full\n"); + atomic_add(1, &con->sq_wr_avail); + spin_lock(&con->rsp_wr_wait_lock); + list_add_tail(&id->wait_list, &con->rsp_wr_wait_list); + spin_unlock(&con->rsp_wr_wait_lock); + return false; + } + + if (status || id->dir == WRITE || !id->rd_msg->sg_cnt) + err = send_io_resp_imm(con, id, status); + else + err = rdma_write_sg(id); + + if (unlikely(err)) { + rtrs_err_rl(s, "IO response failed: %d\n", err); + close_sess(sess); + } +out: + rtrs_srv_put_ops_ids(sess); + return true; +} +EXPORT_SYMBOL(rtrs_srv_resp_rdma); + +/** + * rtrs_srv_set_sess_priv() - Set private pointer in rtrs_srv. + * @srv: Session pointer + * @priv: The private pointer that is associated with the session. + */ +void rtrs_srv_set_sess_priv(struct rtrs_srv *srv, void *priv) +{ + srv->priv = priv; +} +EXPORT_SYMBOL(rtrs_srv_set_sess_priv); + +static void unmap_cont_bufs(struct rtrs_srv_sess *sess) +{ + int i; + + for (i = 0; i < sess->mrs_num; i++) { + struct rtrs_srv_mr *srv_mr; + + srv_mr = &sess->mrs[i]; + rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, + sess->s.dev->ib_dev, 1); + ib_dereg_mr(srv_mr->mr); + ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, + srv_mr->sgt.nents, DMA_BIDIRECTIONAL); + sg_free_table(&srv_mr->sgt); + } + kfree(sess->mrs); +} + +static int map_cont_bufs(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_sess *ss = &sess->s; + int i, mri, err, mrs_num; + unsigned int chunk_bits; + int chunks_per_mr = 1; + + /* + * Here we map queue_depth chunks to MR. Firstly we have to + * figure out how many chunks can we map per MR. + */ + if (always_invalidate) { + /* + * in order to do invalidate for each chunks of memory, we needs + * more memory regions. + */ + mrs_num = srv->queue_depth; + } else { + chunks_per_mr = + sess->s.dev->ib_dev->attrs.max_fast_reg_page_list_len; + mrs_num = DIV_ROUND_UP(srv->queue_depth, chunks_per_mr); + chunks_per_mr = DIV_ROUND_UP(srv->queue_depth, mrs_num); + } + + sess->mrs = kcalloc(mrs_num, sizeof(*sess->mrs), GFP_KERNEL); + if (!sess->mrs) + return -ENOMEM; + + sess->mrs_num = mrs_num; + + for (mri = 0; mri < mrs_num; mri++) { + struct rtrs_srv_mr *srv_mr = &sess->mrs[mri]; + struct sg_table *sgt = &srv_mr->sgt; + struct scatterlist *s; + struct ib_mr *mr; + int nr, chunks; + + chunks = chunks_per_mr * mri; + if (!always_invalidate) + chunks_per_mr = min_t(int, chunks_per_mr, + srv->queue_depth - chunks); + + err = sg_alloc_table(sgt, chunks_per_mr, GFP_KERNEL); + if (err) + goto err; + + for_each_sg(sgt->sgl, s, chunks_per_mr, i) + sg_set_page(s, srv->chunks[chunks + i], + max_chunk_size, 0); + + nr = ib_dma_map_sg(sess->s.dev->ib_dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); + if (nr < sgt->nents) { + err = nr < 0 ? nr : -EINVAL; + goto free_sg; + } + mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, + sgt->nents); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto unmap_sg; + } + nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents, + NULL, max_chunk_size); + if (nr < sgt->nents) { + err = nr < 0 ? nr : -EINVAL; + goto dereg_mr; + } + + if (always_invalidate) { + srv_mr->iu = rtrs_iu_alloc(1, + sizeof(struct rtrs_msg_rkey_rsp), + GFP_KERNEL, sess->s.dev->ib_dev, + DMA_TO_DEVICE, rtrs_srv_rdma_done); + if (!srv_mr->iu) { + rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", + -ENOMEM); + goto free_iu; + } + } + /* Eventually dma addr for each chunk can be cached */ + for_each_sg(sgt->sgl, s, sgt->orig_nents, i) + sess->dma_addr[chunks + i] = sg_dma_address(s); + + ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); + srv_mr->mr = mr; + + continue; +err: + while (mri--) { + srv_mr = &sess->mrs[mri]; + sgt = &srv_mr->sgt; + mr = srv_mr->mr; +free_iu: + rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, + sess->s.dev->ib_dev, 1); +dereg_mr: + ib_dereg_mr(mr); +unmap_sg: + ib_dma_unmap_sg(sess->s.dev->ib_dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); +free_sg: + sg_free_table(sgt); + } + kfree(sess->mrs); + + return err; + } + + chunk_bits = ilog2(srv->queue_depth - 1) + 1; + sess->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); + + return 0; +} + +static void rtrs_srv_hb_err_handler(struct rtrs_con *c) +{ + close_sess(to_srv_sess(c->sess)); +} + +static void rtrs_srv_init_hb(struct rtrs_srv_sess *sess) +{ + rtrs_init_hb(&sess->s, &io_comp_cqe, + RTRS_HB_INTERVAL_MS, + RTRS_HB_MISSED_MAX, + rtrs_srv_hb_err_handler, + rtrs_wq); +} + +static void rtrs_srv_start_hb(struct rtrs_srv_sess *sess) +{ + rtrs_start_hb(&sess->s); +} + +static void rtrs_srv_stop_hb(struct rtrs_srv_sess *sess) +{ + rtrs_stop_hb(&sess->s); +} + +static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_srv_con *con = cq->cq_context; + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_iu *iu; + + iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); + rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(s, "Sess info response send failed: %s\n", + ib_wc_status_msg(wc->status)); + close_sess(sess); + return; + } + WARN_ON(wc->opcode != IB_WC_SEND); +} + +static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_ctx *ctx = srv->ctx; + int up; + + mutex_lock(&srv->paths_ev_mutex); + up = ++srv->paths_up; + if (up == 1) + ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); + mutex_unlock(&srv->paths_ev_mutex); + + /* Mark session as established */ + sess->established = true; +} + +static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_ctx *ctx = srv->ctx; + + if (!sess->established) + return; + + sess->established = false; + mutex_lock(&srv->paths_ev_mutex); + WARN_ON(!srv->paths_up); + if (--srv->paths_up == 0) + ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_DISCONNECTED, srv->priv); + mutex_unlock(&srv->paths_ev_mutex); +} + +static int post_recv_sess(struct rtrs_srv_sess *sess); + +static int process_info_req(struct rtrs_srv_con *con, + struct rtrs_msg_info_req *msg) +{ + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct ib_send_wr *reg_wr = NULL; + struct rtrs_msg_info_rsp *rsp; + struct rtrs_iu *tx_iu; + struct ib_reg_wr *rwr; + int mri, err; + size_t tx_sz; + + err = post_recv_sess(sess); + if (unlikely(err)) { + rtrs_err(s, "post_recv_sess(), err: %d\n", err); + return err; + } + rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL); + if (unlikely(!rwr)) + return -ENOMEM; + strlcpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); + + tx_sz = sizeof(*rsp); + tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num; + tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, sess->s.dev->ib_dev, + DMA_TO_DEVICE, rtrs_srv_info_rsp_done); + if (unlikely(!tx_iu)) { + err = -ENOMEM; + goto rwr_free; + } + + rsp = tx_iu->buf; + rsp->type = cpu_to_le16(RTRS_MSG_INFO_RSP); + rsp->sg_cnt = cpu_to_le16(sess->mrs_num); + + for (mri = 0; mri < sess->mrs_num; mri++) { + struct ib_mr *mr = sess->mrs[mri].mr; + + rsp->desc[mri].addr = cpu_to_le64(mr->iova); + rsp->desc[mri].key = cpu_to_le32(mr->rkey); + rsp->desc[mri].len = cpu_to_le32(mr->length); + + /* + * Fill in reg MR request and chain them *backwards* + */ + rwr[mri].wr.next = mri ? &rwr[mri - 1].wr : NULL; + rwr[mri].wr.opcode = IB_WR_REG_MR; + rwr[mri].wr.wr_cqe = &local_reg_cqe; + rwr[mri].wr.num_sge = 0; + rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED; + rwr[mri].mr = mr; + rwr[mri].key = mr->rkey; + rwr[mri].access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + reg_wr = &rwr[mri].wr; + } + + err = rtrs_srv_create_sess_files(sess); + if (unlikely(err)) + goto iu_free; + kobject_get(&sess->kobj); + get_device(&sess->srv->dev); + rtrs_srv_change_state(sess, RTRS_SRV_CONNECTED); + rtrs_srv_start_hb(sess); + + /* + * We do not account number of established connections at the current + * moment, we rely on the client, which should send info request when + * all connections are successfully established. Thus, simply notify + * listener with a proper event if we are the first path. + */ + rtrs_srv_sess_up(sess); + + ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, + tx_iu->size, DMA_TO_DEVICE); + + /* Send info response */ + err = rtrs_iu_post_send(&con->c, tx_iu, tx_sz, reg_wr); + if (unlikely(err)) { + rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); +iu_free: + rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + } +rwr_free: + kfree(rwr); + + return err; +} + +static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_srv_con *con = cq->cq_context; + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_msg_info_req *msg; + struct rtrs_iu *iu; + int err; + + WARN_ON(con->c.cid); + + iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(s, "Sess info request receive failed: %s\n", + ib_wc_status_msg(wc->status)); + goto close; + } + WARN_ON(wc->opcode != IB_WC_RECV); + + if (unlikely(wc->byte_len < sizeof(*msg))) { + rtrs_err(s, "Sess info request is malformed: size %d\n", + wc->byte_len); + goto close; + } + ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + iu->size, DMA_FROM_DEVICE); + msg = iu->buf; + if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ)) { + rtrs_err(s, "Sess info request is malformed: type %d\n", + le16_to_cpu(msg->type)); + goto close; + } + err = process_info_req(con, msg); + if (unlikely(err)) + goto close; + +out: + rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + return; +close: + close_sess(sess); + goto out; +} + +static int post_recv_info_req(struct rtrs_srv_con *con) +{ + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_iu *rx_iu; + int err; + + rx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), + GFP_KERNEL, sess->s.dev->ib_dev, + DMA_FROM_DEVICE, rtrs_srv_info_req_done); + if (unlikely(!rx_iu)) + return -ENOMEM; + /* Prepare for getting info response */ + err = rtrs_iu_post_recv(&con->c, rx_iu); + if (unlikely(err)) { + rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); + rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + return err; + } + + return 0; +} + +static int post_recv_io(struct rtrs_srv_con *con, size_t q_size) +{ + int i, err; + + for (i = 0; i < q_size; i++) { + err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); + if (unlikely(err)) + return err; + } + + return 0; +} + +static int post_recv_sess(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_sess *s = &sess->s; + size_t q_size; + int err, cid; + + for (cid = 0; cid < sess->s.con_num; cid++) { + if (cid == 0) + q_size = SERVICE_CON_QUEUE_DEPTH; + else + q_size = srv->queue_depth; + + err = post_recv_io(to_srv_con(sess->s.con[cid]), q_size); + if (unlikely(err)) { + rtrs_err(s, "post_recv_io(), err: %d\n", err); + return err; + } + } + + return 0; +} + +static void process_read(struct rtrs_srv_con *con, + struct rtrs_msg_rdma_read *msg, + u32 buf_id, u32 off) +{ + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_ctx *ctx = srv->ctx; + struct rtrs_srv_op *id; + + size_t usr_len, data_len; + void *data; + int ret; + + if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { + rtrs_err_rl(s, + "Processing read request failed, session is disconnected, sess state %s\n", + rtrs_srv_state_str(sess->state)); + return; + } + if (unlikely(msg->sg_cnt != 1 && msg->sg_cnt != 0)) { + rtrs_err_rl(s, + "Processing read request failed, invalid message\n"); + return; + } + rtrs_srv_get_ops_ids(sess); + rtrs_srv_update_rdma_stats(sess->stats, off, READ); + id = sess->ops_ids[buf_id]; + id->con = con; + id->dir = READ; + id->msg_id = buf_id; + id->rd_msg = msg; + usr_len = le16_to_cpu(msg->usr_len); + data_len = off - usr_len; + data = page_address(srv->chunks[buf_id]); + ret = ctx->ops.rdma_ev(srv, srv->priv, id, READ, data, data_len, + data + data_len, usr_len); + + if (unlikely(ret)) { + rtrs_err_rl(s, + "Processing read request failed, user module cb reported for msg_id %d, err: %d\n", + buf_id, ret); + goto send_err_msg; + } + + return; + +send_err_msg: + ret = send_io_resp_imm(con, id, ret); + if (ret < 0) { + rtrs_err_rl(s, + "Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n", + buf_id, ret); + close_sess(sess); + } + rtrs_srv_put_ops_ids(sess); +} + +static void process_write(struct rtrs_srv_con *con, + struct rtrs_msg_rdma_write *req, + u32 buf_id, u32 off) +{ + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_ctx *ctx = srv->ctx; + struct rtrs_srv_op *id; + + size_t data_len, usr_len; + void *data; + int ret; + + if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { + rtrs_err_rl(s, + "Processing write request failed, session is disconnected, sess state %s\n", + rtrs_srv_state_str(sess->state)); + return; + } + rtrs_srv_get_ops_ids(sess); + rtrs_srv_update_rdma_stats(sess->stats, off, WRITE); + id = sess->ops_ids[buf_id]; + id->con = con; + id->dir = WRITE; + id->msg_id = buf_id; + + usr_len = le16_to_cpu(req->usr_len); + data_len = off - usr_len; + data = page_address(srv->chunks[buf_id]); + ret = ctx->ops.rdma_ev(srv, srv->priv, id, WRITE, data, data_len, + data + data_len, usr_len); + if (unlikely(ret)) { + rtrs_err_rl(s, + "Processing write request failed, user module callback reports err: %d\n", + ret); + goto send_err_msg; + } + + return; + +send_err_msg: + ret = send_io_resp_imm(con, id, ret); + if (ret < 0) { + rtrs_err_rl(s, + "Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n", + buf_id, ret); + close_sess(sess); + } + rtrs_srv_put_ops_ids(sess); +} + +static void process_io_req(struct rtrs_srv_con *con, void *msg, + u32 id, u32 off) +{ + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_msg_rdma_hdr *hdr; + unsigned int type; + + ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, sess->dma_addr[id], + max_chunk_size, DMA_BIDIRECTIONAL); + hdr = msg; + type = le16_to_cpu(hdr->type); + + switch (type) { + case RTRS_MSG_WRITE: + process_write(con, msg, id, off); + break; + case RTRS_MSG_READ: + process_read(con, msg, id, off); + break; + default: + rtrs_err(s, + "Processing I/O request failed, unknown message type received: 0x%02x\n", + type); + goto err; + } + + return; + +err: + close_sess(sess); +} + +static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_srv_mr *mr = + container_of(wc->wr_cqe, typeof(*mr), inv_cqe); + struct rtrs_srv_con *con = cq->cq_context; + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_srv *srv = sess->srv; + u32 msg_id, off; + void *data; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + rtrs_err(s, "Failed IB_WR_LOCAL_INV: %s\n", + ib_wc_status_msg(wc->status)); + close_sess(sess); + } + msg_id = mr->msg_id; + off = mr->msg_off; + data = page_address(srv->chunks[msg_id]) + off; + process_io_req(con, data, msg_id, off); +} + +static int rtrs_srv_inv_rkey(struct rtrs_srv_con *con, + struct rtrs_srv_mr *mr) +{ + struct ib_send_wr wr = { + .opcode = IB_WR_LOCAL_INV, + .wr_cqe = &mr->inv_cqe, + .send_flags = IB_SEND_SIGNALED, + .ex.invalidate_rkey = mr->mr->rkey, + }; + mr->inv_cqe.done = rtrs_srv_inv_rkey_done; + + return ib_post_send(con->c.qp, &wr, NULL); +} + +static void rtrs_rdma_process_wr_wait_list(struct rtrs_srv_con *con) +{ + spin_lock(&con->rsp_wr_wait_lock); + while (!list_empty(&con->rsp_wr_wait_list)) { + struct rtrs_srv_op *id; + int ret; + + id = list_entry(con->rsp_wr_wait_list.next, + struct rtrs_srv_op, wait_list); + list_del(&id->wait_list); + + spin_unlock(&con->rsp_wr_wait_lock); + ret = rtrs_srv_resp_rdma(id, id->status); + spin_lock(&con->rsp_wr_wait_lock); + + if (!ret) { + list_add(&id->wait_list, &con->rsp_wr_wait_list); + break; + } + } + spin_unlock(&con->rsp_wr_wait_lock); +} + +static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rtrs_srv_con *con = cq->cq_context; + struct rtrs_sess *s = con->c.sess; + struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_srv *srv = sess->srv; + u32 imm_type, imm_payload; + int err; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_WR_FLUSH_ERR) { + rtrs_err(s, + "%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n", + ib_wc_status_msg(wc->status), wc->wr_cqe, + wc->opcode, wc->vendor_err, wc->byte_len); + close_sess(sess); + } + return; + } + + switch (wc->opcode) { + case IB_WC_RECV_RDMA_WITH_IMM: + /* + * post_recv() RDMA write completions of IO reqs (read/write) + * and hb + */ + if (WARN_ON(wc->wr_cqe != &io_comp_cqe)) + return; + err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); + if (unlikely(err)) { + rtrs_err(s, "rtrs_post_recv(), err: %d\n", err); + close_sess(sess); + break; + } + rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), + &imm_type, &imm_payload); + if (likely(imm_type == RTRS_IO_REQ_IMM)) { + u32 msg_id, off; + void *data; + + msg_id = imm_payload >> sess->mem_bits; + off = imm_payload & ((1 << sess->mem_bits) - 1); + if (unlikely(msg_id > srv->queue_depth || + off > max_chunk_size)) { + rtrs_err(s, "Wrong msg_id %u, off %u\n", + msg_id, off); + close_sess(sess); + return; + } + if (always_invalidate) { + struct rtrs_srv_mr *mr = &sess->mrs[msg_id]; + + mr->msg_off = off; + mr->msg_id = msg_id; + err = rtrs_srv_inv_rkey(con, mr); + if (unlikely(err)) { + rtrs_err(s, "rtrs_post_recv(), err: %d\n", + err); + close_sess(sess); + break; + } + } else { + data = page_address(srv->chunks[msg_id]) + off; + process_io_req(con, data, msg_id, off); + } + } else if (imm_type == RTRS_HB_MSG_IMM) { + WARN_ON(con->c.cid); + rtrs_send_hb_ack(&sess->s); + } else if (imm_type == RTRS_HB_ACK_IMM) { + WARN_ON(con->c.cid); + sess->s.hb_missed_cnt = 0; + } else { + rtrs_wrn(s, "Unknown IMM type %u\n", imm_type); + } + break; + case IB_WC_RDMA_WRITE: + case IB_WC_SEND: + /* + * post_send() RDMA write completions of IO reqs (read/write) + * and hb + */ + atomic_add(srv->queue_depth, &con->sq_wr_avail); + + if (unlikely(!list_empty_careful(&con->rsp_wr_wait_list))) + rtrs_rdma_process_wr_wait_list(con); + + break; + default: + rtrs_wrn(s, "Unexpected WC type: %d\n", wc->opcode); + return; + } +} + +/** + * rtrs_srv_get_sess_name() - Get rtrs_srv peer hostname. + * @srv: Session + * @sessname: Sessname buffer + * @len: Length of sessname buffer + */ +int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) +{ + struct rtrs_srv_sess *sess; + int err = -ENOTCONN; + + mutex_lock(&srv->paths_mutex); + list_for_each_entry(sess, &srv->paths_list, s.entry) { + if (sess->state != RTRS_SRV_CONNECTED) + continue; + strlcpy(sessname, sess->s.sessname, + min_t(size_t, sizeof(sess->s.sessname), len)); + err = 0; + break; + } + mutex_unlock(&srv->paths_mutex); + + return err; +} +EXPORT_SYMBOL(rtrs_srv_get_sess_name); + +/** + * rtrs_srv_get_sess_qdepth() - Get rtrs_srv qdepth. + * @srv: Session + */ +int rtrs_srv_get_queue_depth(struct rtrs_srv *srv) +{ + return srv->queue_depth; +} +EXPORT_SYMBOL(rtrs_srv_get_queue_depth); + +static int find_next_bit_ring(struct rtrs_srv_sess *sess) +{ + struct ib_device *ib_dev = sess->s.dev->ib_dev; + int v; + + v = cpumask_next(sess->cur_cq_vector, &cq_affinity_mask); + if (v >= nr_cpu_ids || v >= ib_dev->num_comp_vectors) + v = cpumask_first(&cq_affinity_mask); + return v; +} + +static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_sess *sess) +{ + sess->cur_cq_vector = find_next_bit_ring(sess); + + return sess->cur_cq_vector; +} + +static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) +{ + struct rtrs_srv *srv; + int i; + + srv = kzalloc(sizeof(*srv), GFP_KERNEL); + if (!srv) + return NULL; + + refcount_set(&srv->refcount, 1); + INIT_LIST_HEAD(&srv->paths_list); + mutex_init(&srv->paths_mutex); + mutex_init(&srv->paths_ev_mutex); + uuid_copy(&srv->paths_uuid, paths_uuid); + srv->queue_depth = sess_queue_depth; + srv->ctx = ctx; + + srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), + GFP_KERNEL); + if (!srv->chunks) + goto err_free_srv; + + for (i = 0; i < srv->queue_depth; i++) { + srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL); + if (!srv->chunks[i]) + goto err_free_chunks; + } + list_add(&srv->ctx_list, &ctx->srv_list); + + return srv; + +err_free_chunks: + while (i--) + mempool_free(srv->chunks[i], chunk_pool); + kfree(srv->chunks); + +err_free_srv: + kfree(srv); + + return NULL; +} + +static void free_srv(struct rtrs_srv *srv) +{ + int i; + + WARN_ON(refcount_read(&srv->refcount)); + for (i = 0; i < srv->queue_depth; i++) + mempool_free(srv->chunks[i], chunk_pool); + kfree(srv->chunks); + mutex_destroy(&srv->paths_mutex); + mutex_destroy(&srv->paths_ev_mutex); + /* last put to release the srv structure */ + put_device(&srv->dev); +} + +static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) +{ + struct rtrs_srv *srv; + + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + if (uuid_equal(&srv->paths_uuid, paths_uuid) && + refcount_inc_not_zero(&srv->refcount)) + return srv; + } + + return NULL; +} + +static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) +{ + struct rtrs_srv *srv; + + mutex_lock(&ctx->srv_mutex); + srv = __find_srv_and_get(ctx, paths_uuid); + if (!srv) + srv = __alloc_srv(ctx, paths_uuid); + mutex_unlock(&ctx->srv_mutex); + + return srv; +} + +static void put_srv(struct rtrs_srv *srv) +{ + if (refcount_dec_and_test(&srv->refcount)) { + struct rtrs_srv_ctx *ctx = srv->ctx; + + WARN_ON(srv->dev.kobj.state_in_sysfs); + + mutex_lock(&ctx->srv_mutex); + list_del(&srv->ctx_list); + mutex_unlock(&ctx->srv_mutex); + free_srv(srv); + } +} + +static void __add_path_to_srv(struct rtrs_srv *srv, + struct rtrs_srv_sess *sess) +{ + list_add_tail(&sess->s.entry, &srv->paths_list); + srv->paths_num++; + WARN_ON(srv->paths_num >= MAX_PATHS_NUM); +} + +static void del_path_from_srv(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + + if (WARN_ON(!srv)) + return; + + mutex_lock(&srv->paths_mutex); + list_del(&sess->s.entry); + WARN_ON(!srv->paths_num); + srv->paths_num--; + mutex_unlock(&srv->paths_mutex); +} + +/* return true if addresses are the same, error other wise */ +static int sockaddr_cmp(const struct sockaddr *a, const struct sockaddr *b) +{ + switch (a->sa_family) { + case AF_IB: + return memcmp(&((struct sockaddr_ib *)a)->sib_addr, + &((struct sockaddr_ib *)b)->sib_addr, + sizeof(struct ib_addr)) && + (b->sa_family == AF_IB); + case AF_INET: + return memcmp(&((struct sockaddr_in *)a)->sin_addr, + &((struct sockaddr_in *)b)->sin_addr, + sizeof(struct in_addr)) && + (b->sa_family == AF_INET); + case AF_INET6: + return memcmp(&((struct sockaddr_in6 *)a)->sin6_addr, + &((struct sockaddr_in6 *)b)->sin6_addr, + sizeof(struct in6_addr)) && + (b->sa_family == AF_INET6); + default: + return -ENOENT; + } +} + +static bool __is_path_w_addr_exists(struct rtrs_srv *srv, + struct rdma_addr *addr) +{ + struct rtrs_srv_sess *sess; + + list_for_each_entry(sess, &srv->paths_list, s.entry) + if (!sockaddr_cmp((struct sockaddr *)&sess->s.dst_addr, + (struct sockaddr *)&addr->dst_addr) && + !sockaddr_cmp((struct sockaddr *)&sess->s.src_addr, + (struct sockaddr *)&addr->src_addr)) + return true; + + return false; +} + +static void free_sess(struct rtrs_srv_sess *sess) +{ + if (sess->kobj.state_in_sysfs) + kobject_put(&sess->kobj); + else + kfree(sess); +} + +static void rtrs_srv_close_work(struct work_struct *work) +{ + struct rtrs_srv_sess *sess; + struct rtrs_srv_con *con; + int i; + + sess = container_of(work, typeof(*sess), close_work); + + rtrs_srv_destroy_sess_files(sess); + rtrs_srv_stop_hb(sess); + + for (i = 0; i < sess->s.con_num; i++) { + if (!sess->s.con[i]) + continue; + con = to_srv_con(sess->s.con[i]); + rdma_disconnect(con->c.cm_id); + ib_drain_qp(con->c.qp); + } + /* Wait for all inflights */ + rtrs_srv_wait_ops_ids(sess); + + /* Notify upper layer if we are the last path */ + rtrs_srv_sess_down(sess); + + unmap_cont_bufs(sess); + rtrs_srv_free_ops_ids(sess); + + for (i = 0; i < sess->s.con_num; i++) { + if (!sess->s.con[i]) + continue; + con = to_srv_con(sess->s.con[i]); + rtrs_cq_qp_destroy(&con->c); + rdma_destroy_id(con->c.cm_id); + kfree(con); + } + rtrs_ib_dev_put(sess->s.dev); + + del_path_from_srv(sess); + put_srv(sess->srv); + sess->srv = NULL; + rtrs_srv_change_state(sess, RTRS_SRV_CLOSED); + + kfree(sess->dma_addr); + kfree(sess->s.con); + free_sess(sess); +} + +static int rtrs_rdma_do_accept(struct rtrs_srv_sess *sess, + struct rdma_cm_id *cm_id) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_msg_conn_rsp msg; + struct rdma_conn_param param; + int err; + + param = (struct rdma_conn_param) { + .rnr_retry_count = 7, + .private_data = &msg, + .private_data_len = sizeof(msg), + }; + + msg = (struct rtrs_msg_conn_rsp) { + .magic = cpu_to_le16(RTRS_MAGIC), + .version = cpu_to_le16(RTRS_PROTO_VER), + .queue_depth = cpu_to_le16(srv->queue_depth), + .max_io_size = cpu_to_le32(max_chunk_size - MAX_HDR_SIZE), + .max_hdr_size = cpu_to_le32(MAX_HDR_SIZE), + }; + + if (always_invalidate) + msg.flags = cpu_to_le32(RTRS_MSG_NEW_RKEY_F); + + err = rdma_accept(cm_id, ¶m); + if (err) + pr_err("rdma_accept(), err: %d\n", err); + + return err; +} + +static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno) +{ + struct rtrs_msg_conn_rsp msg; + int err; + + msg = (struct rtrs_msg_conn_rsp) { + .magic = cpu_to_le16(RTRS_MAGIC), + .version = cpu_to_le16(RTRS_PROTO_VER), + .errno = cpu_to_le16(errno), + }; + + err = rdma_reject(cm_id, &msg, sizeof(msg)); + if (err) + pr_err("rdma_reject(), err: %d\n", err); + + /* Bounce errno back */ + return errno; +} + +static struct rtrs_srv_sess * +__find_sess(struct rtrs_srv *srv, const uuid_t *sess_uuid) +{ + struct rtrs_srv_sess *sess; + + list_for_each_entry(sess, &srv->paths_list, s.entry) { + if (uuid_equal(&sess->s.uuid, sess_uuid)) + return sess; + } + + return NULL; +} + +static int create_con(struct rtrs_srv_sess *sess, + struct rdma_cm_id *cm_id, + unsigned int cid) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_sess *s = &sess->s; + struct rtrs_srv_con *con; + + u16 cq_size, wr_queue_size; + int err, cq_vector; + + con = kzalloc(sizeof(*con), GFP_KERNEL); + if (!con) { + err = -ENOMEM; + goto err; + } + + spin_lock_init(&con->rsp_wr_wait_lock); + INIT_LIST_HEAD(&con->rsp_wr_wait_list); + con->c.cm_id = cm_id; + con->c.sess = &sess->s; + con->c.cid = cid; + atomic_set(&con->wr_cnt, 0); + + if (con->c.cid == 0) { + /* + * All receive and all send (each requiring invalidate) + * + 2 for drain and heartbeat + */ + wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; + cq_size = wr_queue_size; + } else { + /* + * If we have all receive requests posted and + * all write requests posted and each read request + * requires an invalidate request + drain + * and qp gets into error state. + */ + cq_size = srv->queue_depth * 3 + 1; + /* + * In theory we might have queue_depth * 32 + * outstanding requests if an unsafe global key is used + * and we have queue_depth read requests each consisting + * of 32 different addresses. div 3 for mlx5. + */ + wr_queue_size = sess->s.dev->ib_dev->attrs.max_qp_wr / 3; + } + atomic_set(&con->sq_wr_avail, wr_queue_size); + cq_vector = rtrs_srv_get_next_cq_vector(sess); + + /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ + err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size, + wr_queue_size, IB_POLL_WORKQUEUE); + if (err) { + rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); + goto free_con; + } + if (con->c.cid == 0) { + err = post_recv_info_req(con); + if (err) + goto free_cqqp; + } + WARN_ON(sess->s.con[cid]); + sess->s.con[cid] = &con->c; + + /* + * Change context from server to current connection. The other + * way is to use cm_id->qp->qp_context, which does not work on OFED. + */ + cm_id->context = &con->c; + + return 0; + +free_cqqp: + rtrs_cq_qp_destroy(&con->c); +free_con: + kfree(con); + +err: + return err; +} + +static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, + struct rdma_cm_id *cm_id, + unsigned int con_num, + unsigned int recon_cnt, + const uuid_t *uuid) +{ + struct rtrs_srv_sess *sess; + int err = -ENOMEM; + + if (srv->paths_num >= MAX_PATHS_NUM) { + err = -ECONNRESET; + goto err; + } + if (__is_path_w_addr_exists(srv, &cm_id->route.addr)) { + err = -EEXIST; + pr_err("Path with same addr exists\n"); + goto err; + } + sess = kzalloc(sizeof(*sess), GFP_KERNEL); + if (!sess) + goto err; + + sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); + if (!sess->stats) + goto err_free_sess; + + sess->stats->sess = sess; + + sess->dma_addr = kcalloc(srv->queue_depth, sizeof(*sess->dma_addr), + GFP_KERNEL); + if (!sess->dma_addr) + goto err_free_stats; + + sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL); + if (!sess->s.con) + goto err_free_dma_addr; + + sess->state = RTRS_SRV_CONNECTING; + sess->srv = srv; + sess->cur_cq_vector = -1; + sess->s.dst_addr = cm_id->route.addr.dst_addr; + sess->s.src_addr = cm_id->route.addr.src_addr; + sess->s.con_num = con_num; + sess->s.recon_cnt = recon_cnt; + uuid_copy(&sess->s.uuid, uuid); + spin_lock_init(&sess->state_lock); + INIT_WORK(&sess->close_work, rtrs_srv_close_work); + rtrs_srv_init_hb(sess); + + sess->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd); + if (!sess->s.dev) { + err = -ENOMEM; + goto err_free_con; + } + err = map_cont_bufs(sess); + if (err) + goto err_put_dev; + + err = rtrs_srv_alloc_ops_ids(sess); + if (err) + goto err_unmap_bufs; + + __add_path_to_srv(srv, sess); + + return sess; + +err_unmap_bufs: + unmap_cont_bufs(sess); +err_put_dev: + rtrs_ib_dev_put(sess->s.dev); +err_free_con: + kfree(sess->s.con); +err_free_dma_addr: + kfree(sess->dma_addr); +err_free_stats: + kfree(sess->stats); +err_free_sess: + kfree(sess); +err: + return ERR_PTR(err); +} + +static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, + const struct rtrs_msg_conn_req *msg, + size_t len) +{ + struct rtrs_srv_ctx *ctx = cm_id->context; + struct rtrs_srv_sess *sess; + struct rtrs_srv *srv; + + u16 version, con_num, cid; + u16 recon_cnt; + int err; + + if (len < sizeof(*msg)) { + pr_err("Invalid RTRS connection request\n"); + goto reject_w_econnreset; + } + if (le16_to_cpu(msg->magic) != RTRS_MAGIC) { + pr_err("Invalid RTRS magic\n"); + goto reject_w_econnreset; + } + version = le16_to_cpu(msg->version); + if (version >> 8 != RTRS_PROTO_VER_MAJOR) { + pr_err("Unsupported major RTRS version: %d, expected %d\n", + version >> 8, RTRS_PROTO_VER_MAJOR); + goto reject_w_econnreset; + } + con_num = le16_to_cpu(msg->cid_num); + if (con_num > 4096) { + /* Sanity check */ + pr_err("Too many connections requested: %d\n", con_num); + goto reject_w_econnreset; + } + cid = le16_to_cpu(msg->cid); + if (cid >= con_num) { + /* Sanity check */ + pr_err("Incorrect cid: %d >= %d\n", cid, con_num); + goto reject_w_econnreset; + } + recon_cnt = le16_to_cpu(msg->recon_cnt); + srv = get_or_create_srv(ctx, &msg->paths_uuid); + if (!srv) { + err = -ENOMEM; + goto reject_w_err; + } + mutex_lock(&srv->paths_mutex); + sess = __find_sess(srv, &msg->sess_uuid); + if (sess) { + struct rtrs_sess *s = &sess->s; + + /* Session already holds a reference */ + put_srv(srv); + + if (sess->state != RTRS_SRV_CONNECTING) { + rtrs_err(s, "Session in wrong state: %s\n", + rtrs_srv_state_str(sess->state)); + mutex_unlock(&srv->paths_mutex); + goto reject_w_econnreset; + } + /* + * Sanity checks + */ + if (con_num != sess->s.con_num || cid >= sess->s.con_num) { + rtrs_err(s, "Incorrect request: %d, %d\n", + cid, con_num); + mutex_unlock(&srv->paths_mutex); + goto reject_w_econnreset; + } + if (sess->s.con[cid]) { + rtrs_err(s, "Connection already exists: %d\n", + cid); + mutex_unlock(&srv->paths_mutex); + goto reject_w_econnreset; + } + } else { + sess = __alloc_sess(srv, cm_id, con_num, recon_cnt, + &msg->sess_uuid); + if (IS_ERR(sess)) { + mutex_unlock(&srv->paths_mutex); + put_srv(srv); + err = PTR_ERR(sess); + goto reject_w_err; + } + } + err = create_con(sess, cm_id, cid); + if (err) { + (void)rtrs_rdma_do_reject(cm_id, err); + /* + * Since session has other connections we follow normal way + * through workqueue, but still return an error to tell cma.c + * to call rdma_destroy_id() for current connection. + */ + goto close_and_return_err; + } + err = rtrs_rdma_do_accept(sess, cm_id); + if (err) { + (void)rtrs_rdma_do_reject(cm_id, err); + /* + * Since current connection was successfully added to the + * session we follow normal way through workqueue to close the + * session, thus return 0 to tell cma.c we call + * rdma_destroy_id() ourselves. + */ + err = 0; + goto close_and_return_err; + } + mutex_unlock(&srv->paths_mutex); + + return 0; + +reject_w_err: + return rtrs_rdma_do_reject(cm_id, err); + +reject_w_econnreset: + return rtrs_rdma_do_reject(cm_id, -ECONNRESET); + +close_and_return_err: + close_sess(sess); + mutex_unlock(&srv->paths_mutex); + + return err; +} + +static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *ev) +{ + struct rtrs_srv_sess *sess = NULL; + struct rtrs_sess *s = NULL; + + if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) { + struct rtrs_con *c = cm_id->context; + + s = c->sess; + sess = to_srv_sess(s); + } + + switch (ev->event) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + /* + * In case of error cma.c will destroy cm_id, + * see cma_process_remove() + */ + return rtrs_rdma_connect(cm_id, ev->param.conn.private_data, + ev->param.conn.private_data_len); + case RDMA_CM_EVENT_ESTABLISHED: + /* Nothing here */ + break; + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + rtrs_err(s, "CM error (CM event: %s, err: %d)\n", + rdma_event_msg(ev->event), ev->status); + close_sess(sess); + break; + case RDMA_CM_EVENT_DISCONNECTED: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + close_sess(sess); + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + close_sess(sess); + break; + default: + pr_err("Ignoring unexpected CM event %s, err %d\n", + rdma_event_msg(ev->event), ev->status); + break; + } + + return 0; +} + +static struct rdma_cm_id *rtrs_srv_cm_init(struct rtrs_srv_ctx *ctx, + struct sockaddr *addr, + enum rdma_ucm_port_space ps) +{ + struct rdma_cm_id *cm_id; + int ret; + + cm_id = rdma_create_id(&init_net, rtrs_srv_rdma_cm_handler, + ctx, ps, IB_QPT_RC); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); + pr_err("Creating id for RDMA connection failed, err: %d\n", + ret); + goto err_out; + } + ret = rdma_bind_addr(cm_id, addr); + if (ret) { + pr_err("Binding RDMA address failed, err: %d\n", ret); + goto err_cm; + } + ret = rdma_listen(cm_id, 64); + if (ret) { + pr_err("Listening on RDMA connection failed, err: %d\n", + ret); + goto err_cm; + } + + return cm_id; + +err_cm: + rdma_destroy_id(cm_id); +err_out: + + return ERR_PTR(ret); +} + +static int rtrs_srv_rdma_init(struct rtrs_srv_ctx *ctx, u16 port) +{ + struct sockaddr_in6 sin = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = htons(port), + }; + struct sockaddr_ib sib = { + .sib_family = AF_IB, + .sib_sid = cpu_to_be64(RDMA_IB_IP_PS_IB | port), + .sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL), + .sib_pkey = cpu_to_be16(0xffff), + }; + struct rdma_cm_id *cm_ip, *cm_ib; + int ret; + + /* + * We accept both IPoIB and IB connections, so we need to keep + * two cm id's, one for each socket type and port space. + * If the cm initialization of one of the id's fails, we abort + * everything. + */ + cm_ip = rtrs_srv_cm_init(ctx, (struct sockaddr *)&sin, RDMA_PS_TCP); + if (IS_ERR(cm_ip)) + return PTR_ERR(cm_ip); + + cm_ib = rtrs_srv_cm_init(ctx, (struct sockaddr *)&sib, RDMA_PS_IB); + if (IS_ERR(cm_ib)) { + ret = PTR_ERR(cm_ib); + goto free_cm_ip; + } + + ctx->cm_id_ip = cm_ip; + ctx->cm_id_ib = cm_ib; + + return 0; + +free_cm_ip: + rdma_destroy_id(cm_ip); + + return ret; +} + +static struct rtrs_srv_ctx *alloc_srv_ctx(struct rtrs_srv_ops *ops) +{ + struct rtrs_srv_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ctx->ops = *ops; + mutex_init(&ctx->srv_mutex); + INIT_LIST_HEAD(&ctx->srv_list); + + return ctx; +} + +static void free_srv_ctx(struct rtrs_srv_ctx *ctx) +{ + WARN_ON(!list_empty(&ctx->srv_list)); + mutex_destroy(&ctx->srv_mutex); + kfree(ctx); +} + +/** + * rtrs_srv_open() - open RTRS server context + * @ops: callback functions + * @port: port to listen on + * + * Creates server context with specified callbacks. + * + * Return a valid pointer on success otherwise PTR_ERR. + */ +struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port) +{ + struct rtrs_srv_ctx *ctx; + int err; + + ctx = alloc_srv_ctx(ops); + if (!ctx) + return ERR_PTR(-ENOMEM); + + err = rtrs_srv_rdma_init(ctx, port); + if (err) { + free_srv_ctx(ctx); + return ERR_PTR(err); + } + + return ctx; +} +EXPORT_SYMBOL(rtrs_srv_open); + +static void close_sessions(struct rtrs_srv *srv) +{ + struct rtrs_srv_sess *sess; + + mutex_lock(&srv->paths_mutex); + list_for_each_entry(sess, &srv->paths_list, s.entry) + close_sess(sess); + mutex_unlock(&srv->paths_mutex); +} + +static void close_ctx(struct rtrs_srv_ctx *ctx) +{ + struct rtrs_srv *srv; + + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) + close_sessions(srv); + mutex_unlock(&ctx->srv_mutex); + flush_workqueue(rtrs_wq); +} + +/** + * rtrs_srv_close() - close RTRS server context + * @ctx: pointer to server context + * + * Closes RTRS server context with all client sessions. + */ +void rtrs_srv_close(struct rtrs_srv_ctx *ctx) +{ + rdma_destroy_id(ctx->cm_id_ip); + rdma_destroy_id(ctx->cm_id_ib); + close_ctx(ctx); + free_srv_ctx(ctx); +} +EXPORT_SYMBOL(rtrs_srv_close); + +static int check_module_params(void) +{ + if (sess_queue_depth < 1 || sess_queue_depth > MAX_SESS_QUEUE_DEPTH) { + pr_err("Invalid sess_queue_depth value %d, has to be >= %d, <= %d.\n", + sess_queue_depth, 1, MAX_SESS_QUEUE_DEPTH); + return -EINVAL; + } + if (max_chunk_size < 4096 || !is_power_of_2(max_chunk_size)) { + pr_err("Invalid max_chunk_size value %d, has to be >= %d and should be power of two.\n", + max_chunk_size, 4096); + return -EINVAL; + } + + /* + * Check if IB immediate data size is enough to hold the mem_id and the + * offset inside the memory chunk + */ + if ((ilog2(sess_queue_depth - 1) + 1) + + (ilog2(max_chunk_size - 1) + 1) > MAX_IMM_PAYL_BITS) { + pr_err("RDMA immediate size (%db) not enough to encode %d buffers of size %dB. Reduce 'sess_queue_depth' or 'max_chunk_size' parameters.\n", + MAX_IMM_PAYL_BITS, sess_queue_depth, max_chunk_size); + return -EINVAL; + } + + return 0; +} + +static int __init rtrs_server_init(void) +{ + int err; + + pr_info("Loading module %s, proto %s: (max_chunk_size: %d (pure IO %ld, headers %ld) , sess_queue_depth: %d, always_invalidate: %d)\n", + KBUILD_MODNAME, RTRS_PROTO_VER_STRING, + max_chunk_size, max_chunk_size - MAX_HDR_SIZE, MAX_HDR_SIZE, + sess_queue_depth, always_invalidate); + + rtrs_rdma_dev_pd_init(0, &dev_pd); + + err = check_module_params(); + if (err) { + pr_err("Failed to load module, invalid module parameters, err: %d\n", + err); + return err; + } + chunk_pool = mempool_create_page_pool(sess_queue_depth * CHUNK_POOL_SZ, + get_order(max_chunk_size)); + if (!chunk_pool) + return -ENOMEM; + rtrs_dev_class = class_create(THIS_MODULE, "rtrs-server"); + if (IS_ERR(rtrs_dev_class)) { + err = PTR_ERR(rtrs_dev_class); + goto out_chunk_pool; + } + rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0); + if (!rtrs_wq) + goto out_dev_class; + + return 0; + +out_dev_class: + class_destroy(rtrs_dev_class); +out_chunk_pool: + mempool_destroy(chunk_pool); + + return err; +} + +static void __exit rtrs_server_exit(void) +{ + destroy_workqueue(rtrs_wq); + class_destroy(rtrs_dev_class); + mempool_destroy(chunk_pool); + rtrs_rdma_dev_pd_deinit(&dev_pd); +} + +module_init(rtrs_server_init); +module_exit(rtrs_server_exit); From c4f07c60bb021dd76382457e23d72ca078bb6f13 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:17 +0200 Subject: [PATCH 339/562] RDMA/rtrs: server: statistics functions This introduces set of functions used on server side to account statistics of RDMA data sent/received. Link: https://lore.kernel.org/r/20200511135131.27580-12-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c | 38 ++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c new file mode 100644 index 000000000000..e102b1368d0c --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include "rtrs-srv.h" + +int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable) +{ + if (enable) { + struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; + + memset(r, 0, sizeof(*r)); + return 0; + } + + return -EINVAL; +} + +ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, + char *page, size_t len) +{ + struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; + struct rtrs_srv_sess *sess = stats->sess; + + return scnprintf(page, len, "%lld %lld %lld %lld %u\n", + (s64)atomic64_read(&r->dir[READ].cnt), + (s64)atomic64_read(&r->dir[READ].size_total), + (s64)atomic64_read(&r->dir[WRITE].cnt), + (s64)atomic64_read(&r->dir[WRITE].size_total), + atomic_read(&sess->ids_inflight)); +} From 91b11610af8d61acd618ab1532cf34a4901fee1e Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:18 +0200 Subject: [PATCH 340/562] RDMA/rtrs: server: sysfs interface functions This is the sysfs interface to rtrs sessions on server side: /sys/class/rtrs-server// *** rtrs session accepted from a client peer | |- paths// *** established paths from a client in a session | |- disconnect | *** disconnect path | |- hca_name | *** HCA name | |- hca_port | *** HCA port | |- stats/ *** current path statistics | |- rdma Link: https://lore.kernel.org/r/20200511135131.27580-13-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 320 +++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c new file mode 100644 index 000000000000..0cf015634338 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Transport Layer + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include "rtrs-pri.h" +#include "rtrs-srv.h" +#include "rtrs-log.h" + +static void rtrs_srv_release(struct kobject *kobj) +{ + struct rtrs_srv_sess *sess; + + sess = container_of(kobj, struct rtrs_srv_sess, kobj); + kfree(sess); +} + +static struct kobj_type ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .release = rtrs_srv_release, +}; + +static ssize_t rtrs_srv_disconnect_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", + attr->attr.name); +} + +static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rtrs_srv_sess *sess; + struct rtrs_sess *s; + char str[MAXHOSTNAMELEN]; + + sess = container_of(kobj, struct rtrs_srv_sess, kobj); + s = &sess->s; + if (!sysfs_streq(buf, "1")) { + rtrs_err(s, "%s: invalid value: '%s'\n", + attr->attr.name, buf); + return -EINVAL; + } + + sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str)); + + rtrs_info(s, "disconnect for path %s requested\n", str); + close_sess(sess); + + return count; +} + +static struct kobj_attribute rtrs_srv_disconnect_attr = + __ATTR(disconnect, 0644, + rtrs_srv_disconnect_show, rtrs_srv_disconnect_store); + +static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_srv_sess *sess; + struct rtrs_con *usr_con; + + sess = container_of(kobj, typeof(*sess), kobj); + usr_con = sess->s.con[0]; + + return scnprintf(page, PAGE_SIZE, "%u\n", + usr_con->cm_id->port_num); +} + +static struct kobj_attribute rtrs_srv_hca_port_attr = + __ATTR(hca_port, 0444, rtrs_srv_hca_port_show, NULL); + +static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_srv_sess *sess; + + sess = container_of(kobj, struct rtrs_srv_sess, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", + sess->s.dev->ib_dev->name); +} + +static struct kobj_attribute rtrs_srv_hca_name_attr = + __ATTR(hca_name, 0444, rtrs_srv_hca_name_show, NULL); + +static ssize_t rtrs_srv_src_addr_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_srv_sess *sess; + int cnt; + + sess = container_of(kobj, struct rtrs_srv_sess, kobj); + cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, + page, PAGE_SIZE); + return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); +} + +static struct kobj_attribute rtrs_srv_src_addr_attr = + __ATTR(src_addr, 0444, rtrs_srv_src_addr_show, NULL); + +static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rtrs_srv_sess *sess; + int cnt; + + sess = container_of(kobj, struct rtrs_srv_sess, kobj); + cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, + page, PAGE_SIZE); + return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); +} + +static struct kobj_attribute rtrs_srv_dst_addr_attr = + __ATTR(dst_addr, 0444, rtrs_srv_dst_addr_show, NULL); + +static struct attribute *rtrs_srv_sess_attrs[] = { + &rtrs_srv_hca_name_attr.attr, + &rtrs_srv_hca_port_attr.attr, + &rtrs_srv_src_addr_attr.attr, + &rtrs_srv_dst_addr_attr.attr, + &rtrs_srv_disconnect_attr.attr, + NULL, +}; + +static struct attribute_group rtrs_srv_sess_attr_group = { + .attrs = rtrs_srv_sess_attrs, +}; + +STAT_ATTR(struct rtrs_srv_stats, rdma, + rtrs_srv_stats_rdma_to_str, + rtrs_srv_reset_rdma_stats); + +static struct attribute *rtrs_srv_stats_attrs[] = { + &rdma_attr.attr, + NULL, +}; + +static struct attribute_group rtrs_srv_stats_attr_group = { + .attrs = rtrs_srv_stats_attrs, +}; + +static void rtrs_srv_dev_release(struct device *dev) +{ + struct rtrs_srv *srv = container_of(dev, struct rtrs_srv, dev); + + kfree(srv); +} + +static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + int err = 0; + + mutex_lock(&srv->paths_mutex); + if (srv->dev_ref++) { + /* + * Device needs to be registered only on the first session + */ + goto unlock; + } + srv->dev.class = rtrs_dev_class; + srv->dev.release = rtrs_srv_dev_release; + err = dev_set_name(&srv->dev, "%s", sess->s.sessname); + if (err) + goto unlock; + + /* + * Suppress user space notification until + * sysfs files are created + */ + dev_set_uevent_suppress(&srv->dev, true); + err = device_register(&srv->dev); + if (err) { + pr_err("device_register(): %d\n", err); + goto put; + } + srv->kobj_paths = kobject_create_and_add("paths", &srv->dev.kobj); + if (!srv->kobj_paths) { + pr_err("kobject_create_and_add(): %d\n", err); + device_unregister(&srv->dev); + goto unlock; + } + dev_set_uevent_suppress(&srv->dev, false); + kobject_uevent(&srv->dev.kobj, KOBJ_ADD); + goto unlock; + +put: + put_device(&srv->dev); +unlock: + mutex_unlock(&srv->paths_mutex); + + return err; +} + +static void +rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + + mutex_lock(&srv->paths_mutex); + if (!--srv->dev_ref) { + kobject_del(srv->kobj_paths); + kobject_put(srv->kobj_paths); + mutex_unlock(&srv->paths_mutex); + device_unregister(&srv->dev); + } else { + mutex_unlock(&srv->paths_mutex); + } +} + +static void rtrs_srv_sess_stats_release(struct kobject *kobj) +{ + struct rtrs_srv_stats *stats; + + stats = container_of(kobj, struct rtrs_srv_stats, kobj_stats); + + kfree(stats); +} + +static struct kobj_type ktype_stats = { + .sysfs_ops = &kobj_sysfs_ops, + .release = rtrs_srv_sess_stats_release, +}; + +static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess) +{ + int err; + struct rtrs_sess *s = &sess->s; + + err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats, + &sess->kobj, "stats"); + if (err) { + rtrs_err(s, "kobject_init_and_add(): %d\n", err); + return err; + } + err = sysfs_create_group(&sess->stats->kobj_stats, + &rtrs_srv_stats_attr_group); + if (err) { + rtrs_err(s, "sysfs_create_group(): %d\n", err); + goto err; + } + + return 0; + +err: + kobject_del(&sess->stats->kobj_stats); + kobject_put(&sess->stats->kobj_stats); + + return err; +} + +int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess) +{ + struct rtrs_srv *srv = sess->srv; + struct rtrs_sess *s = &sess->s; + char str[NAME_MAX]; + int err, cnt; + + cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, + str, sizeof(str)); + cnt += scnprintf(str + cnt, sizeof(str) - cnt, "@"); + sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, + str + cnt, sizeof(str) - cnt); + + err = rtrs_srv_create_once_sysfs_root_folders(sess); + if (err) + return err; + + err = kobject_init_and_add(&sess->kobj, &ktype, srv->kobj_paths, + "%s", str); + if (err) { + rtrs_err(s, "kobject_init_and_add(): %d\n", err); + goto destroy_root; + } + err = sysfs_create_group(&sess->kobj, &rtrs_srv_sess_attr_group); + if (err) { + rtrs_err(s, "sysfs_create_group(): %d\n", err); + goto put_kobj; + } + err = rtrs_srv_create_stats_files(sess); + if (err) + goto remove_group; + + return 0; + +remove_group: + sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); +put_kobj: + kobject_del(&sess->kobj); + kobject_put(&sess->kobj); +destroy_root: + rtrs_srv_destroy_once_sysfs_root_folders(sess); + + return err; +} + +void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess) +{ + if (sess->kobj.state_in_sysfs) { + kobject_del(&sess->stats->kobj_stats); + kobject_put(&sess->stats->kobj_stats); + kobject_del(&sess->kobj); + kobject_put(&sess->kobj); + + rtrs_srv_destroy_once_sysfs_root_folders(sess); + } +} From c013fbc1fd341d28269cf0a6b465925186b9a1e1 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:19 +0200 Subject: [PATCH 341/562] RDMA/rtrs: include client and server modules into kernel compilation Add rtrs Makefile, Kconfig and also corresponding lines into upper layer infiniband/ulp files. Link: https://lore.kernel.org/r/20200511135131.27580-14-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/Kconfig | 1 + drivers/infiniband/ulp/Makefile | 1 + drivers/infiniband/ulp/rtrs/Kconfig | 27 +++++++++++++++++++++++++++ drivers/infiniband/ulp/rtrs/Makefile | 15 +++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 drivers/infiniband/ulp/rtrs/Kconfig create mode 100644 drivers/infiniband/ulp/rtrs/Makefile diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index ade86388434f..477418b37786 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -107,6 +107,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig" source "drivers/infiniband/ulp/iser/Kconfig" source "drivers/infiniband/ulp/isert/Kconfig" +source "drivers/infiniband/ulp/rtrs/Kconfig" source "drivers/infiniband/ulp/opa_vnic/Kconfig" diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile index 437813c7b481..4d0004b58377 100644 --- a/drivers/infiniband/ulp/Makefile +++ b/drivers/infiniband/ulp/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_INFINIBAND_SRPT) += srpt/ obj-$(CONFIG_INFINIBAND_ISER) += iser/ obj-$(CONFIG_INFINIBAND_ISERT) += isert/ obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic/ +obj-$(CONFIG_INFINIBAND_RTRS) += rtrs/ diff --git a/drivers/infiniband/ulp/rtrs/Kconfig b/drivers/infiniband/ulp/rtrs/Kconfig new file mode 100644 index 000000000000..9092b62e6dc8 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/Kconfig @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +config INFINIBAND_RTRS + tristate + depends on INFINIBAND_ADDR_TRANS + +config INFINIBAND_RTRS_CLIENT + tristate "RTRS client module" + depends on INFINIBAND_ADDR_TRANS + select INFINIBAND_RTRS + help + RDMA transport client module. + + RDMA Transport (RTRS) client implements a reliable transport layer + and also multipathing functionality and that it is intended to be + the base layer for a block storage initiator over RDMA. + +config INFINIBAND_RTRS_SERVER + tristate "RTRS server module" + depends on INFINIBAND_ADDR_TRANS + select INFINIBAND_RTRS + help + RDMA transport server module. + + RDMA Transport (RTRS) server module processing connection and IO + requests received from the RTRS client module, it will pass the + IO requests to its user eg. RNBD_server. diff --git a/drivers/infiniband/ulp/rtrs/Makefile b/drivers/infiniband/ulp/rtrs/Makefile new file mode 100644 index 000000000000..3898509be270 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +rtrs-client-y := rtrs-clt.o \ + rtrs-clt-stats.o \ + rtrs-clt-sysfs.o + +rtrs-server-y := rtrs-srv.o \ + rtrs-srv-stats.o \ + rtrs-srv-sysfs.o + +rtrs-core-y := rtrs.o + +obj-$(CONFIG_INFINIBAND_RTRS) += rtrs-core.o +obj-$(CONFIG_INFINIBAND_RTRS_CLIENT) += rtrs-client.o +obj-$(CONFIG_INFINIBAND_RTRS_SERVER) += rtrs-server.o From 745b6a3d4a673c0b8de6e7c15b0620117614b75b Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:20 +0200 Subject: [PATCH 342/562] RDMA/rtrs: a bit of documentation README with description of major sysfs entries, sysfs documentation has been moved to ABI dir as suggested by Bart. Link: https://lore.kernel.org/r/20200511135131.27580-15-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Cc: linux-kernel@vger.kernel.org Signed-off-by: Jason Gunthorpe --- .../ABI/testing/sysfs-class-rtrs-client | 131 +++++++++++ .../ABI/testing/sysfs-class-rtrs-server | 53 +++++ drivers/infiniband/ulp/rtrs/README | 213 ++++++++++++++++++ 3 files changed, 397 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-rtrs-client create mode 100644 Documentation/ABI/testing/sysfs-class-rtrs-server create mode 100644 drivers/infiniband/ulp/rtrs/README diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-client b/Documentation/ABI/testing/sysfs-class-rtrs-client new file mode 100644 index 000000000000..e7e718db8941 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-rtrs-client @@ -0,0 +1,131 @@ +What: /sys/class/rtrs-client +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: When a user of RTRS API creates a new session, a directory entry with + the name of that session is created under /sys/class/rtrs-client// + +What: /sys/class/rtrs-client//add_path +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RW, adds a new path (connection) to an existing session. Expected format is the + following: + + <[source addr,]destination addr> + *addr ::= [ ip: | gid: ] + +What: /sys/class/rtrs-client//max_reconnect_attempts +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Maximum number reconnect attempts the client should make before giving up + after connection breaks unexpectedly. + +What: /sys/class/rtrs-client//mp_policy +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Multipath policy specifies which path should be selected on each IO: + + round-robin (0): + select path in per CPU round-robin manner. + + min-inflight (1): + select path with minimum inflights. + +What: /sys/class/rtrs-client//paths/ +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Each path belonging to a given session is listed here by its source and + destination address. When a new path is added to a session by writing to + the "add_path" entry, a directory is created. + +What: /sys/class/rtrs-client//paths//state +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains "connected" if the session is connected to the peer and fully + functional. Otherwise the file contains "disconnected" + +What: /sys/class/rtrs-client//paths//reconnect +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Write "1" to the file in order to reconnect the path. + Operation is blocking and returns 0 if reconnect was successful. + +What: /sys/class/rtrs-client//paths//disconnect +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Write "1" to the file in order to disconnect the path. + Operation blocks until RTRS path is disconnected. + +What: /sys/class/rtrs-client//paths//remove_path +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Write "1" to the file in order to disconnected and remove the path + from the session. Operation blocks until the path is disconnected + and removed from the session. + +What: /sys/class/rtrs-client//paths//hca_name +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the the name of HCA the connection established on. + +What: /sys/class/rtrs-client//paths//hca_port +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the port number of active port traffic is going through. + +What: /sys/class/rtrs-client//paths//src_addr +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the source address of the path + +What: /sys/class/rtrs-client//paths//dst_addr +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the destination address of the path + +What: /sys/class/rtrs-client//paths//stats/reset_all +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RW, Read will return usage help, write 0 will clear all the statistics. + +What: /sys/class/rtrs-client//paths//stats/cpu_migration +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RTRS expects that each HCA IRQ is pinned to a separate CPU. If it's + not the case, the processing of an I/O response could be processed on a + different CPU than where it was originally submitted. This file shows + how many interrupts where generated on a non expected CPU. + "from:" is the CPU on which the IRQ was expected, but not generated. + "to:" is the CPU on which the IRQ was generated, but not expected. + +What: /sys/class/rtrs-client//paths//stats/reconnects +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains 2 unsigned int values, the first one records number of successful + reconnects in the path lifetime, the second one records number of failed + reconnects in the path lifetime. + +What: /sys/class/rtrs-client//paths//stats/rdma +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains statistics regarding rdma operations and inflight operations. + The output consists of 6 values: + + \ + diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-server b/Documentation/ABI/testing/sysfs-class-rtrs-server new file mode 100644 index 000000000000..3b6d5b067df0 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-rtrs-server @@ -0,0 +1,53 @@ +What: /sys/class/rtrs-server +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: When a user of RTRS API creates a new session on a client side, a + directory entry with the name of that session is created in here. + +What: /sys/class/rtrs-server//paths/ +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: When new path is created by writing to "add_path" entry on client side, + a directory entry named as @ is created + on server. + +What: /sys/class/rtrs-server//paths//disconnect +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: When "1" is written to the file, the RTRS session is being disconnected. + Operations is non-blocking and returns control immediately to the caller. + +What: /sys/class/rtrs-server//paths//hca_name +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the the name of HCA the connection established on. + +What: /sys/class/rtrs-server//paths//hca_port +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the port number of active port traffic is going through. + +What: /sys/class/rtrs-server//paths//src_addr +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the source address of the path + +What: /sys/class/rtrs-server//paths//dst_addr +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RO, Contains the destination address of the path + +What: /sys/class/rtrs-server//paths//stats/rdma +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains statistics regarding rdma operations and inflight operations. + The output consists of 5 values: + diff --git a/drivers/infiniband/ulp/rtrs/README b/drivers/infiniband/ulp/rtrs/README new file mode 100644 index 000000000000..5d9ea142e5dd --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/README @@ -0,0 +1,213 @@ +**************************** +RDMA Transport (RTRS) +**************************** + +RTRS (RDMA Transport) is a reliable high speed transport library +which provides support to establish optimal number of connections +between client and server machines using RDMA (InfiniBand, RoCE, iWarp) +transport. It is optimized to transfer (read/write) IO blocks. + +In its core interface it follows the BIO semantics of providing the +possibility to either write data from an sg list to the remote side +or to request ("read") data transfer from the remote side into a given +sg list. + +RTRS provides I/O fail-over and load-balancing capabilities by using +multipath I/O (see "add_path" and "mp_policy" configuration entries in +Documentation/ABI/testing/sysfs-class-rtrs-client). + +RTRS is used by the RNBD (RDMA Network Block Device) modules. + +================== +Transport protocol +================== + +Overview +-------- +An established connection between a client and a server is called rtrs +session. A session is associated with a set of memory chunks reserved on the +server side for a given client for rdma transfer. A session +consists of multiple paths, each representing a separate physical link +between client and server. Those are used for load balancing and failover. +Each path consists of as many connections (QPs) as there are cpus on +the client. + +When processing an incoming write or read request, rtrs client uses memory +chunks reserved for him on the server side. Their number, size and addresses +need to be exchanged between client and server during the connection +establishment phase. Apart from the memory related information client needs to +inform the server about the session name and identify each path and connection +individually. + +On an established session client sends to server write or read messages. +Server uses immediate field to tell the client which request is being +acknowledged and for errno. Client uses immediate field to tell the server +which of the memory chunks has been accessed and at which offset the message +can be found. + +Module parameter always_invalidate is introduced for the security problem +discussed in LPC RDMA MC 2019. When always_invalidate=Y, on the server side we +invalidate each rdma buffer before we hand it over to RNBD server and +then pass it to the block layer. A new rkey is generated and registered for the +buffer after it returns back from the block layer and RNBD server. +The new rkey is sent back to the client along with the IO result. +The procedure is the default behaviour of the driver. This invalidation and +registration on each IO causes performance drop of up to 20%. A user of the +driver may choose to load the modules with this mechanism switched off +(always_invalidate=N), if he understands and can take the risk of a malicious +client being able to corrupt memory of a server it is connected to. This might +be a reasonable option in a scenario where all the clients and all the servers +are located within a secure datacenter. + + +Connection establishment +------------------------ + +1. Client starts establishing connections belonging to a path of a session one +by one via attaching RTRS_MSG_CON_REQ messages to the rdma_connect requests. +Those include uuid of the session and uuid of the path to be +established. They are used by the server to find a persisting session/path or +to create a new one when necessary. The message also contains the protocol +version and magic for compatibility, total number of connections per session +(as many as cpus on the client), the id of the current connection and +the reconnect counter, which is used to resolve the situations where +client is trying to reconnect a path, while server is still destroying the old +one. + +2. Server accepts the connection requests one by one and attaches +RTRS_MSG_CONN_RSP messages to the rdma_accept. Apart from magic and +protocol version, the messages include error code, queue depth supported by +the server (number of memory chunks which are going to be allocated for that +session) and the maximum size of one io, RTRS_MSG_NEW_RKEY_F flags is set +when always_invalidate=Y. + +3. After all connections of a path are established client sends to server the +RTRS_MSG_INFO_REQ message, containing the name of the session. This message +requests the address information from the server. + +4. Server replies to the session info request message with RTRS_MSG_INFO_RSP, +which contains the addresses and keys of the RDMA buffers allocated for that +session. + +5. Session becomes connected after all paths to be established are connected +(i.e. steps 1-4 finished for all paths requested for a session) + +6. Server and client exchange periodically heartbeat messages (empty rdma +messages with an immediate field) which are used to detect a crash on remote +side or network outage in an absence of IO. + +7. On any RDMA related error or in the case of a heartbeat timeout, the +corresponding path is disconnected, all the inflight IO are failed over to a +healthy path, if any, and the reconnect mechanism is triggered. + +CLT SRV +*for each connection belonging to a path and for each path: +RTRS_MSG_CON_REQ -------------------> + <------------------- RTRS_MSG_CON_RSP +... +*after all connections are established: +RTRS_MSG_INFO_REQ -------------------> + <------------------- RTRS_MSG_INFO_RSP +*heartbeat is started from both sides: + -------------------> [RTRS_HB_MSG_IMM] +[RTRS_HB_MSG_ACK] <------------------- +[RTRS_HB_MSG_IMM] <------------------- + -------------------> [RTRS_HB_MSG_ACK] + +IO path +------- + +* Write (always_invalidate=N) * + +1. When processing a write request client selects one of the memory chunks +on the server side and rdma writes there the user data, user header and the +RTRS_MSG_RDMA_WRITE message. Apart from the type (write), the message only +contains size of the user header. The client tells the server which chunk has +been accessed and at what offset the RTRS_MSG_RDMA_WRITE can be found by +using the IMM field. + +2. When confirming a write request server sends an "empty" rdma message with +an immediate field. The 32 bit field is used to specify the outstanding +inflight IO and for the error code. + +CLT SRV +usr_data + usr_hdr + rtrs_msg_rdma_write -----------------> [RTRS_IO_REQ_IMM] +[RTRS_IO_RSP_IMM] <----------------- (id + errno) + +* Write (always_invalidate=Y) * + +1. When processing a write request client selects one of the memory chunks +on the server side and rdma writes there the user data, user header and the +RTRS_MSG_RDMA_WRITE message. Apart from the type (write), the message only +contains size of the user header. The client tells the server which chunk has +been accessed and at what offset the RTRS_MSG_RDMA_WRITE can be found by +using the IMM field, Server invalidate rkey associated to the memory chunks +first, when it finishes, pass the IO to RNBD server module. + +2. When confirming a write request server sends an "empty" rdma message with +an immediate field. The 32 bit field is used to specify the outstanding +inflight IO and for the error code. The new rkey is sent back using +SEND_WITH_IMM WR, client When it recived new rkey message, it validates +the message and finished IO after update rkey for the rbuffer, then post +back the recv buffer for later use. + +CLT SRV +usr_data + usr_hdr + rtrs_msg_rdma_write -----------------> [RTRS_IO_REQ_IMM] +[RTRS_MSG_RKEY_RSP] <----------------- (RTRS_MSG_RKEY_RSP) +[RTRS_IO_RSP_IMM] <----------------- (id + errno) + + +* Read (always_invalidate=N)* + +1. When processing a read request client selects one of the memory chunks +on the server side and rdma writes there the user header and the +RTRS_MSG_RDMA_READ message. This message contains the type (read), size of +the user header, flags (specifying if memory invalidation is necessary) and the +list of addresses along with keys for the data to be read into. + +2. When confirming a read request server transfers the requested data first, +attaches an invalidation message if requested and finally an "empty" rdma +message with an immediate field. The 32 bit field is used to specify the +outstanding inflight IO and the error code. + +CLT SRV +usr_hdr + rtrs_msg_rdma_read --------------> [RTRS_IO_REQ_IMM] +[RTRS_IO_RSP_IMM] <-------------- usr_data + (id + errno) +or in case client requested invalidation: +[RTRS_IO_RSP_IMM_W_INV] <-------------- usr_data + (INV) + (id + errno) + +* Read (always_invalidate=Y)* + +1. When processing a read request client selects one of the memory chunks +on the server side and rdma writes there the user header and the +RTRS_MSG_RDMA_READ message. This message contains the type (read), size of +the user header, flags (specifying if memory invalidation is necessary) and the +list of addresses along with keys for the data to be read into. +Server invalidate rkey associated to the memory chunks first, when it finishes, +passes the IO to RNBD server module. + +2. When confirming a read request server transfers the requested data first, +attaches an invalidation message if requested and finally an "empty" rdma +message with an immediate field. The 32 bit field is used to specify the +outstanding inflight IO and the error code. The new rkey is sent back using +SEND_WITH_IMM WR, client When it recived new rkey message, it validates +the message and finished IO after update rkey for the rbuffer, then post +back the recv buffer for later use. + +CLT SRV +usr_hdr + rtrs_msg_rdma_read --------------> [RTRS_IO_REQ_IMM] +[RTRS_IO_RSP_IMM] <-------------- usr_data + (id + errno) +[RTRS_MSG_RKEY_RSP] <----------------- (RTRS_MSG_RKEY_RSP) +or in case client requested invalidation: +[RTRS_IO_RSP_IMM_W_INV] <-------------- usr_data + (INV) + (id + errno) +========================================= +Contributors List(in alphabetical order) +========================================= +Danil Kipnis +Fabian Holler +Guoqing Jiang +Jack Wang +Kleber Souza +Lutz Pogrell +Milind Dumbare +Roman Penyaev From 219ace60770117fbe440904f9156ab2ab8f30e7d Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:21 +0200 Subject: [PATCH 343/562] block/rnbd: private headers with rnbd protocol structs and helpers These are common private headers with rnbd protocol structures, logging, sysfs and other helper functions, which are used on both client and server sides. Link: https://lore.kernel.org/r/20200511135131.27580-16-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-common.c | 23 +++ drivers/block/rnbd/rnbd-log.h | 41 +++++ drivers/block/rnbd/rnbd-proto.h | 303 +++++++++++++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-common.c create mode 100644 drivers/block/rnbd/rnbd-log.h create mode 100644 drivers/block/rnbd/rnbd-proto.h diff --git a/drivers/block/rnbd/rnbd-common.c b/drivers/block/rnbd/rnbd-common.c new file mode 100644 index 000000000000..596c3f732403 --- /dev/null +++ b/drivers/block/rnbd/rnbd-common.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#include "rnbd-proto.h" + +const char *rnbd_access_mode_str(enum rnbd_access_mode mode) +{ + switch (mode) { + case RNBD_ACCESS_RO: + return "ro"; + case RNBD_ACCESS_RW: + return "rw"; + case RNBD_ACCESS_MIGRATION: + return "migration"; + default: + return "unknown"; + } +} diff --git a/drivers/block/rnbd/rnbd-log.h b/drivers/block/rnbd/rnbd-log.h new file mode 100644 index 000000000000..136e7d6c3451 --- /dev/null +++ b/drivers/block/rnbd/rnbd-log.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#ifndef RNBD_LOG_H +#define RNBD_LOG_H + +#include "rnbd-clt.h" +#include "rnbd-srv.h" + +#define rnbd_clt_log(fn, dev, fmt, ...) ( \ + fn("<%s@%s> " fmt, (dev)->pathname, \ + (dev)->sess->sessname, \ + ##__VA_ARGS__)) +#define rnbd_srv_log(fn, dev, fmt, ...) ( \ + fn("<%s@%s>: " fmt, (dev)->pathname, \ + (dev)->sess->sessname, ##__VA_ARGS__)) + +#define rnbd_clt_err(dev, fmt, ...) \ + rnbd_clt_log(pr_err, dev, fmt, ##__VA_ARGS__) +#define rnbd_clt_err_rl(dev, fmt, ...) \ + rnbd_clt_log(pr_err_ratelimited, dev, fmt, ##__VA_ARGS__) +#define rnbd_clt_info(dev, fmt, ...) \ + rnbd_clt_log(pr_info, dev, fmt, ##__VA_ARGS__) +#define rnbd_clt_info_rl(dev, fmt, ...) \ + rnbd_clt_log(pr_info_ratelimited, dev, fmt, ##__VA_ARGS__) + +#define rnbd_srv_err(dev, fmt, ...) \ + rnbd_srv_log(pr_err, dev, fmt, ##__VA_ARGS__) +#define rnbd_srv_err_rl(dev, fmt, ...) \ + rnbd_srv_log(pr_err_ratelimited, dev, fmt, ##__VA_ARGS__) +#define rnbd_srv_info(dev, fmt, ...) \ + rnbd_srv_log(pr_info, dev, fmt, ##__VA_ARGS__) +#define rnbd_srv_info_rl(dev, fmt, ...) \ + rnbd_srv_log(pr_info_ratelimited, dev, fmt, ##__VA_ARGS__) + +#endif /* RNBD_LOG_H */ diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h new file mode 100644 index 000000000000..ca166241452c --- /dev/null +++ b/drivers/block/rnbd/rnbd-proto.h @@ -0,0 +1,303 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#ifndef RNBD_PROTO_H +#define RNBD_PROTO_H + +#include +#include +#include +#include +#include +#include +#include + +#define RNBD_PROTO_VER_MAJOR 2 +#define RNBD_PROTO_VER_MINOR 0 + +/* The default port number the RTRS server is listening on. */ +#define RTRS_PORT 1234 + +/** + * enum rnbd_msg_types - RNBD message types + * @RNBD_MSG_SESS_INFO: initial session info from client to server + * @RNBD_MSG_SESS_INFO_RSP: initial session info from server to client + * @RNBD_MSG_OPEN: open (map) device request + * @RNBD_MSG_OPEN_RSP: response to an @RNBD_MSG_OPEN + * @RNBD_MSG_IO: block IO request operation + * @RNBD_MSG_CLOSE: close (unmap) device request + */ +enum rnbd_msg_type { + RNBD_MSG_SESS_INFO, + RNBD_MSG_SESS_INFO_RSP, + RNBD_MSG_OPEN, + RNBD_MSG_OPEN_RSP, + RNBD_MSG_IO, + RNBD_MSG_CLOSE, +}; + +/** + * struct rnbd_msg_hdr - header of RNBD messages + * @type: Message type, valid values see: enum rnbd_msg_types + */ +struct rnbd_msg_hdr { + __le16 type; + __le16 __padding; +}; + +/** + * We allow to map RO many times and RW only once. We allow to map yet another + * time RW, if MIGRATION is provided (second RW export can be required for + * example for VM migration) + */ +enum rnbd_access_mode { + RNBD_ACCESS_RO, + RNBD_ACCESS_RW, + RNBD_ACCESS_MIGRATION, +}; + +/** + * struct rnbd_msg_sess_info - initial session info from client to server + * @hdr: message header + * @ver: RNBD protocol version + */ +struct rnbd_msg_sess_info { + struct rnbd_msg_hdr hdr; + u8 ver; + u8 reserved[31]; +}; + +/** + * struct rnbd_msg_sess_info_rsp - initial session info from server to client + * @hdr: message header + * @ver: RNBD protocol version + */ +struct rnbd_msg_sess_info_rsp { + struct rnbd_msg_hdr hdr; + u8 ver; + u8 reserved[31]; +}; + +/** + * struct rnbd_msg_open - request to open a remote device. + * @hdr: message header + * @access_mode: the mode to open remote device, valid values see: + * enum rnbd_access_mode + * @device_name: device path on remote side + */ +struct rnbd_msg_open { + struct rnbd_msg_hdr hdr; + u8 access_mode; + u8 resv1; + s8 dev_name[NAME_MAX]; + u8 reserved[3]; +}; + +/** + * struct rnbd_msg_close - request to close a remote device. + * @hdr: message header + * @device_id: device_id on server side to identify the device + */ +struct rnbd_msg_close { + struct rnbd_msg_hdr hdr; + __le32 device_id; +}; + +/** + * struct rnbd_msg_open_rsp - response message to RNBD_MSG_OPEN + * @hdr: message header + * @device_id: device_id on server side to identify the device + * @nsectors: number of sectors in the usual 512b unit + * @max_hw_sectors: max hardware sectors in the usual 512b unit + * @max_write_same_sectors: max sectors for WRITE SAME in the 512b unit + * @max_discard_sectors: max. sectors that can be discarded at once in 512b + * unit. + * @discard_granularity: size of the internal discard allocation unit in bytes + * @discard_alignment: offset from internal allocation assignment in bytes + * @physical_block_size: physical block size device supports in bytes + * @logical_block_size: logical block size device supports in bytes + * @max_segments: max segments hardware support in one transfer + * @secure_discard: supports secure discard + * @rotation: is a rotational disc? + */ +struct rnbd_msg_open_rsp { + struct rnbd_msg_hdr hdr; + __le32 device_id; + __le64 nsectors; + __le32 max_hw_sectors; + __le32 max_write_same_sectors; + __le32 max_discard_sectors; + __le32 discard_granularity; + __le32 discard_alignment; + __le16 physical_block_size; + __le16 logical_block_size; + __le16 max_segments; + __le16 secure_discard; + u8 rotational; + u8 reserved[11]; +}; + +/** + * struct rnbd_msg_io - message for I/O read/write + * @hdr: message header + * @device_id: device_id on server side to find the right device + * @sector: bi_sector attribute from struct bio + * @rw: valid values are defined in enum rnbd_io_flags + * @bi_size: number of bytes for I/O read/write + * @prio: priority + */ +struct rnbd_msg_io { + struct rnbd_msg_hdr hdr; + __le32 device_id; + __le64 sector; + __le32 rw; + __le32 bi_size; + __le16 prio; +}; + +#define RNBD_OP_BITS 8 +#define RNBD_OP_MASK ((1 << RNBD_OP_BITS) - 1) + +/** + * enum rnbd_io_flags - RNBD request types from rq_flag_bits + * @RNBD_OP_READ: read sectors from the device + * @RNBD_OP_WRITE: write sectors to the device + * @RNBD_OP_FLUSH: flush the volatile write cache + * @RNBD_OP_DISCARD: discard sectors + * @RNBD_OP_SECURE_ERASE: securely erase sectors + * @RNBD_OP_WRITE_SAME: write the same sectors many times + + * @RNBD_F_SYNC: request is sync (sync write or read) + * @RNBD_F_FUA: forced unit access + */ +enum rnbd_io_flags { + + /* Operations */ + + RNBD_OP_READ = 0, + RNBD_OP_WRITE = 1, + RNBD_OP_FLUSH = 2, + RNBD_OP_DISCARD = 3, + RNBD_OP_SECURE_ERASE = 4, + RNBD_OP_WRITE_SAME = 5, + + RNBD_OP_LAST, + + /* Flags */ + + RNBD_F_SYNC = 1<<(RNBD_OP_BITS + 0), + RNBD_F_FUA = 1<<(RNBD_OP_BITS + 1), + + RNBD_F_ALL = (RNBD_F_SYNC | RNBD_F_FUA) + +}; + +static inline u32 rnbd_op(u32 flags) +{ + return flags & RNBD_OP_MASK; +} + +static inline u32 rnbd_flags(u32 flags) +{ + return flags & ~RNBD_OP_MASK; +} + +static inline bool rnbd_flags_supported(u32 flags) +{ + u32 op; + + op = rnbd_op(flags); + flags = rnbd_flags(flags); + + if (op >= RNBD_OP_LAST) + return false; + if (flags & ~RNBD_F_ALL) + return false; + + return true; +} + +static inline u32 rnbd_to_bio_flags(u32 rnbd_opf) +{ + u32 bio_opf; + + switch (rnbd_op(rnbd_opf)) { + case RNBD_OP_READ: + bio_opf = REQ_OP_READ; + break; + case RNBD_OP_WRITE: + bio_opf = REQ_OP_WRITE; + break; + case RNBD_OP_FLUSH: + bio_opf = REQ_OP_FLUSH | REQ_PREFLUSH; + break; + case RNBD_OP_DISCARD: + bio_opf = REQ_OP_DISCARD; + break; + case RNBD_OP_SECURE_ERASE: + bio_opf = REQ_OP_SECURE_ERASE; + break; + case RNBD_OP_WRITE_SAME: + bio_opf = REQ_OP_WRITE_SAME; + break; + default: + WARN(1, "Unknown RNBD type: %d (flags %d)\n", + rnbd_op(rnbd_opf), rnbd_opf); + bio_opf = 0; + } + + if (rnbd_opf & RNBD_F_SYNC) + bio_opf |= REQ_SYNC; + + if (rnbd_opf & RNBD_F_FUA) + bio_opf |= REQ_FUA; + + return bio_opf; +} + +static inline u32 rq_to_rnbd_flags(struct request *rq) +{ + u32 rnbd_opf; + + switch (req_op(rq)) { + case REQ_OP_READ: + rnbd_opf = RNBD_OP_READ; + break; + case REQ_OP_WRITE: + rnbd_opf = RNBD_OP_WRITE; + break; + case REQ_OP_DISCARD: + rnbd_opf = RNBD_OP_DISCARD; + break; + case REQ_OP_SECURE_ERASE: + rnbd_opf = RNBD_OP_SECURE_ERASE; + break; + case REQ_OP_WRITE_SAME: + rnbd_opf = RNBD_OP_WRITE_SAME; + break; + case REQ_OP_FLUSH: + rnbd_opf = RNBD_OP_FLUSH; + break; + default: + WARN(1, "Unknown request type %d (flags %llu)\n", + req_op(rq), (unsigned long long)rq->cmd_flags); + rnbd_opf = 0; + } + + if (op_is_sync(rq->cmd_flags)) + rnbd_opf |= RNBD_F_SYNC; + + if (op_is_flush(rq->cmd_flags)) + rnbd_opf |= RNBD_F_FUA; + + return rnbd_opf; +} + +const char *rnbd_access_mode_str(enum rnbd_access_mode mode); + +#endif /* RNBD_PROTO_H */ From 90426e89f54dbb8f77d94604a06d0643dd0c3eb9 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:22 +0200 Subject: [PATCH 344/562] block/rnbd: client: private header with client structs and functions This header describes main structs and functions used by rnbd-client module, mainly for managing RNBD sessions and mapped block devices, creating and destroying sysfs entries. Link: https://lore.kernel.org/r/20200511135131.27580-17-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-clt.h | 156 ++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-clt.h diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h new file mode 100644 index 000000000000..ed33654aa486 --- /dev/null +++ b/drivers/block/rnbd/rnbd-clt.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#ifndef RNBD_CLT_H +#define RNBD_CLT_H + +#include +#include +#include +#include +#include + +#include +#include "rnbd-proto.h" +#include "rnbd-log.h" + +/* Max. number of segments per IO request, Mellanox Connect X ~ Connect X5, + * choose minimial 30 for all, minus 1 for internal protocol, so 29. + */ +#define BMAX_SEGMENTS 29 +/* time in seconds between reconnect tries, default to 30 s */ +#define RECONNECT_DELAY 30 +/* + * Number of times to reconnect on error before giving up, 0 for * disabled, + * -1 for forever + */ +#define MAX_RECONNECTS -1 + +enum rnbd_clt_dev_state { + DEV_STATE_INIT, + DEV_STATE_MAPPED, + DEV_STATE_MAPPED_DISCONNECTED, + DEV_STATE_UNMAPPED, +}; + +struct rnbd_iu_comp { + wait_queue_head_t wait; + int errno; +}; + +struct rnbd_iu { + union { + struct request *rq; /* for block io */ + void *buf; /* for user messages */ + }; + struct rtrs_permit *permit; + union { + /* use to send msg associated with a dev */ + struct rnbd_clt_dev *dev; + /* use to send msg associated with a sess */ + struct rnbd_clt_session *sess; + }; + struct scatterlist sglist[BMAX_SEGMENTS]; + struct work_struct work; + int errno; + struct rnbd_iu_comp comp; + atomic_t refcount; +}; + +struct rnbd_cpu_qlist { + struct list_head requeue_list; + spinlock_t requeue_lock; + unsigned int cpu; +}; + +struct rnbd_clt_session { + struct list_head list; + struct rtrs_clt *rtrs; + wait_queue_head_t rtrs_waitq; + bool rtrs_ready; + struct rnbd_cpu_qlist __percpu + *cpu_queues; + DECLARE_BITMAP(cpu_queues_bm, NR_CPUS); + int __percpu *cpu_rr; /* per-cpu var for CPU round-robin */ + atomic_t busy; + int queue_depth; + u32 max_io_size; + struct blk_mq_tag_set tag_set; + struct mutex lock; /* protects state and devs_list */ + struct list_head devs_list; /* list of struct rnbd_clt_dev */ + refcount_t refcount; + char sessname[NAME_MAX]; + u8 ver; /* protocol version */ +}; + +/** + * Submission queues. + */ +struct rnbd_queue { + struct list_head requeue_list; + unsigned long in_list; + struct rnbd_clt_dev *dev; + struct blk_mq_hw_ctx *hctx; +}; + +struct rnbd_clt_dev { + struct rnbd_clt_session *sess; + struct request_queue *queue; + struct rnbd_queue *hw_queues; + u32 device_id; + /* local Idr index - used to track minor number allocations. */ + u32 clt_device_id; + struct mutex lock; + enum rnbd_clt_dev_state dev_state; + char pathname[NAME_MAX]; + enum rnbd_access_mode access_mode; + bool read_only; + bool rotational; + u32 max_hw_sectors; + u32 max_write_same_sectors; + u32 max_discard_sectors; + u32 discard_granularity; + u32 discard_alignment; + u16 secure_discard; + u16 physical_block_size; + u16 logical_block_size; + u16 max_segments; + size_t nsectors; + u64 size; /* device size in bytes */ + struct list_head list; + struct gendisk *gd; + struct kobject kobj; + char blk_symlink_name[NAME_MAX]; + refcount_t refcount; + struct work_struct unmap_on_rmmod_work; +}; + +/* rnbd-clt.c */ + +struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, + struct rtrs_addr *paths, + size_t path_cnt, u16 port_nr, + const char *pathname, + enum rnbd_access_mode access_mode); +int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force, + const struct attribute *sysfs_self); + +int rnbd_clt_remap_device(struct rnbd_clt_dev *dev); +int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize); + +/* rnbd-clt-sysfs.c */ + +int rnbd_clt_create_sysfs_files(void); + +void rnbd_clt_destroy_sysfs_files(void); +void rnbd_clt_destroy_default_group(void); + +void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev); + +#endif /* RNBD_CLT_H */ From f7a7a5c228d45efc45d6e26a199a3ea13d2f8754 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:23 +0200 Subject: [PATCH 345/562] block/rnbd: client: main functionality This is main functionality of rnbd-client module, which provides interface to map remote device as local block device /dev/rnbd and feeds RTRS with IO requests. Link: https://lore.kernel.org/r/20200511135131.27580-18-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-clt.c | 1729 +++++++++++++++++++++++++++++++++ 1 file changed, 1729 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-clt.c diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c new file mode 100644 index 000000000000..55bff3b1be71 --- /dev/null +++ b/drivers/block/rnbd/rnbd-clt.c @@ -0,0 +1,1729 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include +#include +#include +#include +#include + +#include "rnbd-clt.h" + +MODULE_DESCRIPTION("RDMA Network Block Device Client"); +MODULE_LICENSE("GPL"); + +static int rnbd_client_major; +static DEFINE_IDA(index_ida); +static DEFINE_MUTEX(ida_lock); +static DEFINE_MUTEX(sess_lock); +static LIST_HEAD(sess_list); + +/* + * Maximum number of partitions an instance can have. + * 6 bits = 64 minors = 63 partitions (one minor is used for the device itself) + */ +#define RNBD_PART_BITS 6 + +static inline bool rnbd_clt_get_sess(struct rnbd_clt_session *sess) +{ + return refcount_inc_not_zero(&sess->refcount); +} + +static void free_sess(struct rnbd_clt_session *sess); + +static void rnbd_clt_put_sess(struct rnbd_clt_session *sess) +{ + might_sleep(); + + if (refcount_dec_and_test(&sess->refcount)) + free_sess(sess); +} + +static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) +{ + might_sleep(); + + if (!refcount_dec_and_test(&dev->refcount)) + return; + + mutex_lock(&ida_lock); + ida_simple_remove(&index_ida, dev->clt_device_id); + mutex_unlock(&ida_lock); + kfree(dev->hw_queues); + rnbd_clt_put_sess(dev->sess); + mutex_destroy(&dev->lock); + kfree(dev); +} + +static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev) +{ + return refcount_inc_not_zero(&dev->refcount); +} + +static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, + const struct rnbd_msg_open_rsp *rsp) +{ + struct rnbd_clt_session *sess = dev->sess; + + if (!rsp->logical_block_size) + return -EINVAL; + + dev->device_id = le32_to_cpu(rsp->device_id); + dev->nsectors = le64_to_cpu(rsp->nsectors); + dev->logical_block_size = le16_to_cpu(rsp->logical_block_size); + dev->physical_block_size = le16_to_cpu(rsp->physical_block_size); + dev->max_write_same_sectors = le32_to_cpu(rsp->max_write_same_sectors); + dev->max_discard_sectors = le32_to_cpu(rsp->max_discard_sectors); + dev->discard_granularity = le32_to_cpu(rsp->discard_granularity); + dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); + dev->secure_discard = le16_to_cpu(rsp->secure_discard); + dev->rotational = rsp->rotational; + + dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; + dev->max_segments = BMAX_SEGMENTS; + + dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors, + le32_to_cpu(rsp->max_hw_sectors)); + dev->max_segments = min_t(u16, dev->max_segments, + le16_to_cpu(rsp->max_segments)); + + return 0; +} + +static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, + size_t new_nsectors) +{ + int err = 0; + + rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", + dev->nsectors, new_nsectors); + dev->nsectors = new_nsectors; + set_capacity(dev->gd, dev->nsectors); + err = revalidate_disk(dev->gd); + if (err) + rnbd_clt_err(dev, + "Failed to change device size from %zu to %zu, err: %d\n", + dev->nsectors, new_nsectors, err); + return err; +} + +static int process_msg_open_rsp(struct rnbd_clt_dev *dev, + struct rnbd_msg_open_rsp *rsp) +{ + int err = 0; + + mutex_lock(&dev->lock); + if (dev->dev_state == DEV_STATE_UNMAPPED) { + rnbd_clt_info(dev, + "Ignoring Open-Response message from server for unmapped device\n"); + err = -ENOENT; + goto out; + } + if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) { + u64 nsectors = le64_to_cpu(rsp->nsectors); + + /* + * If the device was remapped and the size changed in the + * meantime we need to revalidate it + */ + if (dev->nsectors != nsectors) + rnbd_clt_change_capacity(dev, nsectors); + rnbd_clt_info(dev, "Device online, device remapped successfully\n"); + } + err = rnbd_clt_set_dev_attr(dev, rsp); + if (err) + goto out; + dev->dev_state = DEV_STATE_MAPPED; + +out: + mutex_unlock(&dev->lock); + + return err; +} + +int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize) +{ + int ret = 0; + + mutex_lock(&dev->lock); + if (dev->dev_state != DEV_STATE_MAPPED) { + pr_err("Failed to set new size of the device, device is not opened\n"); + ret = -ENOENT; + goto out; + } + ret = rnbd_clt_change_capacity(dev, newsize); + +out: + mutex_unlock(&dev->lock); + + return ret; +} + +static inline void rnbd_clt_dev_requeue(struct rnbd_queue *q) +{ + if (WARN_ON(!q->hctx)) + return; + + /* We can come here from interrupt, thus async=true */ + blk_mq_run_hw_queue(q->hctx, true); +} + +enum { + RNBD_DELAY_IFBUSY = -1, +}; + +/** + * rnbd_get_cpu_qlist() - finds a list with HW queues to be rerun + * @sess: Session to find a queue for + * @cpu: Cpu to start the search from + * + * Description: + * Each CPU has a list of HW queues, which needs to be rerun. If a list + * is not empty - it is marked with a bit. This function finds first + * set bit in a bitmap and returns corresponding CPU list. + */ +static struct rnbd_cpu_qlist * +rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu) +{ + int bit; + + /* Search from cpu to nr_cpu_ids */ + bit = find_next_bit(sess->cpu_queues_bm, nr_cpu_ids, cpu); + if (bit < nr_cpu_ids) { + return per_cpu_ptr(sess->cpu_queues, bit); + } else if (cpu != 0) { + /* Search from 0 to cpu */ + bit = find_next_bit(sess->cpu_queues_bm, cpu, 0); + if (bit < cpu) + return per_cpu_ptr(sess->cpu_queues, bit); + } + + return NULL; +} + +static inline int nxt_cpu(int cpu) +{ + return (cpu + 1) % nr_cpu_ids; +} + +/** + * rnbd_rerun_if_needed() - rerun next queue marked as stopped + * @sess: Session to rerun a queue on + * + * Description: + * Each CPU has it's own list of HW queues, which should be rerun. + * Function finds such list with HW queues, takes a list lock, picks up + * the first HW queue out of the list and requeues it. + * + * Return: + * True if the queue was requeued, false otherwise. + * + * Context: + * Does not matter. + */ +static bool rnbd_rerun_if_needed(struct rnbd_clt_session *sess) +{ + struct rnbd_queue *q = NULL; + struct rnbd_cpu_qlist *cpu_q; + unsigned long flags; + int *cpup; + + /* + * To keep fairness and not to let other queues starve we always + * try to wake up someone else in round-robin manner. That of course + * increases latency but queues always have a chance to be executed. + */ + cpup = get_cpu_ptr(sess->cpu_rr); + for (cpu_q = rnbd_get_cpu_qlist(sess, nxt_cpu(*cpup)); cpu_q; + cpu_q = rnbd_get_cpu_qlist(sess, nxt_cpu(cpu_q->cpu))) { + if (!spin_trylock_irqsave(&cpu_q->requeue_lock, flags)) + continue; + if (unlikely(!test_bit(cpu_q->cpu, sess->cpu_queues_bm))) + goto unlock; + q = list_first_entry_or_null(&cpu_q->requeue_list, + typeof(*q), requeue_list); + if (WARN_ON(!q)) + goto clear_bit; + list_del_init(&q->requeue_list); + clear_bit_unlock(0, &q->in_list); + + if (list_empty(&cpu_q->requeue_list)) { + /* Clear bit if nothing is left */ +clear_bit: + clear_bit(cpu_q->cpu, sess->cpu_queues_bm); + } +unlock: + spin_unlock_irqrestore(&cpu_q->requeue_lock, flags); + + if (q) + break; + } + + /** + * Saves the CPU that is going to be requeued on the per-cpu var. Just + * incrementing it doesn't work because rnbd_get_cpu_qlist() will + * always return the first CPU with something on the queue list when the + * value stored on the var is greater than the last CPU with something + * on the list. + */ + if (cpu_q) + *cpup = cpu_q->cpu; + put_cpu_var(sess->cpu_rr); + + if (q) + rnbd_clt_dev_requeue(q); + + return q; +} + +/** + * rnbd_rerun_all_if_idle() - rerun all queues left in the list if + * session is idling (there are no requests + * in-flight). + * @sess: Session to rerun the queues on + * + * Description: + * This function tries to rerun all stopped queues if there are no + * requests in-flight anymore. This function tries to solve an obvious + * problem, when number of tags < than number of queues (hctx), which + * are stopped and put to sleep. If last permit, which has been just put, + * does not wake up all left queues (hctxs), IO requests hang forever. + * + * That can happen when all number of permits, say N, have been exhausted + * from one CPU, and we have many block devices per session, say M. + * Each block device has it's own queue (hctx) for each CPU, so eventually + * we can put that number of queues (hctxs) to sleep: M x nr_cpu_ids. + * If number of permits N < M x nr_cpu_ids finally we will get an IO hang. + * + * To avoid this hang last caller of rnbd_put_permit() (last caller is the + * one who observes sess->busy == 0) must wake up all remaining queues. + * + * Context: + * Does not matter. + */ +static void rnbd_rerun_all_if_idle(struct rnbd_clt_session *sess) +{ + bool requeued; + + do { + requeued = rnbd_rerun_if_needed(sess); + } while (atomic_read(&sess->busy) == 0 && requeued); +} + +static struct rtrs_permit *rnbd_get_permit(struct rnbd_clt_session *sess, + enum rtrs_clt_con_type con_type, + int wait) +{ + struct rtrs_permit *permit; + + permit = rtrs_clt_get_permit(sess->rtrs, con_type, + wait ? RTRS_PERMIT_WAIT : + RTRS_PERMIT_NOWAIT); + if (likely(permit)) + /* We have a subtle rare case here, when all permits can be + * consumed before busy counter increased. This is safe, + * because loser will get NULL as a permit, observe 0 busy + * counter and immediately restart the queue himself. + */ + atomic_inc(&sess->busy); + + return permit; +} + +static void rnbd_put_permit(struct rnbd_clt_session *sess, + struct rtrs_permit *permit) +{ + rtrs_clt_put_permit(sess->rtrs, permit); + atomic_dec(&sess->busy); + /* Paired with rnbd_clt_dev_add_to_requeue(). Decrement first + * and then check queue bits. + */ + smp_mb__after_atomic(); + rnbd_rerun_all_if_idle(sess); +} + +static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, + enum rtrs_clt_con_type con_type, + int wait) +{ + struct rnbd_iu *iu; + struct rtrs_permit *permit; + + permit = rnbd_get_permit(sess, con_type, + wait ? RTRS_PERMIT_WAIT : + RTRS_PERMIT_NOWAIT); + if (unlikely(!permit)) + return NULL; + iu = rtrs_permit_to_pdu(permit); + iu->permit = permit; + /* + * 1st reference is dropped after finishing sending a "user" message, + * 2nd reference is dropped after confirmation with the response is + * returned. + * 1st and 2nd can happen in any order, so the rnbd_iu should be + * released (rtrs_permit returned to ibbtrs) only leased after both + * are finished. + */ + atomic_set(&iu->refcount, 2); + init_waitqueue_head(&iu->comp.wait); + iu->comp.errno = INT_MAX; + + return iu; +} + +static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu) +{ + if (atomic_dec_and_test(&iu->refcount)) + rnbd_put_permit(sess, iu->permit); +} + +static void rnbd_softirq_done_fn(struct request *rq) +{ + struct rnbd_clt_dev *dev = rq->rq_disk->private_data; + struct rnbd_clt_session *sess = dev->sess; + struct rnbd_iu *iu; + + iu = blk_mq_rq_to_pdu(rq); + rnbd_put_permit(sess, iu->permit); + blk_mq_end_request(rq, errno_to_blk_status(iu->errno)); +} + +static void msg_io_conf(void *priv, int errno) +{ + struct rnbd_iu *iu = priv; + struct rnbd_clt_dev *dev = iu->dev; + struct request *rq = iu->rq; + int rw = rq_data_dir(rq); + + iu->errno = errno; + + blk_mq_complete_request(rq); + + if (errno) + rnbd_clt_info_rl(dev, "%s I/O failed with err: %d\n", + rw == READ ? "read" : "write", errno); +} + +static void wake_up_iu_comp(struct rnbd_iu *iu, int errno) +{ + iu->comp.errno = errno; + wake_up(&iu->comp.wait); +} + +static void msg_conf(void *priv, int errno) +{ + struct rnbd_iu *iu = priv; + + iu->errno = errno; + schedule_work(&iu->work); +} + +enum wait_type { + NO_WAIT = 0, + WAIT = 1 +}; + +static int send_usr_msg(struct rtrs_clt *rtrs, int dir, + struct rnbd_iu *iu, struct kvec *vec, size_t nr, + size_t len, struct scatterlist *sg, unsigned int sg_len, + void (*conf)(struct work_struct *work), + int *errno, enum wait_type wait) +{ + int err; + struct rtrs_clt_req_ops req_ops; + + INIT_WORK(&iu->work, conf); + req_ops = (struct rtrs_clt_req_ops) { + .priv = iu, + .conf_fn = msg_conf, + }; + err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit, + vec, nr, len, sg, sg_len); + if (!err && wait) { + wait_event(iu->comp.wait, iu->comp.errno != INT_MAX); + *errno = iu->comp.errno; + } else { + *errno = 0; + } + + return err; +} + +static void msg_close_conf(struct work_struct *work) +{ + struct rnbd_iu *iu = container_of(work, struct rnbd_iu, work); + struct rnbd_clt_dev *dev = iu->dev; + + wake_up_iu_comp(iu, iu->errno); + rnbd_put_iu(dev->sess, iu); + rnbd_clt_put_dev(dev); +} + +static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) +{ + struct rnbd_clt_session *sess = dev->sess; + struct rnbd_msg_close msg; + struct rnbd_iu *iu; + struct kvec vec = { + .iov_base = &msg, + .iov_len = sizeof(msg) + }; + int err, errno; + + iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT); + if (!iu) + return -ENOMEM; + + iu->buf = NULL; + iu->dev = dev; + + sg_mark_end(&iu->sglist[0]); + + msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE); + msg.device_id = cpu_to_le32(device_id); + + WARN_ON(!rnbd_clt_get_dev(dev)); + err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 1, 0, NULL, 0, + msg_close_conf, &errno, wait); + if (err) { + rnbd_clt_put_dev(dev); + rnbd_put_iu(sess, iu); + } else { + err = errno; + } + + rnbd_put_iu(sess, iu); + return err; +} + +static void msg_open_conf(struct work_struct *work) +{ + struct rnbd_iu *iu = container_of(work, struct rnbd_iu, work); + struct rnbd_msg_open_rsp *rsp = iu->buf; + struct rnbd_clt_dev *dev = iu->dev; + int errno = iu->errno; + + if (errno) { + rnbd_clt_err(dev, + "Opening failed, server responded: %d\n", + errno); + } else { + errno = process_msg_open_rsp(dev, rsp); + if (errno) { + u32 device_id = le32_to_cpu(rsp->device_id); + /* + * If server thinks its fine, but we fail to process + * then be nice and send a close to server. + */ + (void)send_msg_close(dev, device_id, NO_WAIT); + } + } + kfree(rsp); + wake_up_iu_comp(iu, errno); + rnbd_put_iu(dev->sess, iu); + rnbd_clt_put_dev(dev); +} + +static void msg_sess_info_conf(struct work_struct *work) +{ + struct rnbd_iu *iu = container_of(work, struct rnbd_iu, work); + struct rnbd_msg_sess_info_rsp *rsp = iu->buf; + struct rnbd_clt_session *sess = iu->sess; + + if (!iu->errno) + sess->ver = min_t(u8, rsp->ver, RNBD_PROTO_VER_MAJOR); + + kfree(rsp); + wake_up_iu_comp(iu, iu->errno); + rnbd_put_iu(sess, iu); + rnbd_clt_put_sess(sess); +} + +static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) +{ + struct rnbd_clt_session *sess = dev->sess; + struct rnbd_msg_open_rsp *rsp; + struct rnbd_msg_open msg; + struct rnbd_iu *iu; + struct kvec vec = { + .iov_base = &msg, + .iov_len = sizeof(msg) + }; + int err, errno; + + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT); + if (!iu) { + kfree(rsp); + return -ENOMEM; + } + + iu->buf = rsp; + iu->dev = dev; + + sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + + msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN); + msg.access_mode = dev->access_mode; + strlcpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name)); + + WARN_ON(!rnbd_clt_get_dev(dev)); + err = send_usr_msg(sess->rtrs, READ, iu, + &vec, 1, sizeof(*rsp), iu->sglist, 1, + msg_open_conf, &errno, wait); + if (err) { + rnbd_clt_put_dev(dev); + rnbd_put_iu(sess, iu); + kfree(rsp); + } else { + err = errno; + } + + rnbd_put_iu(sess, iu); + return err; +} + +static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) +{ + struct rnbd_msg_sess_info_rsp *rsp; + struct rnbd_msg_sess_info msg; + struct rnbd_iu *iu; + struct kvec vec = { + .iov_base = &msg, + .iov_len = sizeof(msg) + }; + int err, errno; + + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT); + if (!iu) { + kfree(rsp); + return -ENOMEM; + } + + iu->buf = rsp; + iu->sess = sess; + + sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + + msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO); + msg.ver = RNBD_PROTO_VER_MAJOR; + + if (!rnbd_clt_get_sess(sess)) { + /* + * That can happen only in one case, when RTRS has restablished + * the connection and link_ev() is called, but session is almost + * dead, last reference on session is put and caller is waiting + * for RTRS to close everything. + */ + err = -ENODEV; + goto put_iu; + } + err = send_usr_msg(sess->rtrs, READ, iu, + &vec, 1, sizeof(*rsp), iu->sglist, 1, + msg_sess_info_conf, &errno, wait); + if (err) { + rnbd_clt_put_sess(sess); +put_iu: + rnbd_put_iu(sess, iu); + kfree(rsp); + } else { + err = errno; + } + + rnbd_put_iu(sess, iu); + return err; +} + +static void set_dev_states_to_disconnected(struct rnbd_clt_session *sess) +{ + struct rnbd_clt_dev *dev; + + mutex_lock(&sess->lock); + list_for_each_entry(dev, &sess->devs_list, list) { + rnbd_clt_err(dev, "Device disconnected.\n"); + + mutex_lock(&dev->lock); + if (dev->dev_state == DEV_STATE_MAPPED) + dev->dev_state = DEV_STATE_MAPPED_DISCONNECTED; + mutex_unlock(&dev->lock); + } + mutex_unlock(&sess->lock); +} + +static void remap_devs(struct rnbd_clt_session *sess) +{ + struct rnbd_clt_dev *dev; + struct rtrs_attrs attrs; + int err; + + /* + * Careful here: we are called from RTRS link event directly, + * thus we can't send any RTRS request and wait for response + * or RTRS will not be able to complete request with failure + * if something goes wrong (failing of outstanding requests + * happens exactly from the context where we are blocking now). + * + * So to avoid deadlocks each usr message sent from here must + * be asynchronous. + */ + + err = send_msg_sess_info(sess, NO_WAIT); + if (err) { + pr_err("send_msg_sess_info(\"%s\"): %d\n", sess->sessname, err); + return; + } + + rtrs_clt_query(sess->rtrs, &attrs); + mutex_lock(&sess->lock); + sess->max_io_size = attrs.max_io_size; + + list_for_each_entry(dev, &sess->devs_list, list) { + bool skip; + + mutex_lock(&dev->lock); + skip = (dev->dev_state == DEV_STATE_INIT); + mutex_unlock(&dev->lock); + if (skip) + /* + * When device is establishing connection for the first + * time - do not remap, it will be closed soon. + */ + continue; + + rnbd_clt_info(dev, "session reconnected, remapping device\n"); + err = send_msg_open(dev, NO_WAIT); + if (err) { + rnbd_clt_err(dev, "send_msg_open(): %d\n", err); + break; + } + } + mutex_unlock(&sess->lock); +} + +static void rnbd_clt_link_ev(void *priv, enum rtrs_clt_link_ev ev) +{ + struct rnbd_clt_session *sess = priv; + + switch (ev) { + case RTRS_CLT_LINK_EV_DISCONNECTED: + set_dev_states_to_disconnected(sess); + break; + case RTRS_CLT_LINK_EV_RECONNECTED: + remap_devs(sess); + break; + default: + pr_err("Unknown session event received (%d), session: %s\n", + ev, sess->sessname); + } +} + +static void rnbd_init_cpu_qlists(struct rnbd_cpu_qlist __percpu *cpu_queues) +{ + unsigned int cpu; + struct rnbd_cpu_qlist *cpu_q; + + for_each_possible_cpu(cpu) { + cpu_q = per_cpu_ptr(cpu_queues, cpu); + + cpu_q->cpu = cpu; + INIT_LIST_HEAD(&cpu_q->requeue_list); + spin_lock_init(&cpu_q->requeue_lock); + } +} + +static void destroy_mq_tags(struct rnbd_clt_session *sess) +{ + if (sess->tag_set.tags) + blk_mq_free_tag_set(&sess->tag_set); +} + +static inline void wake_up_rtrs_waiters(struct rnbd_clt_session *sess) +{ + sess->rtrs_ready = true; + wake_up_all(&sess->rtrs_waitq); +} + +static void close_rtrs(struct rnbd_clt_session *sess) +{ + might_sleep(); + + if (!IS_ERR_OR_NULL(sess->rtrs)) { + rtrs_clt_close(sess->rtrs); + sess->rtrs = NULL; + wake_up_rtrs_waiters(sess); + } +} + +static void free_sess(struct rnbd_clt_session *sess) +{ + WARN_ON(!list_empty(&sess->devs_list)); + + might_sleep(); + + close_rtrs(sess); + destroy_mq_tags(sess); + if (!list_empty(&sess->list)) { + mutex_lock(&sess_lock); + list_del(&sess->list); + mutex_unlock(&sess_lock); + } + free_percpu(sess->cpu_queues); + free_percpu(sess->cpu_rr); + mutex_destroy(&sess->lock); + kfree(sess); +} + +static struct rnbd_clt_session *alloc_sess(const char *sessname) +{ + struct rnbd_clt_session *sess; + int err, cpu; + + sess = kzalloc_node(sizeof(*sess), GFP_KERNEL, NUMA_NO_NODE); + if (!sess) + return ERR_PTR(-ENOMEM); + strlcpy(sess->sessname, sessname, sizeof(sess->sessname)); + atomic_set(&sess->busy, 0); + mutex_init(&sess->lock); + INIT_LIST_HEAD(&sess->devs_list); + INIT_LIST_HEAD(&sess->list); + bitmap_zero(sess->cpu_queues_bm, NR_CPUS); + init_waitqueue_head(&sess->rtrs_waitq); + refcount_set(&sess->refcount, 1); + + sess->cpu_queues = alloc_percpu(struct rnbd_cpu_qlist); + if (!sess->cpu_queues) { + err = -ENOMEM; + goto err; + } + rnbd_init_cpu_qlists(sess->cpu_queues); + + /* + * That is simple percpu variable which stores cpu indeces, which are + * incremented on each access. We need that for the sake of fairness + * to wake up queues in a round-robin manner. + */ + sess->cpu_rr = alloc_percpu(int); + if (!sess->cpu_rr) { + err = -ENOMEM; + goto err; + } + for_each_possible_cpu(cpu) + * per_cpu_ptr(sess->cpu_rr, cpu) = cpu; + + return sess; + +err: + free_sess(sess); + + return ERR_PTR(err); +} + +static int wait_for_rtrs_connection(struct rnbd_clt_session *sess) +{ + wait_event(sess->rtrs_waitq, sess->rtrs_ready); + if (IS_ERR_OR_NULL(sess->rtrs)) + return -ECONNRESET; + + return 0; +} + +static void wait_for_rtrs_disconnection(struct rnbd_clt_session *sess) + __releases(&sess_lock) + __acquires(&sess_lock) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&sess->rtrs_waitq, &wait, TASK_UNINTERRUPTIBLE); + if (IS_ERR_OR_NULL(sess->rtrs)) { + finish_wait(&sess->rtrs_waitq, &wait); + return; + } + mutex_unlock(&sess_lock); + /* loop in caller, see __find_and_get_sess(). + * You can't leave mutex locked and call schedule(), you will catch a + * deadlock with a caller of free_sess(), which has just put the last + * reference and is about to take the sess_lock in order to delete + * the session from the list. + */ + schedule(); + mutex_lock(&sess_lock); +} + +static struct rnbd_clt_session *__find_and_get_sess(const char *sessname) + __releases(&sess_lock) + __acquires(&sess_lock) +{ + struct rnbd_clt_session *sess, *sn; + int err; + +again: + list_for_each_entry_safe(sess, sn, &sess_list, list) { + if (strcmp(sessname, sess->sessname)) + continue; + + if (sess->rtrs_ready && IS_ERR_OR_NULL(sess->rtrs)) + /* + * No RTRS connection, session is dying. + */ + continue; + + if (rnbd_clt_get_sess(sess)) { + /* + * Alive session is found, wait for RTRS connection. + */ + mutex_unlock(&sess_lock); + err = wait_for_rtrs_connection(sess); + if (err) + rnbd_clt_put_sess(sess); + mutex_lock(&sess_lock); + + if (err) + /* Session is dying, repeat the loop */ + goto again; + + return sess; + } + /* + * Ref is 0, session is dying, wait for RTRS disconnect + * in order to avoid session names clashes. + */ + wait_for_rtrs_disconnection(sess); + /* + * RTRS is disconnected and soon session will be freed, + * so repeat a loop. + */ + goto again; + } + + return NULL; +} + +static struct +rnbd_clt_session *find_or_create_sess(const char *sessname, bool *first) +{ + struct rnbd_clt_session *sess = NULL; + + mutex_lock(&sess_lock); + sess = __find_and_get_sess(sessname); + if (!sess) { + sess = alloc_sess(sessname); + if (sess) { + list_add(&sess->list, &sess_list); + *first = true; + } else { + mutex_unlock(&sess_lock); + return ERR_PTR(-ENOMEM); + } + } else + *first = false; + mutex_unlock(&sess_lock); + + return sess; +} + +static int rnbd_client_open(struct block_device *block_device, fmode_t mode) +{ + struct rnbd_clt_dev *dev = block_device->bd_disk->private_data; + + if (dev->read_only && (mode & FMODE_WRITE)) + return -EPERM; + + if (dev->dev_state == DEV_STATE_UNMAPPED || + !rnbd_clt_get_dev(dev)) + return -EIO; + + return 0; +} + +static void rnbd_client_release(struct gendisk *gen, fmode_t mode) +{ + struct rnbd_clt_dev *dev = gen->private_data; + + rnbd_clt_put_dev(dev); +} + +static int rnbd_client_getgeo(struct block_device *block_device, + struct hd_geometry *geo) +{ + u64 size; + struct rnbd_clt_dev *dev; + + dev = block_device->bd_disk->private_data; + size = dev->size * (dev->logical_block_size / SECTOR_SIZE); + geo->cylinders = size >> 6; /* size/64 */ + geo->heads = 4; + geo->sectors = 16; + geo->start = 0; + + return 0; +} + +static const struct block_device_operations rnbd_client_ops = { + .owner = THIS_MODULE, + .open = rnbd_client_open, + .release = rnbd_client_release, + .getgeo = rnbd_client_getgeo +}; + +/* The amount of data that belongs to an I/O and the amount of data that + * should be read or written to the disk (bi_size) can differ. + * + * E.g. When WRITE_SAME is used, only a small amount of data is + * transferred that is then written repeatedly over a lot of sectors. + * + * Get the size of data to be transferred via RTRS by summing up the size + * of the scather-gather list entries. + */ +static size_t rnbd_clt_get_sg_size(struct scatterlist *sglist, u32 len) +{ + struct scatterlist *sg; + size_t tsize = 0; + int i; + + for_each_sg(sglist, sg, len, i) + tsize += sg->length; + return tsize; +} + +static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, + struct request *rq, + struct rnbd_iu *iu) +{ + struct rtrs_clt *rtrs = dev->sess->rtrs; + struct rtrs_permit *permit = iu->permit; + struct rnbd_msg_io msg; + struct rtrs_clt_req_ops req_ops; + unsigned int sg_cnt = 0; + struct kvec vec; + size_t size; + int err; + + iu->rq = rq; + iu->dev = dev; + msg.sector = cpu_to_le64(blk_rq_pos(rq)); + msg.bi_size = cpu_to_le32(blk_rq_bytes(rq)); + msg.rw = cpu_to_le32(rq_to_rnbd_flags(rq)); + msg.prio = cpu_to_le16(req_get_ioprio(rq)); + + /* + * We only support discards with single segment for now. + * See queue limits. + */ + if (req_op(rq) != REQ_OP_DISCARD) + sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sglist); + + if (sg_cnt == 0) + /* Do not forget to mark the end */ + sg_mark_end(&iu->sglist[0]); + + msg.hdr.type = cpu_to_le16(RNBD_MSG_IO); + msg.device_id = cpu_to_le32(dev->device_id); + + vec = (struct kvec) { + .iov_base = &msg, + .iov_len = sizeof(msg) + }; + size = rnbd_clt_get_sg_size(iu->sglist, sg_cnt); + req_ops = (struct rtrs_clt_req_ops) { + .priv = iu, + .conf_fn = msg_io_conf, + }; + err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit, + &vec, 1, size, iu->sglist, sg_cnt); + if (unlikely(err)) { + rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n", + err); + return err; + } + + return 0; +} + +/** + * rnbd_clt_dev_add_to_requeue() - add device to requeue if session is busy + * @dev: Device to be checked + * @q: Queue to be added to the requeue list if required + * + * Description: + * If session is busy, that means someone will requeue us when resources + * are freed. If session is not doing anything - device is not added to + * the list and @false is returned. + */ +static bool rnbd_clt_dev_add_to_requeue(struct rnbd_clt_dev *dev, + struct rnbd_queue *q) +{ + struct rnbd_clt_session *sess = dev->sess; + struct rnbd_cpu_qlist *cpu_q; + unsigned long flags; + bool added = true; + bool need_set; + + cpu_q = get_cpu_ptr(sess->cpu_queues); + spin_lock_irqsave(&cpu_q->requeue_lock, flags); + + if (likely(!test_and_set_bit_lock(0, &q->in_list))) { + if (WARN_ON(!list_empty(&q->requeue_list))) + goto unlock; + + need_set = !test_bit(cpu_q->cpu, sess->cpu_queues_bm); + if (need_set) { + set_bit(cpu_q->cpu, sess->cpu_queues_bm); + /* Paired with rnbd_put_permit(). Set a bit first + * and then observe the busy counter. + */ + smp_mb__before_atomic(); + } + if (likely(atomic_read(&sess->busy))) { + list_add_tail(&q->requeue_list, &cpu_q->requeue_list); + } else { + /* Very unlikely, but possible: busy counter was + * observed as zero. Drop all bits and return + * false to restart the queue by ourselves. + */ + if (need_set) + clear_bit(cpu_q->cpu, sess->cpu_queues_bm); + clear_bit_unlock(0, &q->in_list); + added = false; + } + } +unlock: + spin_unlock_irqrestore(&cpu_q->requeue_lock, flags); + put_cpu_ptr(sess->cpu_queues); + + return added; +} + +static void rnbd_clt_dev_kick_mq_queue(struct rnbd_clt_dev *dev, + struct blk_mq_hw_ctx *hctx, + int delay) +{ + struct rnbd_queue *q = hctx->driver_data; + + if (delay != RNBD_DELAY_IFBUSY) + blk_mq_delay_run_hw_queue(hctx, delay); + else if (unlikely(!rnbd_clt_dev_add_to_requeue(dev, q))) + /* + * If session is not busy we have to restart + * the queue ourselves. + */ + blk_mq_delay_run_hw_queue(hctx, 10/*ms*/); +} + +static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq = bd->rq; + struct rnbd_clt_dev *dev = rq->rq_disk->private_data; + struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); + int err; + + if (unlikely(dev->dev_state != DEV_STATE_MAPPED)) + return BLK_STS_IOERR; + + iu->permit = rnbd_get_permit(dev->sess, RTRS_IO_CON, + RTRS_PERMIT_NOWAIT); + if (unlikely(!iu->permit)) { + rnbd_clt_dev_kick_mq_queue(dev, hctx, RNBD_DELAY_IFBUSY); + return BLK_STS_RESOURCE; + } + + blk_mq_start_request(rq); + err = rnbd_client_xfer_request(dev, rq, iu); + if (likely(err == 0)) + return BLK_STS_OK; + if (unlikely(err == -EAGAIN || err == -ENOMEM)) { + rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); + rnbd_put_permit(dev->sess, iu->permit); + return BLK_STS_RESOURCE; + } + + rnbd_put_permit(dev->sess, iu->permit); + return BLK_STS_IOERR; +} + +static int rnbd_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) +{ + struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); + + sg_init_table(iu->sglist, BMAX_SEGMENTS); + return 0; +} + +static struct blk_mq_ops rnbd_mq_ops = { + .queue_rq = rnbd_queue_rq, + .init_request = rnbd_init_request, + .complete = rnbd_softirq_done_fn, +}; + +static int setup_mq_tags(struct rnbd_clt_session *sess) +{ + struct blk_mq_tag_set *tag_set = &sess->tag_set; + + memset(tag_set, 0, sizeof(*tag_set)); + tag_set->ops = &rnbd_mq_ops; + tag_set->queue_depth = sess->queue_depth; + tag_set->numa_node = NUMA_NO_NODE; + tag_set->flags = BLK_MQ_F_SHOULD_MERGE | + BLK_MQ_F_TAG_SHARED; + tag_set->cmd_size = sizeof(struct rnbd_iu); + tag_set->nr_hw_queues = num_online_cpus(); + + return blk_mq_alloc_tag_set(tag_set); +} + +static struct rnbd_clt_session * +find_and_get_or_create_sess(const char *sessname, + const struct rtrs_addr *paths, + size_t path_cnt, u16 port_nr) +{ + struct rnbd_clt_session *sess; + struct rtrs_attrs attrs; + int err; + bool first; + struct rtrs_clt_ops rtrs_ops; + + sess = find_or_create_sess(sessname, &first); + if (sess == ERR_PTR(-ENOMEM)) + return ERR_PTR(-ENOMEM); + else if (!first) + return sess; + + rtrs_ops = (struct rtrs_clt_ops) { + .priv = sess, + .link_ev = rnbd_clt_link_ev, + }; + /* + * Nothing was found, establish rtrs connection and proceed further. + */ + sess->rtrs = rtrs_clt_open(&rtrs_ops, sessname, + paths, path_cnt, port_nr, + sizeof(struct rnbd_iu), + RECONNECT_DELAY, BMAX_SEGMENTS, + MAX_RECONNECTS); + if (IS_ERR(sess->rtrs)) { + err = PTR_ERR(sess->rtrs); + goto wake_up_and_put; + } + rtrs_clt_query(sess->rtrs, &attrs); + sess->max_io_size = attrs.max_io_size; + sess->queue_depth = attrs.queue_depth; + + err = setup_mq_tags(sess); + if (err) + goto close_rtrs; + + err = send_msg_sess_info(sess, WAIT); + if (err) + goto close_rtrs; + + wake_up_rtrs_waiters(sess); + + return sess; + +close_rtrs: + close_rtrs(sess); +put_sess: + rnbd_clt_put_sess(sess); + + return ERR_PTR(err); + +wake_up_and_put: + wake_up_rtrs_waiters(sess); + goto put_sess; +} + +static inline void rnbd_init_hw_queue(struct rnbd_clt_dev *dev, + struct rnbd_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + INIT_LIST_HEAD(&q->requeue_list); + q->dev = dev; + q->hctx = hctx; +} + +static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev) +{ + int i; + struct blk_mq_hw_ctx *hctx; + struct rnbd_queue *q; + + queue_for_each_hw_ctx(dev->queue, hctx, i) { + q = &dev->hw_queues[i]; + rnbd_init_hw_queue(dev, q, hctx); + hctx->driver_data = q; + } +} + +static int setup_mq_dev(struct rnbd_clt_dev *dev) +{ + dev->queue = blk_mq_init_queue(&dev->sess->tag_set); + if (IS_ERR(dev->queue)) { + rnbd_clt_err(dev, "Initializing multiqueue queue failed, err: %ld\n", + PTR_ERR(dev->queue)); + return PTR_ERR(dev->queue); + } + rnbd_init_mq_hw_queues(dev); + return 0; +} + +static void setup_request_queue(struct rnbd_clt_dev *dev) +{ + blk_queue_logical_block_size(dev->queue, dev->logical_block_size); + blk_queue_physical_block_size(dev->queue, dev->physical_block_size); + blk_queue_max_hw_sectors(dev->queue, dev->max_hw_sectors); + blk_queue_max_write_same_sectors(dev->queue, + dev->max_write_same_sectors); + + /* + * we don't support discards to "discontiguous" segments + * in on request + */ + blk_queue_max_discard_segments(dev->queue, 1); + + blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors); + dev->queue->limits.discard_granularity = dev->discard_granularity; + dev->queue->limits.discard_alignment = dev->discard_alignment; + if (dev->max_discard_sectors) + blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue); + if (dev->secure_discard) + blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue); + + blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); + blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); + blk_queue_max_segments(dev->queue, dev->max_segments); + blk_queue_io_opt(dev->queue, dev->sess->max_io_size); + blk_queue_virt_boundary(dev->queue, SZ_4K - 1); + blk_queue_write_cache(dev->queue, true, true); + dev->queue->queuedata = dev; +} + +static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) +{ + dev->gd->major = rnbd_client_major; + dev->gd->first_minor = idx << RNBD_PART_BITS; + dev->gd->fops = &rnbd_client_ops; + dev->gd->queue = dev->queue; + dev->gd->private_data = dev; + snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name), "rnbd%d", + idx); + pr_debug("disk_name=%s, capacity=%zu\n", + dev->gd->disk_name, + dev->nsectors * (dev->logical_block_size / SECTOR_SIZE) + ); + + set_capacity(dev->gd, dev->nsectors); + + if (dev->access_mode == RNBD_ACCESS_RO) { + dev->read_only = true; + set_disk_ro(dev->gd, true); + } else { + dev->read_only = false; + } + + if (!dev->rotational) + blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); +} + +static int rnbd_client_setup_device(struct rnbd_clt_session *sess, + struct rnbd_clt_dev *dev, int idx) +{ + int err; + + dev->size = dev->nsectors * dev->logical_block_size; + + err = setup_mq_dev(dev); + if (err) + return err; + + setup_request_queue(dev); + + dev->gd = alloc_disk_node(1 << RNBD_PART_BITS, NUMA_NO_NODE); + if (!dev->gd) { + blk_cleanup_queue(dev->queue); + return -ENOMEM; + } + + rnbd_clt_setup_gen_disk(dev, idx); + + return 0; +} + +static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, + enum rnbd_access_mode access_mode, + const char *pathname) +{ + struct rnbd_clt_dev *dev; + int ret; + + dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, NUMA_NO_NODE); + if (!dev) + return ERR_PTR(-ENOMEM); + + dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues), + GFP_KERNEL); + if (!dev->hw_queues) { + ret = -ENOMEM; + goto out_alloc; + } + + mutex_lock(&ida_lock); + ret = ida_simple_get(&index_ida, 0, 1 << (MINORBITS - RNBD_PART_BITS), + GFP_KERNEL); + mutex_unlock(&ida_lock); + if (ret < 0) { + pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n", + pathname, sess->sessname, ret); + goto out_queues; + } + dev->clt_device_id = ret; + dev->sess = sess; + dev->access_mode = access_mode; + strlcpy(dev->pathname, pathname, sizeof(dev->pathname)); + mutex_init(&dev->lock); + refcount_set(&dev->refcount, 1); + dev->dev_state = DEV_STATE_INIT; + + /* + * Here we called from sysfs entry, thus clt-sysfs is + * responsible that session will not disappear. + */ + WARN_ON(!rnbd_clt_get_sess(sess)); + + return dev; + +out_queues: + kfree(dev->hw_queues); +out_alloc: + kfree(dev); + return ERR_PTR(ret); +} + +static bool __exists_dev(const char *pathname) +{ + struct rnbd_clt_session *sess; + struct rnbd_clt_dev *dev; + bool found = false; + + list_for_each_entry(sess, &sess_list, list) { + mutex_lock(&sess->lock); + list_for_each_entry(dev, &sess->devs_list, list) { + if (!strncmp(dev->pathname, pathname, + sizeof(dev->pathname))) { + found = true; + break; + } + } + mutex_unlock(&sess->lock); + if (found) + break; + } + + return found; +} + +static bool exists_devpath(const char *pathname) +{ + bool found; + + mutex_lock(&sess_lock); + found = __exists_dev(pathname); + mutex_unlock(&sess_lock); + + return found; +} + +static bool insert_dev_if_not_exists_devpath(const char *pathname, + struct rnbd_clt_session *sess, + struct rnbd_clt_dev *dev) +{ + bool found; + + mutex_lock(&sess_lock); + found = __exists_dev(pathname); + if (!found) { + mutex_lock(&sess->lock); + list_add_tail(&dev->list, &sess->devs_list); + mutex_unlock(&sess->lock); + } + mutex_unlock(&sess_lock); + + return found; +} + +static void delete_dev(struct rnbd_clt_dev *dev) +{ + struct rnbd_clt_session *sess = dev->sess; + + mutex_lock(&sess->lock); + list_del(&dev->list); + mutex_unlock(&sess->lock); +} + +struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, + struct rtrs_addr *paths, + size_t path_cnt, u16 port_nr, + const char *pathname, + enum rnbd_access_mode access_mode) +{ + struct rnbd_clt_session *sess; + struct rnbd_clt_dev *dev; + int ret; + + if (exists_devpath(pathname)) + return ERR_PTR(-EEXIST); + + sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr); + if (IS_ERR(sess)) + return ERR_CAST(sess); + + dev = init_dev(sess, access_mode, pathname); + if (IS_ERR(dev)) { + pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n", + pathname, sess->sessname, PTR_ERR(dev)); + ret = PTR_ERR(dev); + goto put_sess; + } + if (insert_dev_if_not_exists_devpath(pathname, sess, dev)) { + ret = -EEXIST; + goto put_dev; + } + ret = send_msg_open(dev, WAIT); + if (ret) { + rnbd_clt_err(dev, + "map_device: failed, can't open remote device, err: %d\n", + ret); + goto del_dev; + } + mutex_lock(&dev->lock); + pr_debug("Opened remote device: session=%s, path='%s'\n", + sess->sessname, pathname); + ret = rnbd_client_setup_device(sess, dev, dev->clt_device_id); + if (ret) { + rnbd_clt_err(dev, + "map_device: Failed to configure device, err: %d\n", + ret); + mutex_unlock(&dev->lock); + goto del_dev; + } + + rnbd_clt_info(dev, + "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d)\n", + dev->gd->disk_name, dev->nsectors, + dev->logical_block_size, dev->physical_block_size, + dev->max_write_same_sectors, dev->max_discard_sectors, + dev->discard_granularity, dev->discard_alignment, + dev->secure_discard, dev->max_segments, + dev->max_hw_sectors, dev->rotational); + + mutex_unlock(&dev->lock); + + add_disk(dev->gd); + rnbd_clt_put_sess(sess); + + return dev; + +del_dev: + delete_dev(dev); +put_dev: + rnbd_clt_put_dev(dev); +put_sess: + rnbd_clt_put_sess(sess); + + return ERR_PTR(ret); +} + +static void destroy_gen_disk(struct rnbd_clt_dev *dev) +{ + del_gendisk(dev->gd); + blk_cleanup_queue(dev->queue); + put_disk(dev->gd); +} + +static void destroy_sysfs(struct rnbd_clt_dev *dev, + const struct attribute *sysfs_self) +{ + rnbd_clt_remove_dev_symlink(dev); + if (dev->kobj.state_initialized) { + if (sysfs_self) + /* To avoid deadlock firstly remove itself */ + sysfs_remove_file_self(&dev->kobj, sysfs_self); + kobject_del(&dev->kobj); + kobject_put(&dev->kobj); + } +} + +int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force, + const struct attribute *sysfs_self) +{ + struct rnbd_clt_session *sess = dev->sess; + int refcount, ret = 0; + bool was_mapped; + + mutex_lock(&dev->lock); + if (dev->dev_state == DEV_STATE_UNMAPPED) { + rnbd_clt_info(dev, "Device is already being unmapped\n"); + ret = -EALREADY; + goto err; + } + refcount = refcount_read(&dev->refcount); + if (!force && refcount > 1) { + rnbd_clt_err(dev, + "Closing device failed, device is in use, (%d device users)\n", + refcount - 1); + ret = -EBUSY; + goto err; + } + was_mapped = (dev->dev_state == DEV_STATE_MAPPED); + dev->dev_state = DEV_STATE_UNMAPPED; + mutex_unlock(&dev->lock); + + delete_dev(dev); + destroy_sysfs(dev, sysfs_self); + destroy_gen_disk(dev); + if (was_mapped && sess->rtrs) + send_msg_close(dev, dev->device_id, WAIT); + + rnbd_clt_info(dev, "Device is unmapped\n"); + + /* Likely last reference put */ + rnbd_clt_put_dev(dev); + + /* + * Here device and session can be vanished! + */ + + return 0; +err: + mutex_unlock(&dev->lock); + + return ret; +} + +int rnbd_clt_remap_device(struct rnbd_clt_dev *dev) +{ + int err; + + mutex_lock(&dev->lock); + if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) + err = 0; + else if (dev->dev_state == DEV_STATE_UNMAPPED) + err = -ENODEV; + else if (dev->dev_state == DEV_STATE_MAPPED) + err = -EALREADY; + else + err = -EBUSY; + mutex_unlock(&dev->lock); + if (!err) { + rnbd_clt_info(dev, "Remapping device.\n"); + err = send_msg_open(dev, WAIT); + if (err) + rnbd_clt_err(dev, "remap_device: %d\n", err); + } + + return err; +} + +static void unmap_device_work(struct work_struct *work) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(work, typeof(*dev), unmap_on_rmmod_work); + rnbd_clt_unmap_device(dev, true, NULL); +} + +static void rnbd_destroy_sessions(void) +{ + struct rnbd_clt_session *sess, *sn; + struct rnbd_clt_dev *dev, *tn; + + /* Firstly forbid access through sysfs interface */ + rnbd_clt_destroy_default_group(); + rnbd_clt_destroy_sysfs_files(); + + /* + * Here at this point there is no any concurrent access to sessions + * list and devices list: + * 1. New session or device can'be be created - session sysfs files + * are removed. + * 2. Device or session can't be removed - module reference is taken + * into account in unmap device sysfs callback. + * 3. No IO requests inflight - each file open of block_dev increases + * module reference in get_disk(). + * + * But still there can be user requests inflights, which are sent by + * asynchronous send_msg_*() functions, thus before unmapping devices + * RTRS session must be explicitly closed. + */ + + list_for_each_entry_safe(sess, sn, &sess_list, list) { + WARN_ON(!rnbd_clt_get_sess(sess)); + close_rtrs(sess); + list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { + /* + * Here unmap happens in parallel for only one reason: + * blk_cleanup_queue() takes around half a second, so + * on huge amount of devices the whole module unload + * procedure takes minutes. + */ + INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work); + queue_work(system_long_wq, &dev->unmap_on_rmmod_work); + } + rnbd_clt_put_sess(sess); + } + /* Wait for all scheduled unmap works */ + flush_workqueue(system_long_wq); + WARN_ON(!list_empty(&sess_list)); +} + +static int __init rnbd_client_init(void) +{ + int err = 0; + + BUILD_BUG_ON(sizeof(struct rnbd_msg_hdr) != 4); + BUILD_BUG_ON(sizeof(struct rnbd_msg_sess_info) != 36); + BUILD_BUG_ON(sizeof(struct rnbd_msg_sess_info_rsp) != 36); + BUILD_BUG_ON(sizeof(struct rnbd_msg_open) != 264); + BUILD_BUG_ON(sizeof(struct rnbd_msg_close) != 8); + BUILD_BUG_ON(sizeof(struct rnbd_msg_open_rsp) != 56); + rnbd_client_major = register_blkdev(rnbd_client_major, "rnbd"); + if (rnbd_client_major <= 0) { + pr_err("Failed to load module, block device registration failed\n"); + return -EBUSY; + } + + err = rnbd_clt_create_sysfs_files(); + if (err) { + pr_err("Failed to load module, creating sysfs device files failed, err: %d\n", + err); + unregister_blkdev(rnbd_client_major, "rnbd"); + } + + return err; +} + +static void __exit rnbd_client_exit(void) +{ + rnbd_destroy_sessions(); + unregister_blkdev(rnbd_client_major, "rnbd"); + ida_destroy(&index_ida); +} + +module_init(rnbd_client_init); +module_exit(rnbd_client_exit); From 1eb54f8f5dd8ae09829caa37a50952d931bb79cf Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:24 +0200 Subject: [PATCH 346/562] block/rnbd: client: sysfs interface functions This is the sysfs interface to rnbd block devices on client side: /sys/class/rnbd-client/ctl/ |- map_device | *** maps remote device | |- devices/ *** all mapped devices /sys/block/rnbd/rnbd/ |- unmap_device | *** unmaps device | |- state | *** device state | |- session | *** session name | |- mapping_path *** path of the dev that was mapped on server Link: https://lore.kernel.org/r/20200511135131.27580-19-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-clt-sysfs.c | 636 ++++++++++++++++++++++++++++ 1 file changed, 636 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-clt-sysfs.c diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c new file mode 100644 index 000000000000..a4508fcc7ffe --- /dev/null +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rnbd-clt.h" + +static struct device *rnbd_dev; +static struct class *rnbd_dev_class; +static struct kobject *rnbd_devs_kobj; + +enum { + RNBD_OPT_ERR = 0, + RNBD_OPT_DEST_PORT = 1 << 0, + RNBD_OPT_PATH = 1 << 1, + RNBD_OPT_DEV_PATH = 1 << 2, + RNBD_OPT_ACCESS_MODE = 1 << 3, + RNBD_OPT_SESSNAME = 1 << 6, +}; + +static const unsigned int rnbd_opt_mandatory[] = { + RNBD_OPT_PATH, + RNBD_OPT_DEV_PATH, + RNBD_OPT_SESSNAME, +}; + +static const match_table_t rnbd_opt_tokens = { + {RNBD_OPT_PATH, "path=%s" }, + {RNBD_OPT_DEV_PATH, "device_path=%s"}, + {RNBD_OPT_DEST_PORT, "dest_port=%d" }, + {RNBD_OPT_ACCESS_MODE, "access_mode=%s"}, + {RNBD_OPT_SESSNAME, "sessname=%s" }, + {RNBD_OPT_ERR, NULL }, +}; + +struct rnbd_map_options { + char *sessname; + struct rtrs_addr *paths; + size_t *path_cnt; + char *pathname; + u16 *dest_port; + enum rnbd_access_mode *access_mode; +}; + +static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt, + struct rnbd_map_options *opt) +{ + char *options, *sep_opt; + char *p; + substring_t args[MAX_OPT_ARGS]; + int opt_mask = 0; + int token; + int ret = -EINVAL; + int i, dest_port; + int p_cnt = 0; + + options = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + sep_opt = strstrip(options); + while ((p = strsep(&sep_opt, " ")) != NULL) { + if (!*p) + continue; + + token = match_token(p, rnbd_opt_tokens, args); + opt_mask |= token; + + switch (token) { + case RNBD_OPT_SESSNAME: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + if (strlen(p) > NAME_MAX) { + pr_err("map_device: sessname too long\n"); + ret = -EINVAL; + kfree(p); + goto out; + } + strlcpy(opt->sessname, p, NAME_MAX); + kfree(p); + break; + + case RNBD_OPT_PATH: + if (p_cnt >= max_path_cnt) { + pr_err("map_device: too many (> %zu) paths provided\n", + max_path_cnt); + ret = -ENOMEM; + goto out; + } + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + + ret = rtrs_addr_to_sockaddr(p, strlen(p), + *opt->dest_port, + &opt->paths[p_cnt]); + if (ret) { + pr_err("Can't parse path %s: %d\n", p, ret); + kfree(p); + goto out; + } + + p_cnt++; + + kfree(p); + break; + + case RNBD_OPT_DEV_PATH: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + if (strlen(p) > NAME_MAX) { + pr_err("map_device: Device path too long\n"); + ret = -EINVAL; + kfree(p); + goto out; + } + strlcpy(opt->pathname, p, NAME_MAX); + kfree(p); + break; + + case RNBD_OPT_DEST_PORT: + if (match_int(args, &dest_port) || dest_port < 0 || + dest_port > 65535) { + pr_err("bad destination port number parameter '%d'\n", + dest_port); + ret = -EINVAL; + goto out; + } + *opt->dest_port = dest_port; + break; + + case RNBD_OPT_ACCESS_MODE: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + + if (!strcmp(p, "ro")) { + *opt->access_mode = RNBD_ACCESS_RO; + } else if (!strcmp(p, "rw")) { + *opt->access_mode = RNBD_ACCESS_RW; + } else if (!strcmp(p, "migration")) { + *opt->access_mode = RNBD_ACCESS_MIGRATION; + } else { + pr_err("map_device: Invalid access_mode: '%s'\n", + p); + ret = -EINVAL; + kfree(p); + goto out; + } + + kfree(p); + break; + + default: + pr_err("map_device: Unknown parameter or missing value '%s'\n", + p); + ret = -EINVAL; + goto out; + } + } + + for (i = 0; i < ARRAY_SIZE(rnbd_opt_mandatory); i++) { + if ((opt_mask & rnbd_opt_mandatory[i])) { + ret = 0; + } else { + pr_err("map_device: Parameters missing\n"); + ret = -EINVAL; + break; + } + } + +out: + *opt->path_cnt = p_cnt; + kfree(options); + return ret; +} + +static ssize_t state_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + switch (dev->dev_state) { + case DEV_STATE_INIT: + return snprintf(page, PAGE_SIZE, "init\n"); + case DEV_STATE_MAPPED: + /* TODO fix cli tool before changing to proper state */ + return snprintf(page, PAGE_SIZE, "open\n"); + case DEV_STATE_MAPPED_DISCONNECTED: + /* TODO fix cli tool before changing to proper state */ + return snprintf(page, PAGE_SIZE, "closed\n"); + case DEV_STATE_UNMAPPED: + return snprintf(page, PAGE_SIZE, "unmapped\n"); + default: + return snprintf(page, PAGE_SIZE, "unknown\n"); + } +} + +static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state); + +static ssize_t mapping_path_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname); +} + +static struct kobj_attribute rnbd_clt_mapping_path_attr = + __ATTR_RO(mapping_path); + +static ssize_t access_mode_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + return snprintf(page, PAGE_SIZE, "%s\n", + rnbd_access_mode_str(dev->access_mode)); +} + +static struct kobj_attribute rnbd_clt_access_mode = + __ATTR_RO(access_mode); + +static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_clt_unmap_dev_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_clt_dev *dev; + char *opt, *options; + bool force; + int err; + + opt = kstrdup(buf, GFP_KERNEL); + if (!opt) + return -ENOMEM; + + options = strstrip(opt); + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + if (sysfs_streq(options, "normal")) { + force = false; + } else if (sysfs_streq(options, "force")) { + force = true; + } else { + rnbd_clt_err(dev, + "unmap_device: Invalid value: %s\n", + options); + err = -EINVAL; + goto out; + } + + rnbd_clt_info(dev, "Unmapping device, option: %s.\n", + force ? "force" : "normal"); + + /* + * We take explicit module reference only for one reason: do not + * race with lockless rnbd_destroy_sessions(). + */ + if (!try_module_get(THIS_MODULE)) { + err = -ENODEV; + goto out; + } + err = rnbd_clt_unmap_device(dev, force, &attr->attr); + if (err) { + if (err != -EALREADY) + rnbd_clt_err(dev, "unmap_device: %d\n", err); + goto module_put; + } + + /* + * Here device can be vanished! + */ + + err = count; + +module_put: + module_put(THIS_MODULE); +out: + kfree(opt); + + return err; +} + +static struct kobj_attribute rnbd_clt_unmap_device_attr = + __ATTR(unmap_device, 0644, rnbd_clt_unmap_dev_show, + rnbd_clt_unmap_dev_store); + +static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, + "Usage: echo > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long sectors; + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + ret = kstrtoul(buf, 0, §ors); + if (ret) + return ret; + + ret = rnbd_clt_resize_disk(dev, (size_t)sectors); + if (ret) + return ret; + + return count; +} + +static struct kobj_attribute rnbd_clt_resize_dev_attr = + __ATTR(resize, 0644, rnbd_clt_resize_dev_show, + rnbd_clt_resize_dev_store); + +static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_clt_dev *dev; + char *opt, *options; + int err; + + opt = kstrdup(buf, GFP_KERNEL); + if (!opt) + return -ENOMEM; + + options = strstrip(opt); + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + if (!sysfs_streq(options, "1")) { + rnbd_clt_err(dev, + "remap_device: Invalid value: %s\n", + options); + err = -EINVAL; + goto out; + } + err = rnbd_clt_remap_device(dev); + if (likely(!err)) + err = count; + +out: + kfree(opt); + + return err; +} + +static struct kobj_attribute rnbd_clt_remap_device_attr = + __ATTR(remap_device, 0644, rnbd_clt_remap_dev_show, + rnbd_clt_remap_dev_store); + +static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname); +} + +static struct kobj_attribute rnbd_clt_session_attr = + __ATTR_RO(session); + +static struct attribute *rnbd_dev_attrs[] = { + &rnbd_clt_unmap_device_attr.attr, + &rnbd_clt_resize_dev_attr.attr, + &rnbd_clt_remap_device_attr.attr, + &rnbd_clt_mapping_path_attr.attr, + &rnbd_clt_state_attr.attr, + &rnbd_clt_session_attr.attr, + &rnbd_clt_access_mode.attr, + NULL, +}; + +void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) +{ + /* + * The module_is_live() check is crucial and helps to avoid annoying + * sysfs warning raised in sysfs_remove_link(), when the whole sysfs + * path was just removed, see rnbd_close_sessions(). + */ + if (strlen(dev->blk_symlink_name) && module_is_live(THIS_MODULE)) + sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); +} + +static struct kobj_type rnbd_dev_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_attrs = rnbd_dev_attrs, +}; + +static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev) +{ + int ret; + struct kobject *gd_kobj = &disk_to_dev(dev->gd)->kobj; + + ret = kobject_init_and_add(&dev->kobj, &rnbd_dev_ktype, gd_kobj, "%s", + "rnbd"); + if (ret) + rnbd_clt_err(dev, "Failed to create device sysfs dir, err: %d\n", + ret); + + return ret; +} + +static ssize_t rnbd_clt_map_device_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, + "Usage: echo \"[dest_port=server port number] sessname= path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path= [access_mode=]\" > %s\n\naddr ::= [ ip: | ip: | gid: ]\n", + attr->attr.name); +} + +static int rnbd_clt_get_path_name(struct rnbd_clt_dev *dev, char *buf, + size_t len) +{ + int ret; + char pathname[NAME_MAX], *s; + + strlcpy(pathname, dev->pathname, sizeof(pathname)); + while ((s = strchr(pathname, '/'))) + s[0] = '!'; + + ret = snprintf(buf, len, "%s", pathname); + if (ret >= len) + return -ENAMETOOLONG; + + return 0; +} + +static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) +{ + struct kobject *gd_kobj = &disk_to_dev(dev->gd)->kobj; + int ret; + + ret = rnbd_clt_get_path_name(dev, dev->blk_symlink_name, + sizeof(dev->blk_symlink_name)); + if (ret) { + rnbd_clt_err(dev, "Failed to get /sys/block symlink path, err: %d\n", + ret); + goto out_err; + } + + ret = sysfs_create_link(rnbd_devs_kobj, gd_kobj, + dev->blk_symlink_name); + if (ret) { + rnbd_clt_err(dev, "Creating /sys/block symlink failed, err: %d\n", + ret); + goto out_err; + } + + return 0; + +out_err: + dev->blk_symlink_name[0] = '\0'; + return ret; +} + +static ssize_t rnbd_clt_map_device_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_clt_dev *dev; + struct rnbd_map_options opt; + int ret; + char pathname[NAME_MAX]; + char sessname[NAME_MAX]; + enum rnbd_access_mode access_mode = RNBD_ACCESS_RW; + u16 port_nr = RTRS_PORT; + + struct sockaddr_storage *addrs; + struct rtrs_addr paths[6]; + size_t path_cnt; + + opt.sessname = sessname; + opt.paths = paths; + opt.path_cnt = &path_cnt; + opt.pathname = pathname; + opt.dest_port = &port_nr; + opt.access_mode = &access_mode; + addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + + for (path_cnt = 0; path_cnt < ARRAY_SIZE(paths); path_cnt++) { + paths[path_cnt].src = &addrs[path_cnt * 2]; + paths[path_cnt].dst = &addrs[path_cnt * 2 + 1]; + } + + ret = rnbd_clt_parse_map_options(buf, ARRAY_SIZE(paths), &opt); + if (ret) + goto out; + + pr_info("Mapping device %s on session %s, (access_mode: %s)\n", + pathname, sessname, + rnbd_access_mode_str(access_mode)); + + dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname, + access_mode); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto out; + } + + ret = rnbd_clt_add_dev_kobj(dev); + if (ret) + goto unmap_dev; + + ret = rnbd_clt_add_dev_symlink(dev); + if (ret) + goto unmap_dev; + + kfree(addrs); + return count; + +unmap_dev: + rnbd_clt_unmap_device(dev, true, NULL); +out: + kfree(addrs); + return ret; +} + +static struct kobj_attribute rnbd_clt_map_device_attr = + __ATTR(map_device, 0644, + rnbd_clt_map_device_show, rnbd_clt_map_device_store); + +static struct attribute *default_attrs[] = { + &rnbd_clt_map_device_attr.attr, + NULL, +}; + +static struct attribute_group default_attr_group = { + .attrs = default_attrs, +}; + +static const struct attribute_group *default_attr_groups[] = { + &default_attr_group, + NULL, +}; + +int rnbd_clt_create_sysfs_files(void) +{ + int err; + + rnbd_dev_class = class_create(THIS_MODULE, "rnbd-client"); + if (IS_ERR(rnbd_dev_class)) + return PTR_ERR(rnbd_dev_class); + + rnbd_dev = device_create_with_groups(rnbd_dev_class, NULL, + MKDEV(0, 0), NULL, + default_attr_groups, "ctl"); + if (IS_ERR(rnbd_dev)) { + err = PTR_ERR(rnbd_dev); + goto cls_destroy; + } + rnbd_devs_kobj = kobject_create_and_add("devices", &rnbd_dev->kobj); + if (!rnbd_devs_kobj) { + err = -ENOMEM; + goto dev_destroy; + } + + return 0; + +dev_destroy: + device_destroy(rnbd_dev_class, MKDEV(0, 0)); +cls_destroy: + class_destroy(rnbd_dev_class); + + return err; +} + +void rnbd_clt_destroy_default_group(void) +{ + sysfs_remove_group(&rnbd_dev->kobj, &default_attr_group); +} + +void rnbd_clt_destroy_sysfs_files(void) +{ + kobject_del(rnbd_devs_kobj); + kobject_put(rnbd_devs_kobj); + device_destroy(rnbd_dev_class, MKDEV(0, 0)); + class_destroy(rnbd_dev_class); +} From d4c6957dd001dc097e8057611093c0731c517d26 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:25 +0200 Subject: [PATCH 347/562] block/rnbd: server: private header with server structs and functions This header describes main structs and functions used by rnbd-server module, namely structs for managing sessions from different clients and mapped (opened) devices. Link: https://lore.kernel.org/r/20200511135131.27580-20-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-srv.h | 78 +++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-srv.h diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h new file mode 100644 index 000000000000..5a8544b5e74f --- /dev/null +++ b/drivers/block/rnbd/rnbd-srv.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#ifndef RNBD_SRV_H +#define RNBD_SRV_H + +#include +#include +#include + +#include +#include "rnbd-proto.h" +#include "rnbd-log.h" + +struct rnbd_srv_session { + /* Entry inside global sess_list */ + struct list_head list; + struct rtrs_srv *rtrs; + char sessname[NAME_MAX]; + int queue_depth; + struct bio_set sess_bio_set; + + struct xarray index_idr; + /* List of struct rnbd_srv_sess_dev */ + struct list_head sess_dev_list; + struct mutex lock; + u8 ver; +}; + +struct rnbd_srv_dev { + /* Entry inside global dev_list */ + struct list_head list; + struct kobject dev_kobj; + struct kobject *dev_sessions_kobj; + struct kref kref; + char id[NAME_MAX]; + /* List of rnbd_srv_sess_dev structs */ + struct list_head sess_dev_list; + struct mutex lock; + int open_write_cnt; +}; + +/* Structure which binds N devices and N sessions */ +struct rnbd_srv_sess_dev { + /* Entry inside rnbd_srv_dev struct */ + struct list_head dev_list; + /* Entry inside rnbd_srv_session struct */ + struct list_head sess_list; + struct rnbd_dev *rnbd_dev; + struct rnbd_srv_session *sess; + struct rnbd_srv_dev *dev; + struct kobject kobj; + u32 device_id; + fmode_t open_flags; + struct kref kref; + struct completion *destroy_comp; + char pathname[NAME_MAX]; + enum rnbd_access_mode access_mode; +}; + +/* rnbd-srv-sysfs.c */ + +int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, + struct block_device *bdev, + const char *dir_name); +void rnbd_srv_destroy_dev_sysfs(struct rnbd_srv_dev *dev); +int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); +void rnbd_srv_destroy_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); +int rnbd_srv_create_sysfs_files(void); +void rnbd_srv_destroy_sysfs_files(void); +void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev); + +#endif /* RNBD_SRV_H */ From 2de6c8de192b9341ffa5e84afe1ce6196d4eef41 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:26 +0200 Subject: [PATCH 348/562] block/rnbd: server: main functionality This is main functionality of rnbd-server module, which handles RTRS events and rnbd protocol requests, like map (open) or unmap (close) device. Also server side is responsible for processing incoming IBTRS IO requests and forward them to local mapped devices. Link: https://lore.kernel.org/r/20200511135131.27580-21-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-srv.c | 844 ++++++++++++++++++++++++++++++++++ 1 file changed, 844 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-srv.c diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c new file mode 100644 index 000000000000..86e61523907b --- /dev/null +++ b/drivers/block/rnbd/rnbd-srv.c @@ -0,0 +1,844 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include +#include + +#include "rnbd-srv.h" +#include "rnbd-srv-dev.h" + +MODULE_DESCRIPTION("RDMA Network Block Device Server"); +MODULE_LICENSE("GPL"); + +static u16 port_nr = RTRS_PORT; + +module_param_named(port_nr, port_nr, ushort, 0444); +MODULE_PARM_DESC(port_nr, + "The port number the server is listening on (default: " + __stringify(RTRS_PORT)")"); + +#define DEFAULT_DEV_SEARCH_PATH "/" + +static char dev_search_path[PATH_MAX] = DEFAULT_DEV_SEARCH_PATH; + +static int dev_search_path_set(const char *val, const struct kernel_param *kp) +{ + const char *p = strrchr(val, '\n') ? : val + strlen(val); + + if (strlen(val) >= sizeof(dev_search_path)) + return -EINVAL; + + snprintf(dev_search_path, sizeof(dev_search_path), "%.*s", + (int)(p - val), val); + + pr_info("dev_search_path changed to '%s'\n", dev_search_path); + + return 0; +} + +static struct kparam_string dev_search_path_kparam_str = { + .maxlen = sizeof(dev_search_path), + .string = dev_search_path +}; + +static const struct kernel_param_ops dev_search_path_ops = { + .set = dev_search_path_set, + .get = param_get_string, +}; + +module_param_cb(dev_search_path, &dev_search_path_ops, + &dev_search_path_kparam_str, 0444); +MODULE_PARM_DESC(dev_search_path, + "Sets the dev_search_path. When a device is mapped this path is prepended to the device path from the map device operation. If %SESSNAME% is specified in a path, then device will be searched in a session namespace. (default: " + DEFAULT_DEV_SEARCH_PATH ")"); + +static DEFINE_MUTEX(sess_lock); +static DEFINE_SPINLOCK(dev_lock); + +static LIST_HEAD(sess_list); +static LIST_HEAD(dev_list); + +struct rnbd_io_private { + struct rtrs_srv_op *id; + struct rnbd_srv_sess_dev *sess_dev; +}; + +static void rnbd_sess_dev_release(struct kref *kref) +{ + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = container_of(kref, struct rnbd_srv_sess_dev, kref); + complete(sess_dev->destroy_comp); +} + +static inline void rnbd_put_sess_dev(struct rnbd_srv_sess_dev *sess_dev) +{ + kref_put(&sess_dev->kref, rnbd_sess_dev_release); +} + +void rnbd_endio(void *priv, int error) +{ + struct rnbd_io_private *rnbd_priv = priv; + struct rnbd_srv_sess_dev *sess_dev = rnbd_priv->sess_dev; + + rnbd_put_sess_dev(sess_dev); + + rtrs_srv_resp_rdma(rnbd_priv->id, error); + + kfree(priv); +} + +static struct rnbd_srv_sess_dev * +rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess) +{ + struct rnbd_srv_sess_dev *sess_dev; + int ret = 0; + + rcu_read_lock(); + sess_dev = xa_load(&srv_sess->index_idr, dev_id); + if (likely(sess_dev)) + ret = kref_get_unless_zero(&sess_dev->kref); + rcu_read_unlock(); + + if (!sess_dev || !ret) + return ERR_PTR(-ENXIO); + + return sess_dev; +} + +static int process_rdma(struct rtrs_srv *sess, + struct rnbd_srv_session *srv_sess, + struct rtrs_srv_op *id, void *data, u32 datalen, + const void *usr, size_t usrlen) +{ + const struct rnbd_msg_io *msg = usr; + struct rnbd_io_private *priv; + struct rnbd_srv_sess_dev *sess_dev; + u32 dev_id; + int err; + + priv = kmalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + dev_id = le32_to_cpu(msg->device_id); + + sess_dev = rnbd_get_sess_dev(dev_id, srv_sess); + if (IS_ERR(sess_dev)) { + pr_err_ratelimited("Got I/O request on session %s for unknown device id %d\n", + srv_sess->sessname, dev_id); + err = -ENOTCONN; + goto err; + } + + priv->sess_dev = sess_dev; + priv->id = id; + + err = rnbd_dev_submit_io(sess_dev->rnbd_dev, le64_to_cpu(msg->sector), + data, datalen, le32_to_cpu(msg->bi_size), + le32_to_cpu(msg->rw), + srv_sess->ver < RNBD_PROTO_VER_MAJOR || + usrlen < sizeof(*msg) ? + 0 : le16_to_cpu(msg->prio), priv); + if (unlikely(err)) { + rnbd_srv_err(sess_dev, "Submitting I/O to device failed, err: %d\n", + err); + goto sess_dev_put; + } + + return 0; + +sess_dev_put: + rnbd_put_sess_dev(sess_dev); +err: + kfree(priv); + return err; +} + +static void destroy_device(struct rnbd_srv_dev *dev) +{ + WARN_ONCE(!list_empty(&dev->sess_dev_list), + "Device %s is being destroyed but still in use!\n", + dev->id); + + spin_lock(&dev_lock); + list_del(&dev->list); + spin_unlock(&dev_lock); + + mutex_destroy(&dev->lock); + if (dev->dev_kobj.state_in_sysfs) + /* + * Destroy kobj only if it was really created. + */ + rnbd_srv_destroy_dev_sysfs(dev); + else + kfree(dev); +} + +static void destroy_device_cb(struct kref *kref) +{ + struct rnbd_srv_dev *dev; + + dev = container_of(kref, struct rnbd_srv_dev, kref); + + destroy_device(dev); +} + +static void rnbd_put_srv_dev(struct rnbd_srv_dev *dev) +{ + kref_put(&dev->kref, destroy_device_cb); +} + +void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev) +{ + DECLARE_COMPLETION_ONSTACK(dc); + + xa_erase(&sess_dev->sess->index_idr, sess_dev->device_id); + synchronize_rcu(); + sess_dev->destroy_comp = &dc; + rnbd_put_sess_dev(sess_dev); + wait_for_completion(&dc); /* wait for inflights to drop to zero */ + + rnbd_dev_close(sess_dev->rnbd_dev); + list_del(&sess_dev->sess_list); + mutex_lock(&sess_dev->dev->lock); + list_del(&sess_dev->dev_list); + if (sess_dev->open_flags & FMODE_WRITE) + sess_dev->dev->open_write_cnt--; + mutex_unlock(&sess_dev->dev->lock); + + rnbd_put_srv_dev(sess_dev->dev); + + rnbd_srv_info(sess_dev, "Device closed\n"); + kfree(sess_dev); +} + +static void destroy_sess(struct rnbd_srv_session *srv_sess) +{ + struct rnbd_srv_sess_dev *sess_dev, *tmp; + + if (list_empty(&srv_sess->sess_dev_list)) + goto out; + + mutex_lock(&srv_sess->lock); + list_for_each_entry_safe(sess_dev, tmp, &srv_sess->sess_dev_list, + sess_list) + rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&srv_sess->lock); + +out: + xa_destroy(&srv_sess->index_idr); + bioset_exit(&srv_sess->sess_bio_set); + + pr_info("RTRS Session %s disconnected\n", srv_sess->sessname); + + mutex_lock(&sess_lock); + list_del(&srv_sess->list); + mutex_unlock(&sess_lock); + + mutex_destroy(&srv_sess->lock); + kfree(srv_sess); +} + +static int create_sess(struct rtrs_srv *rtrs) +{ + struct rnbd_srv_session *srv_sess; + char sessname[NAME_MAX]; + int err; + + err = rtrs_srv_get_sess_name(rtrs, sessname, sizeof(sessname)); + if (err) { + pr_err("rtrs_srv_get_sess_name(%s): %d\n", sessname, err); + + return err; + } + srv_sess = kzalloc(sizeof(*srv_sess), GFP_KERNEL); + if (!srv_sess) + return -ENOMEM; + + srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs); + err = bioset_init(&srv_sess->sess_bio_set, srv_sess->queue_depth, + offsetof(struct rnbd_dev_blk_io, bio), + BIOSET_NEED_BVECS); + if (err) { + pr_err("Allocating srv_session for session %s failed\n", + sessname); + kfree(srv_sess); + return err; + } + + xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC); + INIT_LIST_HEAD(&srv_sess->sess_dev_list); + mutex_init(&srv_sess->lock); + mutex_lock(&sess_lock); + list_add(&srv_sess->list, &sess_list); + mutex_unlock(&sess_lock); + + srv_sess->rtrs = rtrs; + strlcpy(srv_sess->sessname, sessname, sizeof(srv_sess->sessname)); + + rtrs_srv_set_sess_priv(rtrs, srv_sess); + + return 0; +} + +static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, + enum rtrs_srv_link_ev ev, void *priv) +{ + struct rnbd_srv_session *srv_sess = priv; + + switch (ev) { + case RTRS_SRV_LINK_EV_CONNECTED: + return create_sess(rtrs); + + case RTRS_SRV_LINK_EV_DISCONNECTED: + if (WARN_ON_ONCE(!srv_sess)) + return -EINVAL; + + destroy_sess(srv_sess); + return 0; + + default: + pr_warn("Received unknown RTRS session event %d from session %s\n", + ev, srv_sess->sessname); + return -EINVAL; + } +} + +static int process_msg_close(struct rtrs_srv *rtrs, + struct rnbd_srv_session *srv_sess, + void *data, size_t datalen, const void *usr, + size_t usrlen) +{ + const struct rnbd_msg_close *close_msg = usr; + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = rnbd_get_sess_dev(le32_to_cpu(close_msg->device_id), + srv_sess); + if (IS_ERR(sess_dev)) + return 0; + + rnbd_put_sess_dev(sess_dev); + mutex_lock(&srv_sess->lock); + rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&srv_sess->lock); + return 0; +} + +static int process_msg_open(struct rtrs_srv *rtrs, + struct rnbd_srv_session *srv_sess, + const void *msg, size_t len, + void *data, size_t datalen); + +static int process_msg_sess_info(struct rtrs_srv *rtrs, + struct rnbd_srv_session *srv_sess, + const void *msg, size_t len, + void *data, size_t datalen); + +static int rnbd_srv_rdma_ev(struct rtrs_srv *rtrs, void *priv, + struct rtrs_srv_op *id, int dir, + void *data, size_t datalen, const void *usr, + size_t usrlen) +{ + struct rnbd_srv_session *srv_sess = priv; + const struct rnbd_msg_hdr *hdr = usr; + int ret = 0; + u16 type; + + if (WARN_ON_ONCE(!srv_sess)) + return -ENODEV; + + type = le16_to_cpu(hdr->type); + + switch (type) { + case RNBD_MSG_IO: + return process_rdma(rtrs, srv_sess, id, data, datalen, usr, + usrlen); + case RNBD_MSG_CLOSE: + ret = process_msg_close(rtrs, srv_sess, data, datalen, + usr, usrlen); + break; + case RNBD_MSG_OPEN: + ret = process_msg_open(rtrs, srv_sess, usr, usrlen, + data, datalen); + break; + case RNBD_MSG_SESS_INFO: + ret = process_msg_sess_info(rtrs, srv_sess, usr, usrlen, + data, datalen); + break; + default: + pr_warn("Received unexpected message type %d with dir %d from session %s\n", + type, dir, srv_sess->sessname); + return -EINVAL; + } + + rtrs_srv_resp_rdma(id, ret); + return 0; +} + +static struct rnbd_srv_sess_dev +*rnbd_sess_dev_alloc(struct rnbd_srv_session *srv_sess) +{ + struct rnbd_srv_sess_dev *sess_dev; + int error; + + sess_dev = kzalloc(sizeof(*sess_dev), GFP_KERNEL); + if (!sess_dev) + return ERR_PTR(-ENOMEM); + + error = xa_alloc(&srv_sess->index_idr, &sess_dev->device_id, sess_dev, + xa_limit_32b, GFP_NOWAIT); + if (error < 0) { + pr_warn("Allocating idr failed, err: %d\n", error); + kfree(sess_dev); + return ERR_PTR(error); + } + + return sess_dev; +} + +static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(const char *id) +{ + struct rnbd_srv_dev *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + strlcpy(dev->id, id, sizeof(dev->id)); + kref_init(&dev->kref); + INIT_LIST_HEAD(&dev->sess_dev_list); + mutex_init(&dev->lock); + + return dev; +} + +static struct rnbd_srv_dev * +rnbd_srv_find_or_add_srv_dev(struct rnbd_srv_dev *new_dev) +{ + struct rnbd_srv_dev *dev; + + spin_lock(&dev_lock); + list_for_each_entry(dev, &dev_list, list) { + if (!strncmp(dev->id, new_dev->id, sizeof(dev->id))) { + if (!kref_get_unless_zero(&dev->kref)) + /* + * We lost the race, device is almost dead. + * Continue traversing to find a valid one. + */ + continue; + spin_unlock(&dev_lock); + return dev; + } + } + list_add(&new_dev->list, &dev_list); + spin_unlock(&dev_lock); + + return new_dev; +} + +static int rnbd_srv_check_update_open_perm(struct rnbd_srv_dev *srv_dev, + struct rnbd_srv_session *srv_sess, + enum rnbd_access_mode access_mode) +{ + int ret = -EPERM; + + mutex_lock(&srv_dev->lock); + + switch (access_mode) { + case RNBD_ACCESS_RO: + ret = 0; + break; + case RNBD_ACCESS_RW: + if (srv_dev->open_write_cnt == 0) { + srv_dev->open_write_cnt++; + ret = 0; + } else { + pr_err("Mapping device '%s' for session %s with RW permissions failed. Device already opened as 'RW' by %d client(s), access mode %s.\n", + srv_dev->id, srv_sess->sessname, + srv_dev->open_write_cnt, + rnbd_access_mode_str(access_mode)); + } + break; + case RNBD_ACCESS_MIGRATION: + if (srv_dev->open_write_cnt < 2) { + srv_dev->open_write_cnt++; + ret = 0; + } else { + pr_err("Mapping device '%s' for session %s with migration permissions failed. Device already opened as 'RW' by %d client(s), access mode %s.\n", + srv_dev->id, srv_sess->sessname, + srv_dev->open_write_cnt, + rnbd_access_mode_str(access_mode)); + } + break; + default: + pr_err("Received mapping request for device '%s' on session %s with invalid access mode: %d\n", + srv_dev->id, srv_sess->sessname, access_mode); + ret = -EINVAL; + } + + mutex_unlock(&srv_dev->lock); + + return ret; +} + +static struct rnbd_srv_dev * +rnbd_srv_get_or_create_srv_dev(struct rnbd_dev *rnbd_dev, + struct rnbd_srv_session *srv_sess, + enum rnbd_access_mode access_mode) +{ + int ret; + struct rnbd_srv_dev *new_dev, *dev; + + new_dev = rnbd_srv_init_srv_dev(rnbd_dev->name); + if (IS_ERR(new_dev)) + return new_dev; + + dev = rnbd_srv_find_or_add_srv_dev(new_dev); + if (dev != new_dev) + kfree(new_dev); + + ret = rnbd_srv_check_update_open_perm(dev, srv_sess, access_mode); + if (ret) { + rnbd_put_srv_dev(dev); + return ERR_PTR(ret); + } + + return dev; +} + +static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, + struct rnbd_srv_sess_dev *sess_dev) +{ + struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev; + + rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); + rsp->device_id = + cpu_to_le32(sess_dev->device_id); + rsp->nsectors = + cpu_to_le64(get_capacity(rnbd_dev->bdev->bd_disk)); + rsp->logical_block_size = + cpu_to_le16(bdev_logical_block_size(rnbd_dev->bdev)); + rsp->physical_block_size = + cpu_to_le16(bdev_physical_block_size(rnbd_dev->bdev)); + rsp->max_segments = + cpu_to_le16(rnbd_dev_get_max_segs(rnbd_dev)); + rsp->max_hw_sectors = + cpu_to_le32(rnbd_dev_get_max_hw_sects(rnbd_dev)); + rsp->max_write_same_sectors = + cpu_to_le32(bdev_write_same(rnbd_dev->bdev)); + rsp->max_discard_sectors = + cpu_to_le32(rnbd_dev_get_max_discard_sects(rnbd_dev)); + rsp->discard_granularity = + cpu_to_le32(rnbd_dev_get_discard_granularity(rnbd_dev)); + rsp->discard_alignment = + cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev)); + rsp->secure_discard = + cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev)); + rsp->rotational = + !blk_queue_nonrot(bdev_get_queue(rnbd_dev->bdev)); +} + +static struct rnbd_srv_sess_dev * +rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess, + const struct rnbd_msg_open *open_msg, + struct rnbd_dev *rnbd_dev, fmode_t open_flags, + struct rnbd_srv_dev *srv_dev) +{ + struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess); + + if (IS_ERR(sdev)) + return sdev; + + kref_init(&sdev->kref); + + strlcpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname)); + + sdev->rnbd_dev = rnbd_dev; + sdev->sess = srv_sess; + sdev->dev = srv_dev; + sdev->open_flags = open_flags; + sdev->access_mode = open_msg->access_mode; + + return sdev; +} + +static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess, + const char *dev_name) +{ + char *full_path; + char *a, *b; + + full_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!full_path) + return ERR_PTR(-ENOMEM); + + /* + * Replace %SESSNAME% with a real session name in order to + * create device namespace. + */ + a = strnstr(dev_search_path, "%SESSNAME%", sizeof(dev_search_path)); + if (a) { + int len = a - dev_search_path; + + len = snprintf(full_path, PATH_MAX, "%.*s/%s/%s", len, + dev_search_path, srv_sess->sessname, dev_name); + if (len >= PATH_MAX) { + pr_err("Too long path: %s, %s, %s\n", + dev_search_path, srv_sess->sessname, dev_name); + kfree(full_path); + return ERR_PTR(-EINVAL); + } + } else { + snprintf(full_path, PATH_MAX, "%s/%s", + dev_search_path, dev_name); + } + + /* eliminitate duplicated slashes */ + a = strchr(full_path, '/'); + b = a; + while (*b != '\0') { + if (*b == '/' && *a == '/') { + b++; + } else { + a++; + *a = *b; + b++; + } + } + a++; + *a = '\0'; + + return full_path; +} + +static int process_msg_sess_info(struct rtrs_srv *rtrs, + struct rnbd_srv_session *srv_sess, + const void *msg, size_t len, + void *data, size_t datalen) +{ + const struct rnbd_msg_sess_info *sess_info_msg = msg; + struct rnbd_msg_sess_info_rsp *rsp = data; + + srv_sess->ver = min_t(u8, sess_info_msg->ver, RNBD_PROTO_VER_MAJOR); + pr_debug("Session %s using protocol version %d (client version: %d, server version: %d)\n", + srv_sess->sessname, srv_sess->ver, + sess_info_msg->ver, RNBD_PROTO_VER_MAJOR); + + rsp->hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO_RSP); + rsp->ver = srv_sess->ver; + + return 0; +} + +/** + * find_srv_sess_dev() - a dev is already opened by this name + * @srv_sess: the session to search. + * @dev_name: string containing the name of the device. + * + * Return struct rnbd_srv_sess_dev if srv_sess already opened the dev_name + * NULL if the session didn't open the device yet. + */ +static struct rnbd_srv_sess_dev * +find_srv_sess_dev(struct rnbd_srv_session *srv_sess, const char *dev_name) +{ + struct rnbd_srv_sess_dev *sess_dev; + + if (list_empty(&srv_sess->sess_dev_list)) + return NULL; + + list_for_each_entry(sess_dev, &srv_sess->sess_dev_list, sess_list) + if (!strcmp(sess_dev->pathname, dev_name)) + return sess_dev; + + return NULL; +} + +static int process_msg_open(struct rtrs_srv *rtrs, + struct rnbd_srv_session *srv_sess, + const void *msg, size_t len, + void *data, size_t datalen) +{ + int ret; + struct rnbd_srv_dev *srv_dev; + struct rnbd_srv_sess_dev *srv_sess_dev; + const struct rnbd_msg_open *open_msg = msg; + fmode_t open_flags; + char *full_path; + struct rnbd_dev *rnbd_dev; + struct rnbd_msg_open_rsp *rsp = data; + + pr_debug("Open message received: session='%s' path='%s' access_mode=%d\n", + srv_sess->sessname, open_msg->dev_name, + open_msg->access_mode); + open_flags = FMODE_READ; + if (open_msg->access_mode != RNBD_ACCESS_RO) + open_flags |= FMODE_WRITE; + + mutex_lock(&srv_sess->lock); + + srv_sess_dev = find_srv_sess_dev(srv_sess, open_msg->dev_name); + if (srv_sess_dev) + goto fill_response; + + if ((strlen(dev_search_path) + strlen(open_msg->dev_name)) + >= PATH_MAX) { + pr_err("Opening device for session %s failed, device path too long. '%s/%s' is longer than PATH_MAX (%d)\n", + srv_sess->sessname, dev_search_path, open_msg->dev_name, + PATH_MAX); + ret = -EINVAL; + goto reject; + } + if (strstr(open_msg->dev_name, "..")) { + pr_err("Opening device for session %s failed, device path %s contains relative path ..\n", + srv_sess->sessname, open_msg->dev_name); + ret = -EINVAL; + goto reject; + } + full_path = rnbd_srv_get_full_path(srv_sess, open_msg->dev_name); + if (IS_ERR(full_path)) { + ret = PTR_ERR(full_path); + pr_err("Opening device '%s' for client %s failed, failed to get device full path, err: %d\n", + open_msg->dev_name, srv_sess->sessname, ret); + goto reject; + } + + rnbd_dev = rnbd_dev_open(full_path, open_flags, + &srv_sess->sess_bio_set); + if (IS_ERR(rnbd_dev)) { + pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %ld\n", + full_path, srv_sess->sessname, PTR_ERR(rnbd_dev)); + ret = PTR_ERR(rnbd_dev); + goto free_path; + } + + srv_dev = rnbd_srv_get_or_create_srv_dev(rnbd_dev, srv_sess, + open_msg->access_mode); + if (IS_ERR(srv_dev)) { + pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %ld\n", + full_path, srv_sess->sessname, PTR_ERR(srv_dev)); + ret = PTR_ERR(srv_dev); + goto rnbd_dev_close; + } + + srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg, + rnbd_dev, open_flags, + srv_dev); + if (IS_ERR(srv_sess_dev)) { + pr_err("Opening device '%s' on session %s failed, creating sess_dev failed, err: %ld\n", + full_path, srv_sess->sessname, PTR_ERR(srv_sess_dev)); + ret = PTR_ERR(srv_sess_dev); + goto srv_dev_put; + } + + /* Create the srv_dev sysfs files if they haven't been created yet. The + * reason to delay the creation is not to create the sysfs files before + * we are sure the device can be opened. + */ + mutex_lock(&srv_dev->lock); + if (!srv_dev->dev_kobj.state_in_sysfs) { + ret = rnbd_srv_create_dev_sysfs(srv_dev, rnbd_dev->bdev, + rnbd_dev->name); + if (ret) { + mutex_unlock(&srv_dev->lock); + rnbd_srv_err(srv_sess_dev, + "Opening device failed, failed to create device sysfs files, err: %d\n", + ret); + goto free_srv_sess_dev; + } + } + + ret = rnbd_srv_create_dev_session_sysfs(srv_sess_dev); + if (ret) { + mutex_unlock(&srv_dev->lock); + rnbd_srv_err(srv_sess_dev, + "Opening device failed, failed to create dev client sysfs files, err: %d\n", + ret); + goto free_srv_sess_dev; + } + + list_add(&srv_sess_dev->dev_list, &srv_dev->sess_dev_list); + mutex_unlock(&srv_dev->lock); + + list_add(&srv_sess_dev->sess_list, &srv_sess->sess_dev_list); + + rnbd_srv_info(srv_sess_dev, "Opened device '%s'\n", srv_dev->id); + + kfree(full_path); + +fill_response: + rnbd_srv_fill_msg_open_rsp(rsp, srv_sess_dev); + mutex_unlock(&srv_sess->lock); + return 0; + +free_srv_sess_dev: + xa_erase(&srv_sess->index_idr, srv_sess_dev->device_id); + synchronize_rcu(); + kfree(srv_sess_dev); +srv_dev_put: + if (open_msg->access_mode != RNBD_ACCESS_RO) { + mutex_lock(&srv_dev->lock); + srv_dev->open_write_cnt--; + mutex_unlock(&srv_dev->lock); + } + rnbd_put_srv_dev(srv_dev); +rnbd_dev_close: + rnbd_dev_close(rnbd_dev); +free_path: + kfree(full_path); +reject: + mutex_unlock(&srv_sess->lock); + return ret; +} + +static struct rtrs_srv_ctx *rtrs_ctx; + +static struct rtrs_srv_ops rtrs_ops; +static int __init rnbd_srv_init_module(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct rnbd_msg_hdr) != 4); + BUILD_BUG_ON(sizeof(struct rnbd_msg_sess_info) != 36); + BUILD_BUG_ON(sizeof(struct rnbd_msg_sess_info_rsp) != 36); + BUILD_BUG_ON(sizeof(struct rnbd_msg_open) != 264); + BUILD_BUG_ON(sizeof(struct rnbd_msg_close) != 8); + BUILD_BUG_ON(sizeof(struct rnbd_msg_open_rsp) != 56); + rtrs_ops = (struct rtrs_srv_ops) { + .rdma_ev = rnbd_srv_rdma_ev, + .link_ev = rnbd_srv_link_ev, + }; + rtrs_ctx = rtrs_srv_open(&rtrs_ops, port_nr); + if (IS_ERR(rtrs_ctx)) { + err = PTR_ERR(rtrs_ctx); + pr_err("rtrs_srv_open(), err: %d\n", err); + return err; + } + + err = rnbd_srv_create_sysfs_files(); + if (err) { + pr_err("rnbd_srv_create_sysfs_files(), err: %d\n", err); + rtrs_srv_close(rtrs_ctx); + return err; + } + + return 0; +} + +static void __exit rnbd_srv_cleanup_module(void) +{ + rtrs_srv_close(rtrs_ctx); + WARN_ON(!list_empty(&sess_list)); + rnbd_srv_destroy_sysfs_files(); +} + +module_init(rnbd_srv_init_module); +module_exit(rnbd_srv_cleanup_module); From f0aad9baadb5b2933e6f4fbb2fd3ffbdcc35b2cf Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:27 +0200 Subject: [PATCH 349/562] block/rnbd: server: functionality for IO submitting to block dev This provides helper functions for IO submitting to block dev. Link: https://lore.kernel.org/r/20200511135131.27580-22-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-srv-dev.c | 134 ++++++++++++++++++++++++++++++ drivers/block/rnbd/rnbd-srv-dev.h | 92 ++++++++++++++++++++ 2 files changed, 226 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-srv-dev.c create mode 100644 drivers/block/rnbd/rnbd-srv-dev.h diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c new file mode 100644 index 000000000000..5eddfd29ab64 --- /dev/null +++ b/drivers/block/rnbd/rnbd-srv-dev.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include "rnbd-srv-dev.h" +#include "rnbd-log.h" + +struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags, + struct bio_set *bs) +{ + struct rnbd_dev *dev; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + dev->blk_open_flags = flags; + dev->bdev = blkdev_get_by_path(path, flags, THIS_MODULE); + ret = PTR_ERR_OR_ZERO(dev->bdev); + if (ret) + goto err; + + dev->blk_open_flags = flags; + bdevname(dev->bdev, dev->name); + dev->ibd_bio_set = bs; + + return dev; + +err: + kfree(dev); + return ERR_PTR(ret); +} + +void rnbd_dev_close(struct rnbd_dev *dev) +{ + blkdev_put(dev->bdev, dev->blk_open_flags); + kfree(dev); +} + +static void rnbd_dev_bi_end_io(struct bio *bio) +{ + struct rnbd_dev_blk_io *io = bio->bi_private; + + rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status)); + bio_put(bio); +} + +/** + * rnbd_bio_map_kern - map kernel address into bio + * @data: pointer to buffer to map + * @bs: bio_set to use. + * @len: length in bytes + * @gfp_mask: allocation flags for bio allocation + * + * Map the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +static struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs, + unsigned int len, gfp_t gfp_mask) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + int offset, i; + struct bio *bio; + + bio = bio_alloc_bioset(gfp_mask, nr_pages, bs); + if (!bio) + return ERR_PTR(-ENOMEM); + + offset = offset_in_page(kaddr); + for (i = 0; i < nr_pages; i++) { + unsigned int bytes = PAGE_SIZE - offset; + + if (len <= 0) + break; + + if (bytes > len) + bytes = len; + + if (bio_add_page(bio, virt_to_page(data), bytes, + offset) < bytes) { + /* we don't support partial mappings */ + bio_put(bio); + return ERR_PTR(-EINVAL); + } + + data += bytes; + len -= bytes; + offset = 0; + } + + bio->bi_end_io = bio_put; + return bio; +} + +int rnbd_dev_submit_io(struct rnbd_dev *dev, sector_t sector, void *data, + size_t len, u32 bi_size, enum rnbd_io_flags flags, + short prio, void *priv) +{ + struct rnbd_dev_blk_io *io; + struct bio *bio; + + /* Generate bio with pages pointing to the rdma buffer */ + bio = rnbd_bio_map_kern(data, dev->ibd_bio_set, len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + io = container_of(bio, struct rnbd_dev_blk_io, bio); + + io->dev = dev; + io->priv = priv; + + bio->bi_end_io = rnbd_dev_bi_end_io; + bio->bi_private = io; + bio->bi_opf = rnbd_to_bio_flags(flags); + bio->bi_iter.bi_sector = sector; + bio->bi_iter.bi_size = bi_size; + bio_set_prio(bio, prio); + bio_set_dev(bio, dev->bdev); + + submit_bio(bio); + + return 0; +} diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h new file mode 100644 index 000000000000..0f65b09a270e --- /dev/null +++ b/drivers/block/rnbd/rnbd-srv-dev.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#ifndef RNBD_SRV_DEV_H +#define RNBD_SRV_DEV_H + +#include +#include "rnbd-proto.h" + +struct rnbd_dev { + struct block_device *bdev; + struct bio_set *ibd_bio_set; + fmode_t blk_open_flags; + char name[BDEVNAME_SIZE]; +}; + +struct rnbd_dev_blk_io { + struct rnbd_dev *dev; + void *priv; + /* have to be last member for front_pad usage of bioset_init */ + struct bio bio; +}; + +/** + * rnbd_dev_open() - Open a device + * @flags: open flags + * @bs: bio_set to use during block io, + */ +struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags, + struct bio_set *bs); + +/** + * rnbd_dev_close() - Close a device + */ +void rnbd_dev_close(struct rnbd_dev *dev); + +void rnbd_endio(void *priv, int error); + +static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev) +{ + return queue_max_segments(bdev_get_queue(dev->bdev)); +} + +static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev) +{ + return queue_max_hw_sectors(bdev_get_queue(dev->bdev)); +} + +static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev) +{ + return blk_queue_secure_erase(bdev_get_queue(dev->bdev)); +} + +static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev) +{ + if (!blk_queue_discard(bdev_get_queue(dev->bdev))) + return 0; + + return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev), + REQ_OP_DISCARD); +} + +static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev) +{ + return bdev_get_queue(dev->bdev)->limits.discard_granularity; +} + +static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev) +{ + return bdev_get_queue(dev->bdev)->limits.discard_alignment; +} + +/** + * rnbd_dev_submit_io() - Submit an I/O to the disk + * @dev: device to that the I/O is submitted + * @sector: address to read/write data to + * @data: I/O data to write or buffer to read I/O date into + * @len: length of @data + * @bi_size: Amount of data that will be read/written + * @prio: IO priority + * @priv: private data passed to @io_fn + */ +int rnbd_dev_submit_io(struct rnbd_dev *dev, sector_t sector, void *data, + size_t len, u32 bi_size, enum rnbd_io_flags flags, + short prio, void *priv); + +#endif /* RNBD_SRV_DEV_H */ From 8cee532f469bbcdb6ac0ab161ebff36fbc6439d7 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:28 +0200 Subject: [PATCH 350/562] block/rnbd: server: sysfs interface functions This is the sysfs interface to rnbd mapped devices on server side: /sys/class/rnbd-server/ctl/devices// |- block_dev | *** link pointing to the corresponding block device sysfs entry | |- sessions// | *** sessions directory | |- read_only | *** is devices mapped as read only | |- mapping_path *** relative device path provided by the client during mapping Link: https://lore.kernel.org/r/20200511135131.27580-23-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-srv-sysfs.c | 215 ++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 drivers/block/rnbd/rnbd-srv-sysfs.c diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c new file mode 100644 index 000000000000..106775c074d1 --- /dev/null +++ b/drivers/block/rnbd/rnbd-srv-sysfs.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rnbd-srv.h" + +static struct device *rnbd_dev; +static struct class *rnbd_dev_class; +static struct kobject *rnbd_devs_kobj; + +static void rnbd_srv_dev_release(struct kobject *kobj) +{ + struct rnbd_srv_dev *dev; + + dev = container_of(kobj, struct rnbd_srv_dev, dev_kobj); + + kfree(dev); +} + +static struct kobj_type dev_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .release = rnbd_srv_dev_release +}; + +int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, + struct block_device *bdev, + const char *dev_name) +{ + struct kobject *bdev_kobj; + int ret; + + ret = kobject_init_and_add(&dev->dev_kobj, &dev_ktype, + rnbd_devs_kobj, dev_name); + if (ret) + return ret; + + dev->dev_sessions_kobj = kobject_create_and_add("sessions", + &dev->dev_kobj); + if (!dev->dev_sessions_kobj) + goto put_dev_kobj; + + bdev_kobj = &disk_to_dev(bdev->bd_disk)->kobj; + ret = sysfs_create_link(&dev->dev_kobj, bdev_kobj, "block_dev"); + if (ret) + goto put_sess_kobj; + + return 0; + +put_sess_kobj: + kobject_put(dev->dev_sessions_kobj); +put_dev_kobj: + kobject_put(&dev->dev_kobj); + return ret; +} + +void rnbd_srv_destroy_dev_sysfs(struct rnbd_srv_dev *dev) +{ + sysfs_remove_link(&dev->dev_kobj, "block_dev"); + kobject_del(dev->dev_sessions_kobj); + kobject_put(dev->dev_sessions_kobj); + kobject_del(&dev->dev_kobj); + kobject_put(&dev->dev_kobj); +} + +static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); + + return scnprintf(page, PAGE_SIZE, "%d\n", + !(sess_dev->open_flags & FMODE_WRITE)); +} + +static struct kobj_attribute rnbd_srv_dev_session_ro_attr = + __ATTR_RO(read_only); + +static ssize_t access_mode_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", + rnbd_access_mode_str(sess_dev->access_mode)); +} + +static struct kobj_attribute rnbd_srv_dev_session_access_mode_attr = + __ATTR_RO(access_mode); + +static ssize_t mapping_path_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", sess_dev->pathname); +} + +static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr = + __ATTR_RO(mapping_path); + +static struct attribute *rnbd_srv_default_dev_sessions_attrs[] = { + &rnbd_srv_dev_session_access_mode_attr.attr, + &rnbd_srv_dev_session_ro_attr.attr, + &rnbd_srv_dev_session_mapping_path_attr.attr, + NULL, +}; + +static struct attribute_group rnbd_srv_default_dev_session_attr_group = { + .attrs = rnbd_srv_default_dev_sessions_attrs, +}; + +void rnbd_srv_destroy_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev) +{ + sysfs_remove_group(&sess_dev->kobj, + &rnbd_srv_default_dev_session_attr_group); + + kobject_del(&sess_dev->kobj); + kobject_put(&sess_dev->kobj); +} + +static void rnbd_srv_sess_dev_release(struct kobject *kobj) +{ + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); + rnbd_destroy_sess_dev(sess_dev); +} + +static struct kobj_type rnbd_srv_sess_dev_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .release = rnbd_srv_sess_dev_release, +}; + +int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev) +{ + int ret; + + ret = kobject_init_and_add(&sess_dev->kobj, &rnbd_srv_sess_dev_ktype, + sess_dev->dev->dev_sessions_kobj, "%s", + sess_dev->sess->sessname); + if (ret) + return ret; + + ret = sysfs_create_group(&sess_dev->kobj, + &rnbd_srv_default_dev_session_attr_group); + if (ret) + goto err; + + return 0; + +err: + kobject_put(&sess_dev->kobj); + + return ret; +} + +int rnbd_srv_create_sysfs_files(void) +{ + int err; + + rnbd_dev_class = class_create(THIS_MODULE, "rnbd-server"); + if (IS_ERR(rnbd_dev_class)) + return PTR_ERR(rnbd_dev_class); + + rnbd_dev = device_create(rnbd_dev_class, NULL, + MKDEV(0, 0), NULL, "ctl"); + if (IS_ERR(rnbd_dev)) { + err = PTR_ERR(rnbd_dev); + goto cls_destroy; + } + rnbd_devs_kobj = kobject_create_and_add("devices", &rnbd_dev->kobj); + if (!rnbd_devs_kobj) { + err = -ENOMEM; + goto dev_destroy; + } + + return 0; + +dev_destroy: + device_destroy(rnbd_dev_class, MKDEV(0, 0)); +cls_destroy: + class_destroy(rnbd_dev_class); + + return err; +} + +void rnbd_srv_destroy_sysfs_files(void) +{ + kobject_del(rnbd_devs_kobj); + kobject_put(rnbd_devs_kobj); + device_destroy(rnbd_dev_class, MKDEV(0, 0)); + class_destroy(rnbd_dev_class); +} From bc01885342e193e7943d86ccbd7bc3e8fee50a68 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:29 +0200 Subject: [PATCH 351/562] block/rnbd: include client and server modules into kernel compilation Add rnbd Makefile, Kconfig and also corresponding lines into upper block layer files. Link: https://lore.kernel.org/r/20200511135131.27580-24-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- drivers/block/Kconfig | 2 ++ drivers/block/Makefile | 1 + drivers/block/rnbd/Kconfig | 28 ++++++++++++++++++++++++++++ drivers/block/rnbd/Makefile | 15 +++++++++++++++ 4 files changed, 46 insertions(+) create mode 100644 drivers/block/rnbd/Kconfig create mode 100644 drivers/block/rnbd/Makefile diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 025b1b77b11a..084b9efcefca 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -458,4 +458,6 @@ config BLK_DEV_RSXX To compile this driver as a module, choose M here: the module will be called rsxx. +source "drivers/block/rnbd/Kconfig" + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 795facd8cf19..e1f63117ee94 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_ZRAM) += zram/ +obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o null_blk-objs := null_blk_main.o diff --git a/drivers/block/rnbd/Kconfig b/drivers/block/rnbd/Kconfig new file mode 100644 index 000000000000..4b6d3d816d1f --- /dev/null +++ b/drivers/block/rnbd/Kconfig @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +config BLK_DEV_RNBD + bool + +config BLK_DEV_RNBD_CLIENT + tristate "RDMA Network Block Device driver client" + depends on INFINIBAND_RTRS_CLIENT + select BLK_DEV_RNBD + help + RNBD client is a network block device driver using rdma transport. + + RNBD client allows for mapping of a remote block devices over + RTRS protocol from a target system where RNBD server is running. + + If unsure, say N. + +config BLK_DEV_RNBD_SERVER + tristate "RDMA Network Block Device driver server" + depends on INFINIBAND_RTRS_SERVER + select BLK_DEV_RNBD + help + RNBD server is the server side of RNBD using rdma transport. + + RNBD server allows for exporting local block devices to a remote client + over RTRS protocol. + + If unsure, say N. diff --git a/drivers/block/rnbd/Makefile b/drivers/block/rnbd/Makefile new file mode 100644 index 000000000000..5bb1a7ad1ada --- /dev/null +++ b/drivers/block/rnbd/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +ccflags-y := -I$(srctree)/drivers/infiniband/ulp/rtrs + +rnbd-client-y := rnbd-clt.o \ + rnbd-clt-sysfs.o \ + rnbd-common.o + +rnbd-server-y := rnbd-common.o \ + rnbd-srv.o \ + rnbd-srv-dev.o \ + rnbd-srv-sysfs.o + +obj-$(CONFIG_BLK_DEV_RNBD_CLIENT) += rnbd-client.o +obj-$(CONFIG_BLK_DEV_RNBD_SERVER) += rnbd-server.o From aa4d16e44f607caccaa697fcb29f2c94672f08d5 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:30 +0200 Subject: [PATCH 352/562] block/rnbd: a bit of documentation README with description of major sysfs entries, sysfs documentation are moved to ABI dir as Bart suggested. Link: https://lore.kernel.org/r/20200511135131.27580-25-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Acked-by: Jens Axboe Signed-off-by: Jason Gunthorpe --- Documentation/ABI/testing/sysfs-block-rnbd | 46 ++++++++ .../ABI/testing/sysfs-class-rnbd-client | 111 ++++++++++++++++++ .../ABI/testing/sysfs-class-rnbd-server | 50 ++++++++ drivers/block/rnbd/README | 92 +++++++++++++++ 4 files changed, 299 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-rnbd create mode 100644 Documentation/ABI/testing/sysfs-class-rnbd-client create mode 100644 Documentation/ABI/testing/sysfs-class-rnbd-server create mode 100644 drivers/block/rnbd/README diff --git a/Documentation/ABI/testing/sysfs-block-rnbd b/Documentation/ABI/testing/sysfs-block-rnbd new file mode 100644 index 000000000000..8f070b47f361 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-rnbd @@ -0,0 +1,46 @@ +What: /sys/block/rnbd/rnbd/unmap_device +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: To unmap a volume, "normal" or "force" has to be written to: + /sys/block/rnbd/rnbd/unmap_device + + When "normal" is used, the operation will fail with EBUSY if any process + is using the device. When "force" is used, the device is also unmapped + when device is in use. All I/Os that are in progress will fail. + + Example: + + # echo "normal" > /sys/block/rnbd0/rnbd/unmap_device + +What: /sys/block/rnbd/rnbd/state +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: The file contains the current state of the block device. The state file + returns "open" when the device is successfully mapped from the server + and accepting I/O requests. When the connection to the server gets + disconnected in case of an error (e.g. link failure), the state file + returns "closed" and all I/O requests submitted to it will fail with -EIO. + +What: /sys/block/rnbd/rnbd/session +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: RNBD uses RTRS session to transport the data between client and + server. The entry "session" contains the name of the session, that + was used to establish the RTRS session. It's the same name that + was passed as server parameter to the map_device entry. + +What: /sys/block/rnbd/rnbd/mapping_path +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains the path that was passed as "device_path" to the map_device + operation. + +What: /sys/block/rnbd/rnbd/access_mode +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains the device access mode: ro, rw or migration. diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-client b/Documentation/ABI/testing/sysfs-class-rnbd-client new file mode 100644 index 000000000000..c084f203b41e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-rnbd-client @@ -0,0 +1,111 @@ +What: /sys/class/rnbd-client +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Provide information about RNBD-client. + All sysfs files that are not read-only provide the usage information on read: + + Example: + # cat /sys/class/rnbd-client/ctl/map_device + + > Usage: echo "sessname= path=<[srcaddr,]dstaddr> + > [path=<[srcaddr,]dstaddr>] device_path= + > [access_mode=] > map_device + > + > addr ::= [ ip: | ip: | gid: ] + +What: /sys/class/rnbd-client/ctl/map_device +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Expected format is the following: + + sessname= + path=<[srcaddr,]dstaddr> [path=<[srcaddr,]dstaddr> ...] + device_path= + [access_mode=] + + Where: + + sessname: accepts a string not bigger than 256 chars, which identifies + a given session on the client and on the server. + I.e. "clt_hostname-srv_hostname" could be a natural choice. + + path: describes a connection between the client and the server by + specifying destination and, when required, the source address. + The addresses are to be provided in the following format: + + ip: + ip: + gid: + + for example: + + path=ip:10.0.0.66 + The single addr is treated as the destination. + The connection will be established to this server from any client IP address. + + path=ip:10.0.0.66,ip:10.0.1.66 + First addr is the source address and the second is the destination. + + If multiple "path=" options are specified multiple connection + will be established and data will be sent according to + the selected multipath policy (see RTRS mp_policy sysfs entry description). + + device_path: Path to the block device on the server side. Path is specified + relative to the directory on server side configured in the + 'dev_search_path' module parameter of the rnbd_server. + The rnbd_server prepends the received from client + with and tries to open the + / block device. On success, + a /dev/rnbd device file, a /sys/block/rnbd_client/rnbd/ + directory and an entry in /sys/class/rnbd-client/ctl/devices + will be created. + + If 'dev_search_path' contains '%SESSNAME%', then each session can + have different devices namespace, e.g. server was configured with + the following parameter "dev_search_path=/run/rnbd-devs/%SESSNAME%", + client has this string "sessname=blya device_path=sda", then server + will try to open: /run/rnbd-devs/blya/sda. + + access_mode: the access_mode parameter specifies if the device is to be + mapped as "ro" read-only or "rw" read-write. The server allows + a device to be exported in rw mode only once. The "migration" + access mode has to be specified if a second mapping in read-write + mode is desired. + + By default "rw" is used. + + Exit Codes: + + If the device is already mapped it will fail with EEXIST. If the input + has an invalid format it will return EINVAL. If the device path cannot + be found on the server, it will fail with ENOENT. + + Finding device file after mapping + --------------------------------- + + After mapping, the device file can be found by: + o The symlink /sys/class/rnbd-client/ctl/devices/ + points to /sys/block/. The last part of the symlink destination + is the same as the device name. By extracting the last part of the + path the path to the device /dev/ can be build. + + o /dev/block/$(cat /sys/class/rnbd-client/ctl/devices//dev) + + How to find the of the device is described on the next + section. + +What: /sys/class/rnbd-client/ctl/devices/ +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: For each device mapped on the client a new symbolic link is created as + /sys/class/rnbd-client/ctl/devices/, which points + to the block device created by rnbd (/sys/block/rnbd/). + The of each device is created as follows: + + - If the 'device_path' provided during mapping contains slashes ("/"), + they are replaced by exclamation mark ("!") and used as as the + . Otherwise, the will be the same as the + "device_path" provided. diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-server b/Documentation/ABI/testing/sysfs-class-rnbd-server new file mode 100644 index 000000000000..ba60a90c0e45 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-rnbd-server @@ -0,0 +1,50 @@ +What: /sys/class/rnbd-server +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: provide information about RNBD-server. + +What: /sys/class/rnbd-server/ctl/ +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: When a client maps a device, a directory entry with the name of the + block device is created under /sys/class/rnbd-server/ctl/devices/. + +What: /sys/class/rnbd-server/ctl/devices//block_dev +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Is a symlink to the sysfs entry of the exported device. + + Example: + block_dev -> ../../../../class/block/ram0 + +What: /sys/class/rnbd-server/ctl/devices//sessions/ +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: For each client a particular device is exported to, following directory will be + created: + + /sys/class/rnbd-server/ctl/devices//sessions// + + When the device is unmapped by that client, the directory will be removed. + +What: /sys/class/rnbd-server/ctl/devices//sessions//read_only +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains '1' if device is mapped read-only, otherwise '0'. + +What: /sys/class/rnbd-server/ctl/devices//sessions//mapping_path +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains the relative device path provided by the user during mapping. + +What: /sys/class/rnbd-server/ctl/devices//sessions//access_mode +Date: Feb 2020 +KernelVersion: 5.7 +Contact: Jack Wang Danil Kipnis +Description: Contains the device access mode: ro, rw or migration. diff --git a/drivers/block/rnbd/README b/drivers/block/rnbd/README new file mode 100644 index 000000000000..1773c0aa0bd4 --- /dev/null +++ b/drivers/block/rnbd/README @@ -0,0 +1,92 @@ +******************************** +RDMA Network Block Device (RNBD) +******************************** + +Introduction +------------ + +RNBD (RDMA Network Block Device) is a pair of kernel modules +(client and server) that allow for remote access of a block device on +the server over RTRS protocol using the RDMA (InfiniBand, RoCE, iWARP) +transport. After being mapped, the remote block devices can be accessed +on the client side as local block devices. + +I/O is transferred between client and server by the RTRS transport +modules. The administration of RNBD and RTRS modules is done via +sysfs entries. + +Requirements +------------ + + RTRS kernel modules + +Quick Start +----------- + +Server side: + # modprobe rnbd_server + +Client side: + # modprobe rnbd_client + # echo "sessname=blya path=ip:10.50.100.66 device_path=/dev/ram0" > \ + /sys/devices/virtual/rnbd-client/ctl/map_device + + Where "sessname=" is a session name, a string to identify the session + on client and on server sides; "path=" is a destination IP address or + a pair of a source and a destination IPs, separated by comma. Multiple + "path=" options can be specified in order to use multipath (see RTRS + description for details); "device_path=" is the block device to be + mapped from the server side. After the session to the server machine is + established, the mapped device will appear on the client side under + /dev/rnbd. + + +RNBD-Server Module Parameters +============================= + +dev_search_path +--------------- + +When a device is mapped from the client, the server generates the path +to the block device on the server side by concatenating dev_search_path +and the "device_path" that was specified in the map_device operation. + +The default dev_search_path is: "/". + +dev_search_path option can also contain %SESSNAME% in order to provide +different device namespaces for different sessions. See "device_path" +option for details. + +============================ +Protocol (rnbd/rnbd-proto.h) +============================ + +1. Before mapping first device from a given server, client sends an +RNBD_MSG_SESS_INFO to the server. Server responds with +RNBD_MSG_SESS_INFO_RSP. Currently the messages only contain the protocol +version for backward compatibility. + +2. Client requests to open a device by sending RNBD_MSG_OPEN message. This +contains the path to the device and access mode (read-only or writable). +Server responds to the message with RNBD_MSG_OPEN_RSP. This contains +a 32 bit device id to be used for IOs and device "geometry" related +information: side, max_hw_sectors, etc. + +3. Client attaches RNBD_MSG_IO to each IO message send to a device. This +message contains device id, provided by server in his rnbd_msg_open_rsp, +sector to be accessed, read-write flags and bi_size. + +4. Client closes a device by sending RNBD_MSG_CLOSE which contains only the +device id provided by the server. + +========================================= +Contributors List(in alphabetical order) +========================================= +Danil Kipnis +Fabian Holler +Guoqing Jiang +Jack Wang +Kleber Souza +Lutz Pogrell +Milind Dumbare +Roman Penyaev From f11e0ec55f0c80ff47693af2150bad5db0e20387 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 11 May 2020 15:51:31 +0200 Subject: [PATCH 353/562] MAINTAINERS: Add maintainers for RNBD/RTRS modules Danil and I will maintain RNBD/RTRS modules. Link: https://lore.kernel.org/r/20200511135131.27580-26-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..8031a923e5a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14448,6 +14448,13 @@ F: arch/riscv/ N: riscv K: riscv +RNBD BLOCK DRIVERS +M: Danil Kipnis +M: Jack Wang +L: linux-block@vger.kernel.org +S: Maintained +F: drivers/block/rnbd/ + ROCCAT DRIVERS M: Stefan Achatz S: Maintained @@ -14576,6 +14583,13 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-devel F: drivers/net/wireless/realtek/rtl8xxxu/ +RTRS TRANSPORT DRIVERS +M: Danil Kipnis +M: Jack Wang +L: linux-rdma@vger.kernel.org +S: Maintained +F: drivers/infiniband/ulp/rtrs/ + RXRPC SOCKETS (AF_RXRPC) M: David Howells L: linux-afs@lists.infradead.org From b19a530b002fabdd93da62504b9cb0778447e8e2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 6 May 2020 11:24:38 +0300 Subject: [PATCH 354/562] RDMA/uverbs: Cleanup wq/srq context usage from uverbs layer Both wq_context and srq_context are some leftover from the past in uverbs layer, they are not really in use, drop them. Link: https://lore.kernel.org/r/20200506082444.14502-5-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 060b4ebbd2ba..dc8fe1a4eba3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2954,7 +2954,6 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) wq_init_attr.cq = cq; wq_init_attr.max_sge = cmd.max_sge; wq_init_attr.max_wr = cmd.max_wr; - wq_init_attr.wq_context = attrs->ufile; wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; wq_init_attr.create_flags = cmd.create_flags; @@ -2972,7 +2971,6 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) wq->cq = cq; wq->pd = pd; wq->device = pd->device; - wq->wq_context = wq_init_attr.wq_context; atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); @@ -3441,7 +3439,6 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, } attr.event_handler = ib_uverbs_srq_event_handler; - attr.srq_context = attrs->ufile; attr.srq_type = cmd->srq_type; attr.attr.max_wr = cmd->max_wr; attr.attr.max_sge = cmd->max_sge; @@ -3460,7 +3457,6 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, srq->srq_type = cmd->srq_type; srq->uobject = obj; srq->event_handler = attr.event_handler; - srq->srq_context = attr.srq_context; ret = pd->device->ops.create_srq(srq, &attr, udata); if (ret) From dbd67252869ba58d086edfa14113e10f8059b97e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 6 May 2020 11:24:42 +0300 Subject: [PATCH 355/562] RDMA/uverbs: Fix create WQ to use the given user handle Fix create WQ to use the given user handle, in addition dropped some duplicated code from this flow. Fixes: fd3c7904db6e ("IB/core: Change idr objects to use the new schema") Fixes: f213c0527210 ("IB/uverbs: Add WQ support") Link: https://lore.kernel.org/r/20200506082444.14502-9-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index dc8fe1a4eba3..e03f3a43996b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2958,6 +2958,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) wq_init_attr.event_handler = ib_uverbs_wq_event_handler; wq_init_attr.create_flags = cmd.create_flags; INIT_LIST_HEAD(&obj->uevent.event_list); + obj->uevent.uobject.user_handle = cmd.user_handle; wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata); if (IS_ERR(wq)) { @@ -2974,8 +2975,6 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); - wq->uobject = obj; - obj->uevent.uobject.object = wq; memset(&resp, 0, sizeof(resp)); resp.wq_handle = obj->uevent.uobject.id; From b0810b037de0b62a3c6e3abfc123fe2734335f53 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 11:24:39 +0300 Subject: [PATCH 356/562] RDMA/core: Consolidate ib_create_srq flows The uverbs layer largely duplicate the code in ib_create_srq(), with the slight difference that it passes in a udata. Move all the code together into ib_create_srq_user() and provide an inline for kernel users, similar to other create calls. Link: https://lore.kernel.org/r/20200506082444.14502-6-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 40 +++++----------------------- drivers/infiniband/core/verbs.c | 29 +++++++++++++++----- include/rdma/ib_verbs.h | 27 +++++++++---------- 3 files changed, 40 insertions(+), 56 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e03f3a43996b..d5642bcf93ee 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3444,38 +3444,15 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, attr.attr.srq_limit = cmd->srq_limit; INIT_LIST_HEAD(&obj->uevent.event_list); + obj->uevent.uobject.user_handle = cmd->user_handle; - srq = rdma_zalloc_drv_obj(ib_dev, ib_srq); - if (!srq) { - ret = -ENOMEM; - goto err_put; + srq = ib_create_srq_user(pd, &attr, obj, udata); + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); + goto err_put_pd; } - srq->device = pd->device; - srq->pd = pd; - srq->srq_type = cmd->srq_type; - srq->uobject = obj; - srq->event_handler = attr.event_handler; - - ret = pd->device->ops.create_srq(srq, &attr, udata); - if (ret) - goto err_free; - - if (ib_srq_has_cq(cmd->srq_type)) { - srq->ext.cq = attr.ext.cq; - atomic_inc(&attr.ext.cq->usecnt); - } - - if (cmd->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; - atomic_inc(&attr.ext.xrc.xrcd->usecnt); - } - - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); - obj->uevent.uobject.object = srq; - obj->uevent.uobject.user_handle = cmd->user_handle; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; @@ -3501,13 +3478,8 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, err_copy: ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); - /* It was released in ib_destroy_srq_user */ - srq = NULL; -err_free: - kfree(srq); -err_put: +err_put_pd: uobj_put_obj_read(pd); - err_put_cq: if (ib_srq_has_cq(cmd->srq_type)) rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index bf0249f76ae9..e2c9430a3ff1 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -981,15 +981,29 @@ EXPORT_SYMBOL(rdma_destroy_ah_user); /* Shared receive queues */ -struct ib_srq *ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr) +/** + * ib_create_srq_user - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * SRQ. If SRQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created SRQ. + * @uobject - uobject pointer if this is not a kernel SRQ + * @udata - udata pointer if this is not a kernel SRQ + * + * srq_attr->max_wr and srq_attr->max_sge are read the determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ib_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ib_create_srq_user(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_usrq_object *uobject, + struct ib_udata *udata) { struct ib_srq *srq; int ret; - if (!pd->device->ops.create_srq) - return ERR_PTR(-EOPNOTSUPP); - srq = rdma_zalloc_drv_obj(pd->device, ib_srq); if (!srq) return ERR_PTR(-ENOMEM); @@ -999,6 +1013,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; srq->srq_type = srq_init_attr->srq_type; + srq->uobject = uobject; if (ib_srq_has_cq(srq->srq_type)) { srq->ext.cq = srq_init_attr->ext.cq; @@ -1010,7 +1025,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, } atomic_inc(&pd->usecnt); - ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL); + ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); if (ret) { atomic_dec(&srq->pd->usecnt); if (srq->srq_type == IB_SRQT_XRC) @@ -1023,7 +1038,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, return srq; } -EXPORT_SYMBOL(ib_create_srq); +EXPORT_SYMBOL(ib_create_srq_user); int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4c488cade70f..db58f11552f1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3559,21 +3559,18 @@ static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags) return rdma_destroy_ah_user(ah, flags, NULL); } -/** - * ib_create_srq - Creates a SRQ associated with the specified protection - * domain. - * @pd: The protection domain associated with the SRQ. - * @srq_init_attr: A list of initial attributes required to create the - * SRQ. If SRQ creation succeeds, then the attributes are updated to - * the actual capabilities of the created SRQ. - * - * srq_attr->max_wr and srq_attr->max_sge are read the determine the - * requested size of the SRQ, and set to the actual values allocated - * on return. If ib_create_srq() succeeds, then max_wr and max_sge - * will always be at least as large as the requested values. - */ -struct ib_srq *ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr); +struct ib_srq *ib_create_srq_user(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_usrq_object *uobject, + struct ib_udata *udata); +static inline struct ib_srq * +ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr) +{ + if (!pd->device->ops.create_srq) + return ERR_PTR(-EOPNOTSUPP); + + return ib_create_srq_user(pd, srq_init_attr, NULL, NULL); +} /** * ib_modify_srq - Modifies the attributes for the specified SRQ. From a8f5c1f1a5c80e5a4bde5eaa1de645d72c562da6 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 11 May 2020 15:37:09 +0800 Subject: [PATCH 357/562] RDMA/srpt: Add a newline when printing parameter 'srpt_service_guid' by sysfs When I cat module parameter 'srpt_service_guid', it displays as follows. It is better to add a newline for easy reading. [root@hulk-202 ~]# cat /sys/module/ib_srpt/parameters/srpt_service_guid 0x0205cdfffe8346b9[root@hulk-202 ~]# Link: https://lore.kernel.org/r/1589182629-27743-1-git-send-email-wangxiongfeng2@huawei.com Signed-off-by: Xiongfeng Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 7ed38d1cb997..63056af5337c 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -81,7 +81,7 @@ MODULE_PARM_DESC(srpt_srq_size, static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { - return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg); + return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, 0444); From daeee976904c0b7326eb4c033df7b28d4b726177 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 13 May 2020 12:53:04 +0300 Subject: [PATCH 358/562] RDMA/mlx5: Update mlx5_ib driver name Current description doesn't include new devices, change it by updating to have more generic description and remove DRIVER_NAME and DRIVER_VERSION defines. Link: https://lore.kernel.org/r/20200513095304.210240-1-leon@kernel.org Signed-off-by: Shay Drory Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 38bf3841741c..26f0b39c7f74 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -72,17 +72,10 @@ #define UVERBS_MODULE_NAME mlx5_ib #include -#define DRIVER_NAME "mlx5_ib" -#define DRIVER_VERSION "5.0-0" - MODULE_AUTHOR("Eli Cohen "); -MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); +MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) IB driver"); MODULE_LICENSE("Dual BSD/GPL"); -static char mlx5_version[] = - DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" - DRIVER_VERSION "\n"; - struct mlx5_ib_event_work { struct work_struct work; union { @@ -7315,8 +7308,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) int port_type_cap; int num_ports; - printk_once(KERN_INFO "%s", mlx5_version); - if (MLX5_ESWITCH_MANAGER(mdev) && mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { if (!mlx5_core_mp_enabled(mdev)) From 23bbd5818e2b0d265aa1835e66f5055f63a8fa4c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 14 May 2020 14:47:20 +0300 Subject: [PATCH 359/562] RDMA/srpt: Fix disabling device management Avoid disabling device management for devices that don't support Management datagrams (MADs) by checking if the "mad_agent" pointer is initialized before calling ib_modify_port, also fix the error flow in srpt_refresh_port() to disable device management if ib_register_mad_agent() fail. Fixes: 09f8a1486dca ("RDMA/srpt: Fix handling of SR-IOV and iWARP ports") Link: https://lore.kernel.org/r/20200514114720.141139-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 63056af5337c..a294630f2100 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -607,6 +607,11 @@ static int srpt_refresh_port(struct srpt_port *sport) dev_name(&sport->sdev->device->dev), sport->port, PTR_ERR(sport->mad_agent)); sport->mad_agent = NULL; + memset(&port_modify, 0, sizeof(port_modify)); + port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + ib_modify_port(sport->sdev->device, sport->port, 0, + &port_modify); + } } @@ -630,9 +635,8 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev) for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; WARN_ON(sport->port != i); - if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0) - pr_err("disabling MAD processing failed.\n"); if (sport->mad_agent) { + ib_modify_port(sdev->device, i, 0, &port_modify); ib_unregister_mad_agent(sport->mad_agent); sport->mad_agent = NULL; } From 672de5274446fd41167953528da3426805652c1a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 May 2020 21:26:22 +0300 Subject: [PATCH 360/562] gpio: mb86s7x: Use devm_clk_get_optional() to get the input clock Simplify the code which fetches the input clock by using devm_clk_get_optional(). If no input clock is present devm_clk_get_optional() will return NULL instead of an error which matches the behavior of the old code. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200512182623.54990-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mb86s7x.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c index 501e89548f53..3be2e56f7c43 100644 --- a/drivers/gpio/gpio-mb86s7x.c +++ b/drivers/gpio/gpio-mb86s7x.c @@ -168,15 +168,13 @@ static int mb86s70_gpio_probe(struct platform_device *pdev) if (IS_ERR(gchip->base)) return PTR_ERR(gchip->base); - if (!has_acpi_companion(&pdev->dev)) { - gchip->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(gchip->clk)) - return PTR_ERR(gchip->clk); + gchip->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(gchip->clk)) + return PTR_ERR(gchip->clk); - ret = clk_prepare_enable(gchip->clk); - if (ret) - return ret; - } + ret = clk_prepare_enable(gchip->clk); + if (ret) + return ret; spin_lock_init(&gchip->lock); From db67aa33d58d1497c1e025c7dc34de95e83de529 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 May 2020 21:26:23 +0300 Subject: [PATCH 361/562] gpio: mb86s7x: Remove superfluous test for ACPI companion acpi_gpiochip_request_interrupts() will check for ACPI handle of the GPIO chip parent device and bail out if there is none defined. Thus, has_acpi_companion() is effectively repeating above and is not needed in the individual driver. Assigning ->to_irq() unconditionally doesn't change anything, except an error code, but this we fix as well by propagating it from platform_get_irq(). Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200512182623.54990-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mb86s7x.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c index 3be2e56f7c43..37c5363e391e 100644 --- a/drivers/gpio/gpio-mb86s7x.c +++ b/drivers/gpio/gpio-mb86s7x.c @@ -145,7 +145,9 @@ static int mb86s70_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) for (index = 0;; index++) { irq = platform_get_irq(to_platform_device(gc->parent), index); - if (irq <= 0) + if (irq < 0) + return irq; + if (irq == 0) break; if (irq_get_irq_data(irq)->hwirq == offset) return irq; @@ -184,15 +186,13 @@ static int mb86s70_gpio_probe(struct platform_device *pdev) gchip->gc.free = mb86s70_gpio_free; gchip->gc.get = mb86s70_gpio_get; gchip->gc.set = mb86s70_gpio_set; + gchip->gc.to_irq = mb86s70_gpio_to_irq; gchip->gc.label = dev_name(&pdev->dev); gchip->gc.ngpio = 32; gchip->gc.owner = THIS_MODULE; gchip->gc.parent = &pdev->dev; gchip->gc.base = -1; - if (has_acpi_companion(&pdev->dev)) - gchip->gc.to_irq = mb86s70_gpio_to_irq; - ret = gpiochip_add_data(&gchip->gc, gchip); if (ret) { dev_err(&pdev->dev, "couldn't register gpio driver\n"); @@ -200,8 +200,7 @@ static int mb86s70_gpio_probe(struct platform_device *pdev) return ret; } - if (has_acpi_companion(&pdev->dev)) - acpi_gpiochip_request_interrupts(&gchip->gc); + acpi_gpiochip_request_interrupts(&gchip->gc); return 0; } @@ -210,8 +209,7 @@ static int mb86s70_gpio_remove(struct platform_device *pdev) { struct mb86s70_gpio_chip *gchip = platform_get_drvdata(pdev); - if (has_acpi_companion(&pdev->dev)) - acpi_gpiochip_free_interrupts(&gchip->gc); + acpi_gpiochip_free_interrupts(&gchip->gc); gpiochip_remove(&gchip->gc); clk_disable_unprepare(gchip->clk); From 4d3a050039a98bf12a8b9aa106395ce0c2dd9219 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 May 2020 21:27:19 +0300 Subject: [PATCH 362/562] gpio: xgene-sb: Drop redundant OF_GPIO dependency There is nothing in the driver requires OF_GPIO. Moreover, driver supports ACPI and OF_GPIO may be a quite overhead on such configurations. Drop dependency for good and replace of_gpio.h to of.h since we have one function to be defined from there. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200512182721.55127-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-xgene-sb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 8ef2179fb999..739f179e28b1 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -638,7 +638,7 @@ config GPIO_XGENE config GPIO_XGENE_SB tristate "APM X-Gene GPIO standby controller support" - depends on ARCH_XGENE && OF_GPIO + depends on ARCH_XGENE select GPIO_GENERIC select GPIOLIB_IRQCHIP select IRQ_DOMAIN_HIERARCHY diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index b45bfa9baa26..203a268dd09c 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -10,8 +10,8 @@ #include #include +#include #include -#include #include #include From b24bc583dea7e0cf2d5d68e3325e1689fa7c2275 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 May 2020 21:27:20 +0300 Subject: [PATCH 363/562] gpio: xgene-sb: Allow driver to be built with COMPILE_TEST Allow driver to be built with COMPILE_TEST for better test coverage. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200512182721.55127-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 739f179e28b1..8fc7893fba2b 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -638,7 +638,7 @@ config GPIO_XGENE config GPIO_XGENE_SB tristate "APM X-Gene GPIO standby controller support" - depends on ARCH_XGENE + depends on (ARCH_XGENE || COMPILE_TEST) select GPIO_GENERIC select GPIOLIB_IRQCHIP select IRQ_DOMAIN_HIERARCHY From d850c6f42cd8edcb019cfae5f00b1432d0d1a3c5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 May 2020 21:27:21 +0300 Subject: [PATCH 364/562] gpio: xgene-sb: Drop extra check to call acpi_gpiochip_request_interrupts() There is no need to have an additional check to call acpi_gpiochip_request_interrupts(). Even without any interrupts available the registered ACPI Event handlers can be useful for debugging purposes. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200512182721.55127-3-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-xgene-sb.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 203a268dd09c..a809609ee957 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -290,10 +290,8 @@ static int xgene_gpio_sb_probe(struct platform_device *pdev) dev_info(&pdev->dev, "X-Gene GPIO Standby driver registered\n"); - if (priv->nirq > 0) { - /* Register interrupt handlers for gpio signaled acpi events */ - acpi_gpiochip_request_interrupts(&priv->gc); - } + /* Register interrupt handlers for GPIO signaled ACPI Events */ + acpi_gpiochip_request_interrupts(&priv->gc); return ret; } @@ -302,9 +300,7 @@ static int xgene_gpio_sb_remove(struct platform_device *pdev) { struct xgene_gpio_sb *priv = platform_get_drvdata(pdev); - if (priv->nirq > 0) { - acpi_gpiochip_free_interrupts(&priv->gc); - } + acpi_gpiochip_free_interrupts(&priv->gc); irq_domain_remove(priv->irq_domain); From 22b3bc63da37dc3c24f877951ff7bf60737848cf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 May 2020 16:52:52 +0200 Subject: [PATCH 365/562] i2c: i801: Use GPIO_LOOKUP() helper macro i801_add_mux() fills in the GPIO lookup table by manually populating an array of gpiod_lookup structures. Use the existing GPIO_LOOKUP() helper macro instead, to relax a dependency on the gpiod_lookup structure's member names. Signed-off-by: Geert Uytterhoeven Reviewed-by: Jean Delvare Link: https://lore.kernel.org/r/20200511145257.22970-2-geert+renesas@glider.be Signed-off-by: Linus Walleij --- drivers/i2c/busses/i2c-i801.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index a9c03f5c3482..4f333889489c 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1439,9 +1439,9 @@ static int i801_add_mux(struct i801_priv *priv) return -ENOMEM; lookup->dev_id = "i2c-mux-gpio"; for (i = 0; i < mux_config->n_gpios; i++) { - lookup->table[i].chip_label = mux_config->gpio_chip; - lookup->table[i].chip_hwnum = mux_config->gpios[i]; - lookup->table[i].con_id = "mux"; + lookup->table[i] = (struct gpiod_lookup) + GPIO_LOOKUP(mux_config->gpio_chip, + mux_config->gpios[i], "mux", 0); } gpiod_add_lookup_table(lookup); priv->lookup = lookup; From 7b67b836625d9b8350aaec7ecd8347b0336f3f92 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 May 2020 16:52:53 +0200 Subject: [PATCH 366/562] mfd: sm501: Use GPIO_LOOKUP_IDX() helper macro i801_add_mux() fills in the GPIO lookup table by manually populating an array of gpiod_lookup structures. Use the existing GPIO_LOOKUP_IDX() helper macro instead, to relax a dependency on the gpiod_lookup structure's member names. Signed-off-by: Geert Uytterhoeven Acked-by: Lee Jones Link: https://lore.kernel.org/r/20200511145257.22970-3-geert+renesas@glider.be Signed-off-by: Linus Walleij --- drivers/mfd/sm501.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index e49787e6bb93..ccd62b963952 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -1145,22 +1145,14 @@ static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm, return -ENOMEM; lookup->dev_id = "i2c-gpio"; - if (iic->pin_sda < 32) - lookup->table[0].chip_label = "SM501-LOW"; - else - lookup->table[0].chip_label = "SM501-HIGH"; - lookup->table[0].chip_hwnum = iic->pin_sda % 32; - lookup->table[0].con_id = NULL; - lookup->table[0].idx = 0; - lookup->table[0].flags = GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN; - if (iic->pin_scl < 32) - lookup->table[1].chip_label = "SM501-LOW"; - else - lookup->table[1].chip_label = "SM501-HIGH"; - lookup->table[1].chip_hwnum = iic->pin_scl % 32; - lookup->table[1].con_id = NULL; - lookup->table[1].idx = 1; - lookup->table[1].flags = GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN; + lookup->table[0] = (struct gpiod_lookup) + GPIO_LOOKUP_IDX(iic->pin_sda < 32 ? "SM501-LOW" : "SM501-HIGH", + iic->pin_sda % 32, NULL, 0, + GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN); + lookup->table[1] = (struct gpiod_lookup) + GPIO_LOOKUP_IDX(iic->pin_scl < 32 ? "SM501-LOW" : "SM501-HIGH", + iic->pin_scl % 32, NULL, 1, + GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN); gpiod_add_lookup_table(lookup); icd = dev_get_platdata(&pdev->dev); From 4c033b549912bc301c5e2adfb7b6ca007c11bf31 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 May 2020 16:52:54 +0200 Subject: [PATCH 367/562] gpiolib: Add support for GPIO lookup by line name Currently a GPIO lookup table can only refer to a specific GPIO by a tuple, consisting of a GPIO controller label and a GPIO offset inside the controller. However, a GPIO may also carry a line name, defined by DT or ACPI. If present, the line name is the most use-centric way to refer to a GPIO. Hence add support for looking up GPIOs by line name. Note that there is no guarantee that GPIO line names are globally unique, so this will use the first match found. Implement this by reusing the existing gpiod_lookup infrastructure. Rename gpiod_lookup.chip_label to gpiod_lookup.key, to make it clear that this field can have two meanings, and update the kerneldoc and GPIO_LOOKUP*() macros. Signed-off-by: Geert Uytterhoeven Tested-by: Eugeniu Rosca Reviewed-by: Ulrich Hecht Reviewed-by: Eugeniu Rosca Link: https://lore.kernel.org/r/20200511145257.22970-4-geert+renesas@glider.be Signed-off-by: Linus Walleij --- Documentation/driver-api/gpio/board.rst | 15 ++++++++++----- drivers/gpio/gpiolib.c | 22 +++++++++++++++++----- include/linux/gpio/machine.h | 17 ++++++++++------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst index ce91518bf9f4..191fa867826a 100644 --- a/Documentation/driver-api/gpio/board.rst +++ b/Documentation/driver-api/gpio/board.rst @@ -113,13 +113,15 @@ files that desire to do so need to include the following header:: GPIOs are mapped by the means of tables of lookups, containing instances of the gpiod_lookup structure. Two macros are defined to help declaring such mappings:: - GPIO_LOOKUP(chip_label, chip_hwnum, con_id, flags) - GPIO_LOOKUP_IDX(chip_label, chip_hwnum, con_id, idx, flags) + GPIO_LOOKUP(key, chip_hwnum, con_id, flags) + GPIO_LOOKUP_IDX(key, chip_hwnum, con_id, idx, flags) where - - chip_label is the label of the gpiod_chip instance providing the GPIO - - chip_hwnum is the hardware number of the GPIO within the chip + - key is either the label of the gpiod_chip instance providing the GPIO, or + the GPIO line name + - chip_hwnum is the hardware number of the GPIO within the chip, or U16_MAX + to indicate that key is a GPIO line name - con_id is the name of the GPIO function from the device point of view. It can be NULL, in which case it will match any function. - idx is the index of the GPIO within the function. @@ -135,7 +137,10 @@ where In the future, these flags might be extended to support more properties. -Note that GPIO_LOOKUP() is just a shortcut to GPIO_LOOKUP_IDX() where idx = 0. +Note that: + 1. GPIO line names are not guaranteed to be globally unique, so the first + match found will be used. + 2. GPIO_LOOKUP() is just a shortcut to GPIO_LOOKUP_IDX() where idx = 0. A lookup table can then be defined as follows, with an empty entry defining its end. The 'dev_id' field of the table is the identifier of the device that will diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 40f2d7f69be2..3c1dcdfe8572 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -4618,7 +4618,7 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, if (!table) return desc; - for (p = &table->table[0]; p->chip_label; p++) { + for (p = &table->table[0]; p->key; p++) { struct gpio_chip *gc; /* idx must always match exactly */ @@ -4629,18 +4629,30 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, if (p->con_id && (!con_id || strcmp(p->con_id, con_id))) continue; - gc = find_chip_by_name(p->chip_label); + if (p->chip_hwnum == U16_MAX) { + desc = gpio_name_to_desc(p->key); + if (desc) { + *flags = p->flags; + return desc; + } + + dev_warn(dev, "cannot find GPIO line %s, deferring\n", + p->key); + return ERR_PTR(-EPROBE_DEFER); + } + + gc = find_chip_by_name(p->key); if (!gc) { /* * As the lookup table indicates a chip with - * p->chip_label should exist, assume it may + * p->key should exist, assume it may * still appear later and let the interested * consumer be probed again or let the Deferred * Probe infrastructure handle the error. */ dev_warn(dev, "cannot find GPIO chip %s, deferring\n", - p->chip_label); + p->key); return ERR_PTR(-EPROBE_DEFER); } @@ -4671,7 +4683,7 @@ static int platform_gpio_count(struct device *dev, const char *con_id) if (!table) return -ENOENT; - for (p = &table->table[0]; p->chip_label; p++) { + for (p = &table->table[0]; p->key; p++) { if ((con_id && p->con_id && !strcmp(con_id, p->con_id)) || (!con_id && !p->con_id)) count++; diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index 1ebe5be05d5f..781a053abbb9 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -20,8 +20,11 @@ enum gpio_lookup_flags { /** * struct gpiod_lookup - lookup table - * @chip_label: name of the chip the GPIO belongs to - * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO + * @key: either the name of the chip the GPIO belongs to, or the GPIO line name + * Note that GPIO line names are not guaranteed to be globally unique, + * so this will use the first match found! + * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO, or + * U16_MAX to indicate that @key is a GPIO line name * @con_id: name of the GPIO from the device's point of view * @idx: index of the GPIO in case several GPIOs share the same name * @flags: bitmask of gpio_lookup_flags GPIO_* values @@ -30,7 +33,7 @@ enum gpio_lookup_flags { * functions using platform data. */ struct gpiod_lookup { - const char *chip_label; + const char *key; u16 chip_hwnum; const char *con_id; unsigned int idx; @@ -63,17 +66,17 @@ struct gpiod_hog { /* * Simple definition of a single GPIO under a con_id */ -#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _con_id, _flags) \ - GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, 0, _flags) +#define GPIO_LOOKUP(_key, _chip_hwnum, _con_id, _flags) \ + GPIO_LOOKUP_IDX(_key, _chip_hwnum, _con_id, 0, _flags) /* * Use this macro if you need to have several GPIOs under the same con_id. * Each GPIO needs to use a different index and can be accessed using * gpiod_get_index() */ -#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, _idx, _flags) \ +#define GPIO_LOOKUP_IDX(_key, _chip_hwnum, _con_id, _idx, _flags) \ { \ - .chip_label = _chip_label, \ + .key = _key, \ .chip_hwnum = _chip_hwnum, \ .con_id = _con_id, \ .idx = _idx, \ From 828546e24280f721350a7a0dcc92416e917b4382 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 May 2020 16:52:55 +0200 Subject: [PATCH 368/562] gpio: Add GPIO Aggregator GPIO controllers are exported to userspace using /dev/gpiochip* character devices. Access control to these devices is provided by standard UNIX file system permissions, on an all-or-nothing basis: either a GPIO controller is accessible for a user, or it is not. Currently no mechanism exists to control access to individual GPIOs. Hence add a GPIO driver to aggregate existing GPIOs, and expose them as a new gpiochip. This supports the following use cases: - Aggregating GPIOs using Sysfs This is useful for implementing access control, and assigning a set of GPIOs to a specific user or virtual machine. - Generic GPIO Driver This is useful for industrial control, where it can provide userspace access to a simple GPIO-operated device described in DT, cfr. e.g. spidev for SPI-operated devices. Signed-off-by: Geert Uytterhoeven Tested-by: Eugeniu Rosca Reviewed-by: Eugeniu Rosca Link: https://lore.kernel.org/r/20200511145257.22970-5-geert+renesas@glider.be Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 12 + drivers/gpio/Makefile | 1 + drivers/gpio/gpio-aggregator.c | 568 +++++++++++++++++++++++++++++++++ 3 files changed, 581 insertions(+) create mode 100644 drivers/gpio/gpio-aggregator.c diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 1b96169d84f7..cf45dfa079ee 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1541,6 +1541,18 @@ config GPIO_VIPERBOARD endmenu +config GPIO_AGGREGATOR + tristate "GPIO Aggregator" + help + Say yes here to enable the GPIO Aggregator, which provides a way to + aggregate existing GPIO lines into a new virtual GPIO chip. + This can serve the following purposes: + - Assign permissions for a collection of GPIO lines to a user, + - Export a collection of GPIO lines to a virtual machine, + - Provide a generic driver for a GPIO-operated device in an + industrial control context, to be operated from userspace using + the GPIO chardev interface. + config GPIO_MOCKUP tristate "GPIO Testing Driver" select IRQ_SIM diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index b2cfc21a97f3..65bf3940e33c 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_GPIO_74XX_MMIO) += gpio-74xx-mmio.o obj-$(CONFIG_GPIO_ADNP) += gpio-adnp.o obj-$(CONFIG_GPIO_ADP5520) += gpio-adp5520.o obj-$(CONFIG_GPIO_ADP5588) += gpio-adp5588.o +obj-$(CONFIG_GPIO_AGGREGATOR) += gpio-aggregator.o obj-$(CONFIG_GPIO_ALTERA_A10SR) += gpio-altera-a10sr.o obj-$(CONFIG_GPIO_ALTERA) += gpio-altera.o obj-$(CONFIG_GPIO_AMD8111) += gpio-amd8111.o diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c new file mode 100644 index 000000000000..9b0adbdddbfc --- /dev/null +++ b/drivers/gpio/gpio-aggregator.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// GPIO Aggregator +// +// Copyright (C) 2019-2020 Glider bv + +#define DRV_NAME "gpio-aggregator" +#define pr_fmt(fmt) DRV_NAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * GPIO Aggregator sysfs interface + */ + +struct gpio_aggregator { + struct gpiod_lookup_table *lookups; + struct platform_device *pdev; + char args[]; +}; + +static DEFINE_MUTEX(gpio_aggregator_lock); /* protects idr */ +static DEFINE_IDR(gpio_aggregator_idr); + +static char *get_arg(char **args) +{ + char *start = *args, *end; + + start = skip_spaces(start); + if (!*start) + return NULL; + + if (*start == '"') { + /* Quoted arg */ + end = strchr(++start, '"'); + if (!end) + return ERR_PTR(-EINVAL); + } else { + /* Unquoted arg */ + for (end = start; *end && !isspace(*end); end++) ; + } + + if (*end) + *end++ = '\0'; + + *args = end; + return start; +} + +static bool isrange(const char *s) +{ + size_t n; + + if (IS_ERR_OR_NULL(s)) + return false; + + while (1) { + n = strspn(s, "0123456789"); + if (!n) + return false; + + s += n; + + switch (*s++) { + case '\0': + return true; + + case '-': + case ',': + break; + + default: + return false; + } + } +} + +static int aggr_add_gpio(struct gpio_aggregator *aggr, const char *key, + int hwnum, unsigned int *n) +{ + struct gpiod_lookup_table *lookups; + + lookups = krealloc(aggr->lookups, struct_size(lookups, table, *n + 2), + GFP_KERNEL); + if (!lookups) + return -ENOMEM; + + lookups->table[*n] = + (struct gpiod_lookup)GPIO_LOOKUP_IDX(key, hwnum, NULL, *n, 0); + + (*n)++; + memset(&lookups->table[*n], 0, sizeof(lookups->table[*n])); + + aggr->lookups = lookups; + return 0; +} + +static int aggr_parse(struct gpio_aggregator *aggr) +{ + unsigned int first_index, last_index, i, n = 0; + char *name, *offsets, *first, *last, *next; + char *args = aggr->args; + int error; + + for (name = get_arg(&args), offsets = get_arg(&args); name; + offsets = get_arg(&args)) { + if (IS_ERR(name)) { + pr_err("Cannot get GPIO specifier: %pe\n", name); + return PTR_ERR(name); + } + + if (!isrange(offsets)) { + /* Named GPIO line */ + error = aggr_add_gpio(aggr, name, U16_MAX, &n); + if (error) + return error; + + name = offsets; + continue; + } + + /* GPIO chip + offset(s) */ + for (first = offsets; *first; first = next) { + next = strchrnul(first, ','); + if (*next) + *next++ = '\0'; + + last = strchr(first, '-'); + if (last) + *last++ = '\0'; + + if (kstrtouint(first, 10, &first_index)) { + pr_err("Cannot parse GPIO index %s\n", first); + return -EINVAL; + } + + if (!last) { + last_index = first_index; + } else if (kstrtouint(last, 10, &last_index)) { + pr_err("Cannot parse GPIO index %s\n", last); + return -EINVAL; + } + + for (i = first_index; i <= last_index; i++) { + error = aggr_add_gpio(aggr, name, i, &n); + if (error) + return error; + } + } + + name = get_arg(&args); + } + + if (!n) { + pr_err("No GPIOs specified\n"); + return -EINVAL; + } + + return 0; +} + +static ssize_t new_device_store(struct device_driver *driver, const char *buf, + size_t count) +{ + struct gpio_aggregator *aggr; + struct platform_device *pdev; + int res, id; + + /* kernfs guarantees string termination, so count + 1 is safe */ + aggr = kzalloc(sizeof(*aggr) + count + 1, GFP_KERNEL); + if (!aggr) + return -ENOMEM; + + memcpy(aggr->args, buf, count + 1); + + aggr->lookups = kzalloc(struct_size(aggr->lookups, table, 1), + GFP_KERNEL); + if (!aggr->lookups) { + res = -ENOMEM; + goto free_ga; + } + + mutex_lock(&gpio_aggregator_lock); + id = idr_alloc(&gpio_aggregator_idr, aggr, 0, 0, GFP_KERNEL); + mutex_unlock(&gpio_aggregator_lock); + + if (id < 0) { + res = id; + goto free_table; + } + + aggr->lookups->dev_id = kasprintf(GFP_KERNEL, "%s.%d", DRV_NAME, id); + if (!aggr->lookups->dev_id) { + res = -ENOMEM; + goto remove_idr; + } + + res = aggr_parse(aggr); + if (res) + goto free_dev_id; + + gpiod_add_lookup_table(aggr->lookups); + + pdev = platform_device_register_simple(DRV_NAME, id, NULL, 0); + if (IS_ERR(pdev)) { + res = PTR_ERR(pdev); + goto remove_table; + } + + aggr->pdev = pdev; + return count; + +remove_table: + gpiod_remove_lookup_table(aggr->lookups); +free_dev_id: + kfree(aggr->lookups->dev_id); +remove_idr: + mutex_lock(&gpio_aggregator_lock); + idr_remove(&gpio_aggregator_idr, id); + mutex_unlock(&gpio_aggregator_lock); +free_table: + kfree(aggr->lookups); +free_ga: + kfree(aggr); + return res; +} + +static DRIVER_ATTR_WO(new_device); + +static void gpio_aggregator_free(struct gpio_aggregator *aggr) +{ + platform_device_unregister(aggr->pdev); + gpiod_remove_lookup_table(aggr->lookups); + kfree(aggr->lookups->dev_id); + kfree(aggr->lookups); + kfree(aggr); +} + +static ssize_t delete_device_store(struct device_driver *driver, + const char *buf, size_t count) +{ + struct gpio_aggregator *aggr; + unsigned int id; + int error; + + if (!str_has_prefix(buf, DRV_NAME ".")) + return -EINVAL; + + error = kstrtouint(buf + strlen(DRV_NAME "."), 10, &id); + if (error) + return error; + + mutex_lock(&gpio_aggregator_lock); + aggr = idr_remove(&gpio_aggregator_idr, id); + mutex_unlock(&gpio_aggregator_lock); + if (!aggr) + return -ENOENT; + + gpio_aggregator_free(aggr); + return count; +} +static DRIVER_ATTR_WO(delete_device); + +static struct attribute *gpio_aggregator_attrs[] = { + &driver_attr_new_device.attr, + &driver_attr_delete_device.attr, + NULL, +}; +ATTRIBUTE_GROUPS(gpio_aggregator); + +static int __exit gpio_aggregator_idr_remove(int id, void *p, void *data) +{ + gpio_aggregator_free(p); + return 0; +} + +static void __exit gpio_aggregator_remove_all(void) +{ + mutex_lock(&gpio_aggregator_lock); + idr_for_each(&gpio_aggregator_idr, gpio_aggregator_idr_remove, NULL); + idr_destroy(&gpio_aggregator_idr); + mutex_unlock(&gpio_aggregator_lock); +} + + +/* + * GPIO Forwarder + */ + +struct gpiochip_fwd { + struct gpio_chip chip; + struct gpio_desc **descs; + union { + struct mutex mlock; /* protects tmp[] if can_sleep */ + spinlock_t slock; /* protects tmp[] if !can_sleep */ + }; + unsigned long tmp[]; /* values and descs for multiple ops */ +}; + +static int gpio_fwd_get_direction(struct gpio_chip *chip, unsigned int offset) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + + return gpiod_get_direction(fwd->descs[offset]); +} + +static int gpio_fwd_direction_input(struct gpio_chip *chip, unsigned int offset) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + + return gpiod_direction_input(fwd->descs[offset]); +} + +static int gpio_fwd_direction_output(struct gpio_chip *chip, + unsigned int offset, int value) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + + return gpiod_direction_output(fwd->descs[offset], value); +} + +static int gpio_fwd_get(struct gpio_chip *chip, unsigned int offset) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + + return gpiod_get_value(fwd->descs[offset]); +} + +static int gpio_fwd_get_multiple(struct gpio_chip *chip, unsigned long *mask, + unsigned long *bits) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + unsigned long *values, flags = 0; + struct gpio_desc **descs; + unsigned int i, j = 0; + int error; + + if (chip->can_sleep) + mutex_lock(&fwd->mlock); + else + spin_lock_irqsave(&fwd->slock, flags); + + /* Both values bitmap and desc pointers are stored in tmp[] */ + values = &fwd->tmp[0]; + descs = (void *)&fwd->tmp[BITS_TO_LONGS(fwd->chip.ngpio)]; + + bitmap_clear(values, 0, fwd->chip.ngpio); + for_each_set_bit(i, mask, fwd->chip.ngpio) + descs[j++] = fwd->descs[i]; + + error = gpiod_get_array_value(j, descs, NULL, values); + if (!error) { + j = 0; + for_each_set_bit(i, mask, fwd->chip.ngpio) + __assign_bit(i, bits, test_bit(j++, values)); + } + + if (chip->can_sleep) + mutex_unlock(&fwd->mlock); + else + spin_unlock_irqrestore(&fwd->slock, flags); + + return error; +} + +static void gpio_fwd_set(struct gpio_chip *chip, unsigned int offset, int value) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + + gpiod_set_value(fwd->descs[offset], value); +} + +static void gpio_fwd_set_multiple(struct gpio_chip *chip, unsigned long *mask, + unsigned long *bits) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + unsigned long *values, flags = 0; + struct gpio_desc **descs; + unsigned int i, j = 0; + + if (chip->can_sleep) + mutex_lock(&fwd->mlock); + else + spin_lock_irqsave(&fwd->slock, flags); + + /* Both values bitmap and desc pointers are stored in tmp[] */ + values = &fwd->tmp[0]; + descs = (void *)&fwd->tmp[BITS_TO_LONGS(fwd->chip.ngpio)]; + + for_each_set_bit(i, mask, fwd->chip.ngpio) { + __assign_bit(j, values, test_bit(i, bits)); + descs[j++] = fwd->descs[i]; + } + + gpiod_set_array_value(j, descs, NULL, values); + + if (chip->can_sleep) + mutex_unlock(&fwd->mlock); + else + spin_unlock_irqrestore(&fwd->slock, flags); +} + +static int gpio_fwd_set_config(struct gpio_chip *chip, unsigned int offset, + unsigned long config) +{ + struct gpiochip_fwd *fwd = gpiochip_get_data(chip); + + return gpiod_set_config(fwd->descs[offset], config); +} + +/** + * gpiochip_fwd_create() - Create a new GPIO forwarder + * @dev: Parent device pointer + * @ngpios: Number of GPIOs in the forwarder. + * @descs: Array containing the GPIO descriptors to forward to. + * This array must contain @ngpios entries, and must not be deallocated + * before the forwarder has been destroyed again. + * + * This function creates a new gpiochip, which forwards all GPIO operations to + * the passed GPIO descriptors. + * + * Return: An opaque object pointer, or an ERR_PTR()-encoded negative error + * code on failure. + */ +static struct gpiochip_fwd *gpiochip_fwd_create(struct device *dev, + unsigned int ngpios, + struct gpio_desc *descs[]) +{ + const char *label = dev_name(dev); + struct gpiochip_fwd *fwd; + struct gpio_chip *chip; + unsigned int i; + int error; + + fwd = devm_kzalloc(dev, struct_size(fwd, tmp, + BITS_TO_LONGS(ngpios) + ngpios), GFP_KERNEL); + if (!fwd) + return ERR_PTR(-ENOMEM); + + chip = &fwd->chip; + + /* + * If any of the GPIO lines are sleeping, then the entire forwarder + * will be sleeping. + * If any of the chips support .set_config(), then the forwarder will + * support setting configs. + */ + for (i = 0; i < ngpios; i++) { + struct gpio_chip *parent = gpiod_to_chip(descs[i]); + + dev_dbg(dev, "%u => gpio-%d\n", i, desc_to_gpio(descs[i])); + + if (gpiod_cansleep(descs[i])) + chip->can_sleep = true; + if (parent && parent->set_config) + chip->set_config = gpio_fwd_set_config; + } + + chip->label = label; + chip->parent = dev; + chip->owner = THIS_MODULE; + chip->get_direction = gpio_fwd_get_direction; + chip->direction_input = gpio_fwd_direction_input; + chip->direction_output = gpio_fwd_direction_output; + chip->get = gpio_fwd_get; + chip->get_multiple = gpio_fwd_get_multiple; + chip->set = gpio_fwd_set; + chip->set_multiple = gpio_fwd_set_multiple; + chip->base = -1; + chip->ngpio = ngpios; + fwd->descs = descs; + + if (chip->can_sleep) + mutex_init(&fwd->mlock); + else + spin_lock_init(&fwd->slock); + + error = devm_gpiochip_add_data(dev, chip, fwd); + if (error) + return ERR_PTR(error); + + return fwd; +} + + +/* + * GPIO Aggregator platform device + */ + +static int gpio_aggregator_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct gpio_desc **descs; + struct gpiochip_fwd *fwd; + int i, n; + + n = gpiod_count(dev, NULL); + if (n < 0) + return n; + + descs = devm_kmalloc_array(dev, n, sizeof(*descs), GFP_KERNEL); + if (!descs) + return -ENOMEM; + + for (i = 0; i < n; i++) { + descs[i] = devm_gpiod_get_index(dev, NULL, i, GPIOD_ASIS); + if (IS_ERR(descs[i])) + return PTR_ERR(descs[i]); + } + + fwd = gpiochip_fwd_create(dev, n, descs); + if (IS_ERR(fwd)) + return PTR_ERR(fwd); + + platform_set_drvdata(pdev, fwd); + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id gpio_aggregator_dt_ids[] = { + /* + * Add GPIO-operated devices controlled from userspace below, + * or use "driver_override" in sysfs + */ + {}, +}; +MODULE_DEVICE_TABLE(of, gpio_aggregator_dt_ids); +#endif + +static struct platform_driver gpio_aggregator_driver = { + .probe = gpio_aggregator_probe, + .driver = { + .name = DRV_NAME, + .groups = gpio_aggregator_groups, + .of_match_table = of_match_ptr(gpio_aggregator_dt_ids), + }, +}; + +static int __init gpio_aggregator_init(void) +{ + return platform_driver_register(&gpio_aggregator_driver); +} +module_init(gpio_aggregator_init); + +static void __exit gpio_aggregator_exit(void) +{ + gpio_aggregator_remove_all(); + platform_driver_unregister(&gpio_aggregator_driver); +} +module_exit(gpio_aggregator_exit); + +MODULE_AUTHOR("Geert Uytterhoeven "); +MODULE_DESCRIPTION("GPIO Aggregator"); +MODULE_LICENSE("GPL v2"); From ce7a2f77f9766378b51422f8f45fd06cdc44ef0b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 May 2020 16:52:56 +0200 Subject: [PATCH 369/562] docs: gpio: Add GPIO Aggregator documentation Document the GPIO Aggregator, and the two typical use-cases. Signed-off-by: Geert Uytterhoeven Tested-by: Eugeniu Rosca Reviewed-by: Ulrich Hecht Reviewed-by: Eugeniu Rosca Link: https://lore.kernel.org/r/20200511145257.22970-6-geert+renesas@glider.be Signed-off-by: Linus Walleij --- .../admin-guide/gpio/gpio-aggregator.rst | 111 ++++++++++++++++++ Documentation/admin-guide/gpio/index.rst | 1 + 2 files changed, 112 insertions(+) create mode 100644 Documentation/admin-guide/gpio/gpio-aggregator.rst diff --git a/Documentation/admin-guide/gpio/gpio-aggregator.rst b/Documentation/admin-guide/gpio/gpio-aggregator.rst new file mode 100644 index 000000000000..5cd1e7221756 --- /dev/null +++ b/Documentation/admin-guide/gpio/gpio-aggregator.rst @@ -0,0 +1,111 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +GPIO Aggregator +=============== + +The GPIO Aggregator provides a mechanism to aggregate GPIOs, and expose them as +a new gpio_chip. This supports the following use cases. + + +Aggregating GPIOs using Sysfs +----------------------------- + +GPIO controllers are exported to userspace using /dev/gpiochip* character +devices. Access control to these devices is provided by standard UNIX file +system permissions, on an all-or-nothing basis: either a GPIO controller is +accessible for a user, or it is not. + +The GPIO Aggregator provides access control for a set of one or more GPIOs, by +aggregating them into a new gpio_chip, which can be assigned to a group or user +using standard UNIX file ownership and permissions. Furthermore, this +simplifies and hardens exporting GPIOs to a virtual machine, as the VM can just +grab the full GPIO controller, and no longer needs to care about which GPIOs to +grab and which not, reducing the attack surface. + +Aggregated GPIO controllers are instantiated and destroyed by writing to +write-only attribute files in sysfs. + + /sys/bus/platform/drivers/gpio-aggregator/ + + "new_device" ... + Userspace may ask the kernel to instantiate an aggregated GPIO + controller by writing a string describing the GPIOs to + aggregate to the "new_device" file, using the format + + .. code-block:: none + + [] [ ] ... + + Where: + + "" ... + is a GPIO line name, + + "" ... + is a GPIO chip label, and + + "" ... + is a comma-separated list of GPIO offsets and/or + GPIO offset ranges denoted by dashes. + + Example: Instantiate a new GPIO aggregator by aggregating GPIO + line 19 of "e6052000.gpio" and GPIO lines 20-21 of + "e6050000.gpio" into a new gpio_chip: + + .. code-block:: sh + + $ echo 'e6052000.gpio 19 e6050000.gpio 20-21' > new_device + + "delete_device" ... + Userspace may ask the kernel to destroy an aggregated GPIO + controller after use by writing its device name to the + "delete_device" file. + + Example: Destroy the previously-created aggregated GPIO + controller, assumed to be "gpio-aggregator.0": + + .. code-block:: sh + + $ echo gpio-aggregator.0 > delete_device + + +Generic GPIO Driver +------------------- + +The GPIO Aggregator can also be used as a generic driver for a simple +GPIO-operated device described in DT, without a dedicated in-kernel driver. +This is useful in industrial control, and is not unlike e.g. spidev, which +allows the user to communicate with an SPI device from userspace. + +Binding a device to the GPIO Aggregator is performed either by modifying the +gpio-aggregator driver, or by writing to the "driver_override" file in Sysfs. + +Example: If "door" is a GPIO-operated device described in DT, using its own +compatible value:: + + door { + compatible = "myvendor,mydoor"; + + gpios = <&gpio2 19 GPIO_ACTIVE_HIGH>, + <&gpio2 20 GPIO_ACTIVE_LOW>; + gpio-line-names = "open", "lock"; + }; + +it can be bound to the GPIO Aggregator by either: + +1. Adding its compatible value to ``gpio_aggregator_dt_ids[]``, +2. Binding manually using "driver_override": + +.. code-block:: sh + + $ echo gpio-aggregator > /sys/bus/platform/devices/door/driver_override + $ echo door > /sys/bus/platform/drivers/gpio-aggregator/bind + +After that, a new gpiochip "door" has been created: + +.. code-block:: sh + + $ gpioinfo door + gpiochip12 - 2 lines: + line 0: "open" unused input active-high + line 1: "lock" unused input active-high diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst index a244ba4e87d5..ef2838638e96 100644 --- a/Documentation/admin-guide/gpio/index.rst +++ b/Documentation/admin-guide/gpio/index.rst @@ -7,6 +7,7 @@ gpio .. toctree:: :maxdepth: 1 + gpio-aggregator sysfs .. only:: subproject and html From d9646a4866b008538dca327095c1b5e5f0204d4f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 May 2020 16:52:57 +0200 Subject: [PATCH 370/562] MAINTAINERS: Add GPIO Aggregator section Add a maintainership section for the GPIO Aggregator, covering documentation and driver source code. Signed-off-by: Geert Uytterhoeven Tested-by: Eugeniu Rosca Reviewed-by: Eugeniu Rosca Link: https://lore.kernel.org/r/20200511145257.22970-7-geert+renesas@glider.be Signed-off-by: Linus Walleij --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..faf0298307a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7228,6 +7228,13 @@ F: Documentation/firmware-guide/acpi/gpio-properties.rst F: drivers/gpio/gpiolib-acpi.c F: drivers/gpio/gpiolib-acpi.h +GPIO AGGREGATOR +M: Geert Uytterhoeven +L: linux-gpio@vger.kernel.org +S: Supported +F: Documentation/admin-guide/gpio/gpio-aggregator.rst +F: drivers/gpio/gpio-aggregator.c + GPIO IR Transmitter M: Sean Young L: linux-media@vger.kernel.org From 0b671eed0cf069839c4e73b66d88ec483ee6c3f5 Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Wed, 8 Apr 2020 19:59:58 +0800 Subject: [PATCH 371/562] ipmi:bt-bmc: Avoid unnecessary check bt_bmc_probe() is only called with an openfirmware platform device. Therefore there is no need to check that the passed in device is NULL or that it has an openfirmware node. Signed-off-by: Tang Bin Message-Id: <20200408115958.2848-1-tangbin@cmss.chinamobile.com> [Fixed the title up a bit.] Signed-off-by: Corey Minyard --- drivers/char/ipmi/bt-bmc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index d36aeacb290e..890ad55aae79 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -430,9 +430,6 @@ static int bt_bmc_probe(struct platform_device *pdev) struct device *dev; int rc; - if (!pdev || !pdev->dev.of_node) - return -ENODEV; - dev = &pdev->dev; dev_info(dev, "Found bt bmc device\n"); From 8ed678dbac8c8c5685893f3af1c47f167b61c5ec Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Tue, 14 Apr 2020 22:18:14 +0800 Subject: [PATCH 372/562] ipmi:bt-bmc: Fix some format issue of the code Fix some format issue of the code in bt-bmc.c Signed-off-by: Tang Bin Signed-off-by: Shengju Zhang Message-Id: <20200414141814.19048-1-tangbin@cmss.chinamobile.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/bt-bmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index 890ad55aae79..cd0349bfffe9 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -463,9 +463,9 @@ static int bt_bmc_probe(struct platform_device *pdev) init_waitqueue_head(&bt_bmc->queue); bt_bmc->miscdev.minor = MISC_DYNAMIC_MINOR, - bt_bmc->miscdev.name = DEVICE_NAME, - bt_bmc->miscdev.fops = &bt_bmc_fops, - bt_bmc->miscdev.parent = dev; + bt_bmc->miscdev.name = DEVICE_NAME, + bt_bmc->miscdev.fops = &bt_bmc_fops, + bt_bmc->miscdev.parent = dev; rc = misc_register(&bt_bmc->miscdev); if (rc) { dev_err(dev, "Unable to register misc device\n"); From 7c47a219b95d0e06b5ef5fcc7bad807895015eac Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 17 Apr 2020 12:48:28 +0800 Subject: [PATCH 373/562] ipmi: use vzalloc instead of kmalloc for user creation We met mulitple times of failure of staring bmc-watchdog, due to the runtime memory allocation failure of order 4. bmc-watchdog: page allocation failure: order:4, mode:0x40cc0(GFP_KERNEL|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0-1 CPU: 1 PID: 2571 Comm: bmc-watchdog Not tainted 5.5.0-00045-g7d6bb61d6188c #1 Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.00.01.0015.110720180833 11/07/2018 Call Trace: dump_stack+0x66/0x8b warn_alloc+0xfe/0x160 __alloc_pages_slowpath+0xd3e/0xd80 __alloc_pages_nodemask+0x2f0/0x340 kmalloc_order+0x18/0x70 kmalloc_order_trace+0x1d/0xb0 ipmi_create_user+0x55/0x2c0 [ipmi_msghandler] ipmi_open+0x72/0x110 [ipmi_devintf] chrdev_open+0xcb/0x1e0 do_dentry_open+0x1ce/0x380 path_openat+0x305/0x14f0 do_filp_open+0x9b/0x110 do_sys_open+0x1bd/0x250 do_syscall_64+0x5b/0x1f0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Using vzalloc/vfree for creating ipmi_user heals the problem Thanks to Stephen Rothwell for finding the vmalloc.h inclusion issue. Signed-off-by: Feng Tang Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c48d8f086382..9afd220cd824 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -33,6 +33,7 @@ #include #include #include +#include #define IPMI_DRIVER_VERSION "39.2" @@ -1153,7 +1154,7 @@ static void free_user_work(struct work_struct *work) remove_work); cleanup_srcu_struct(&user->release_barrier); - kfree(user); + vfree(user); } int ipmi_create_user(unsigned int if_num, @@ -1185,7 +1186,7 @@ int ipmi_create_user(unsigned int if_num, if (rv) return rv; - new_user = kmalloc(sizeof(*new_user), GFP_KERNEL); + new_user = vzalloc(sizeof(*new_user)); if (!new_user) return -ENOMEM; @@ -1232,7 +1233,7 @@ int ipmi_create_user(unsigned int if_num, out_kfree: srcu_read_unlock(&ipmi_interfaces_srcu, index); - kfree(new_user); + vfree(new_user); return rv; } EXPORT_SYMBOL(ipmi_create_user); From 878caa96596963ba2d73393572b02624cd23e4ff Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Apr 2020 16:03:48 +0300 Subject: [PATCH 374/562] ipmi: Replace guid_copy() with import_guid() where it makes sense There is a specific API to treat raw data as GUID, i.e. import_guid(). Use it instead of guid_copy() with explicit casting. Signed-off-by: Andy Shevchenko Message-Id: <20200422130348.38749-1-andriy.shevchenko@linux.intel.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 9afd220cd824..e1b22fe0916c 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3172,7 +3172,7 @@ static void guid_handler(struct ipmi_smi *intf, struct ipmi_recv_msg *msg) goto out; } - guid_copy(&bmc->fetch_guid, (guid_t *)(msg->msg.data + 1)); + import_guid(&bmc->fetch_guid, msg->msg.data + 1); /* * Make sure the guid data is available before setting * dyn_guid_set. From 49826937e7c7917140515aaf10c17bedcc4acaad Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Tue, 5 May 2020 18:29:06 +0800 Subject: [PATCH 375/562] ipmi:bt-bmc: Fix error handling and status check If the function platform_get_irq() failed, the negative value returned will not be detected here. So fix error handling in bt_bmc_config_irq(). And in the function bt_bmc_probe(), when get irq failed, it will print error message. So use platform_get_irq_optional() to simplify code. Finally in the function bt_bmc_remove() should make the right status check if get irq failed. Signed-off-by: Shengju Zhang Signed-off-by: Tang Bin Message-Id: <20200505102906.17196-1-tangbin@cmss.chinamobile.com> [Also set bt_bmc->irq to a negative value if devm_request_irq() fails.] Signed-off-by: Corey Minyard --- drivers/char/ipmi/bt-bmc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index cd0349bfffe9..a395e2e70dc5 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -399,15 +399,15 @@ static int bt_bmc_config_irq(struct bt_bmc *bt_bmc, struct device *dev = &pdev->dev; int rc; - bt_bmc->irq = platform_get_irq(pdev, 0); - if (!bt_bmc->irq) - return -ENODEV; + bt_bmc->irq = platform_get_irq_optional(pdev, 0); + if (bt_bmc->irq < 0) + return bt_bmc->irq; rc = devm_request_irq(dev, bt_bmc->irq, bt_bmc_irq, IRQF_SHARED, DEVICE_NAME, bt_bmc); if (rc < 0) { dev_warn(dev, "Unable to request IRQ %d\n", bt_bmc->irq); - bt_bmc->irq = 0; + bt_bmc->irq = rc; return rc; } @@ -474,7 +474,7 @@ static int bt_bmc_probe(struct platform_device *pdev) bt_bmc_config_irq(bt_bmc, pdev); - if (bt_bmc->irq) { + if (bt_bmc->irq >= 0) { dev_info(dev, "Using IRQ %d\n", bt_bmc->irq); } else { dev_info(dev, "No IRQ; using timer\n"); @@ -500,7 +500,7 @@ static int bt_bmc_remove(struct platform_device *pdev) struct bt_bmc *bt_bmc = dev_get_drvdata(&pdev->dev); misc_deregister(&bt_bmc->miscdev); - if (!bt_bmc->irq) + if (bt_bmc->irq < 0) del_timer_sync(&bt_bmc->poll_timer); return 0; } From 429b00f606659cbaee41da60be6a6f8965a4f6f8 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 11 Mar 2020 15:24:09 -0400 Subject: [PATCH 376/562] ipmi_si: Load acpi_ipmi when ACPI IPMI interface added Try to load acpi_ipmi when an ACPI IPMI interface is added, so that the ACPI IPMI OpRegion is accessible. Signed-off-by: Stuart Hayes Message-Id: <20200311192409.59923-1-stuart.w.hayes@gmail.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_platform.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_platform.c b/drivers/char/ipmi/ipmi_si_platform.c index 638c693e17ad..129b5713f187 100644 --- a/drivers/char/ipmi/ipmi_si_platform.c +++ b/drivers/char/ipmi/ipmi_si_platform.c @@ -393,6 +393,8 @@ static int acpi_ipmi_probe(struct platform_device *pdev) dev_info(io.dev, "%pR regsize %d spacing %d irq %d\n", res, io.regsize, io.regspacing, io.irq); + request_module("acpi_ipmi"); + return ipmi_si_add_smi(&io); err_free: From e641abd3c7261741f295d41d80b213c20f127de2 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 11 May 2020 16:13:59 -0500 Subject: [PATCH 377/562] Try to load acpi_ipmi when an SSIF ACPI IPMI interface is added This is similar to the recent patch for the SI interface, but for SSIF. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 2704470e021d..6fffeb241f45 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1472,6 +1472,7 @@ static bool check_acpi(struct ssif_info *ssif_info, struct device *dev) if (acpi_handle) { ssif_info->addr_source = SI_ACPI; ssif_info->addr_info.acpi_info.acpi_handle = acpi_handle; + request_module("acpi_ipmi"); return true; } #endif From 41311242221e3482b20bfed10fa4d9db98d87016 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 28 Apr 2020 17:02:24 -0600 Subject: [PATCH 378/562] vfio/type1: Support faulting PFNMAP vmas With conversion to follow_pfn(), DMA mapping a PFNMAP range depends on the range being faulted into the vma. Add support to manually provide that, in the same way as done on KVM with hva_to_pfn_remapped(). Reviewed-by: Peter Xu Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 36 ++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index cc1d64765ce7..4a4cb7cd86b2 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -317,6 +317,32 @@ static int put_pfn(unsigned long pfn, int prot) return 0; } +static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, + unsigned long vaddr, unsigned long *pfn, + bool write_fault) +{ + int ret; + + ret = follow_pfn(vma, vaddr, pfn); + if (ret) { + bool unlocked = false; + + ret = fixup_user_fault(NULL, mm, vaddr, + FAULT_FLAG_REMOTE | + (write_fault ? FAULT_FLAG_WRITE : 0), + &unlocked); + if (unlocked) + return -EAGAIN; + + if (ret) + return ret; + + ret = follow_pfn(vma, vaddr, pfn); + } + + return ret; +} + static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, int prot, unsigned long *pfn) { @@ -339,12 +365,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, vaddr = untagged_addr(vaddr); +retry: vma = find_vma_intersection(mm, vaddr, vaddr + 1); if (vma && vma->vm_flags & VM_PFNMAP) { - if (!follow_pfn(vma, vaddr, pfn) && - is_invalid_reserved_pfn(*pfn)) - ret = 0; + ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE); + if (ret == -EAGAIN) + goto retry; + + if (!ret && !is_invalid_reserved_pfn(*pfn)) + ret = -EFAULT; } done: up_read(&mm->mmap_sem); From 11c4cd07ba111a09f49625f9e4c851d83daf0a22 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 28 Apr 2020 13:12:20 -0600 Subject: [PATCH 379/562] vfio-pci: Fault mmaps to enable vma tracking Rather than calling remap_pfn_range() when a region is mmap'd, setup a vm_ops handler to support dynamic faulting of the range on access. This allows us to manage a list of vmas actively mapping the area that we can later use to invalidate those mappings. The open callback invalidates the vma range so that all tracking is inserted in the fault handler and removed in the close handler. Reviewed-by: Peter Xu Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 76 ++++++++++++++++++++++++++++- drivers/vfio/pci/vfio_pci_private.h | 7 +++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c6b37b5c04e..66a545a01f8f 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1299,6 +1299,70 @@ static ssize_t vfio_pci_write(void *device_data, const char __user *buf, return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true); } +static int vfio_pci_add_vma(struct vfio_pci_device *vdev, + struct vm_area_struct *vma) +{ + struct vfio_pci_mmap_vma *mmap_vma; + + mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL); + if (!mmap_vma) + return -ENOMEM; + + mmap_vma->vma = vma; + + mutex_lock(&vdev->vma_lock); + list_add(&mmap_vma->vma_next, &vdev->vma_list); + mutex_unlock(&vdev->vma_lock); + + return 0; +} + +/* + * Zap mmaps on open so that we can fault them in on access and therefore + * our vma_list only tracks mappings accessed since last zap. + */ +static void vfio_pci_mmap_open(struct vm_area_struct *vma) +{ + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); +} + +static void vfio_pci_mmap_close(struct vm_area_struct *vma) +{ + struct vfio_pci_device *vdev = vma->vm_private_data; + struct vfio_pci_mmap_vma *mmap_vma; + + mutex_lock(&vdev->vma_lock); + list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { + if (mmap_vma->vma == vma) { + list_del(&mmap_vma->vma_next); + kfree(mmap_vma); + break; + } + } + mutex_unlock(&vdev->vma_lock); +} + +static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct vfio_pci_device *vdev = vma->vm_private_data; + + if (vfio_pci_add_vma(vdev, vma)) + return VM_FAULT_OOM; + + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct vfio_pci_mmap_ops = { + .open = vfio_pci_mmap_open, + .close = vfio_pci_mmap_close, + .fault = vfio_pci_mmap_fault, +}; + static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) { struct vfio_pci_device *vdev = device_data; @@ -1357,8 +1421,14 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; - return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - req_len, vma->vm_page_prot); + /* + * See remap_pfn_range(), called from vfio_pci_fault() but we can't + * change vm_flags within the fault handler. Set them now. + */ + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_ops = &vfio_pci_mmap_ops; + + return 0; } static void vfio_pci_request(void *device_data, unsigned int count) @@ -1608,6 +1678,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init(&vdev->irqlock); mutex_init(&vdev->ioeventfds_lock); INIT_LIST_HEAD(&vdev->ioeventfds_list); + mutex_init(&vdev->vma_lock); + INIT_LIST_HEAD(&vdev->vma_list); ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 36ec69081ecd..9b25f9f6ce1d 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -92,6 +92,11 @@ struct vfio_pci_vf_token { int users; }; +struct vfio_pci_mmap_vma { + struct vm_area_struct *vma; + struct list_head vma_next; +}; + struct vfio_pci_device { struct pci_dev *pdev; void __iomem *barmap[PCI_STD_NUM_BARS]; @@ -132,6 +137,8 @@ struct vfio_pci_device { struct list_head ioeventfds_list; struct vfio_pci_vf_token *vf_token; struct notifier_block nb; + struct mutex vma_lock; + struct list_head vma_list; }; #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) From abafbc551fddede3e0a08dee1dcde08fc0eb8476 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 22 Apr 2020 13:48:11 -0600 Subject: [PATCH 380/562] vfio-pci: Invalidate mmaps and block MMIO access on disabled memory Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 291 ++++++++++++++++++++++++---- drivers/vfio/pci/vfio_pci_config.c | 36 +++- drivers/vfio/pci/vfio_pci_intrs.c | 14 ++ drivers/vfio/pci/vfio_pci_private.h | 8 + drivers/vfio/pci/vfio_pci_rdwr.c | 24 ++- 5 files changed, 330 insertions(+), 43 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 66a545a01f8f..aabba6439a5b 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "vfio_pci_private.h" @@ -184,6 +185,7 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev) static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); static void vfio_pci_disable(struct vfio_pci_device *vdev); +static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data); /* * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND @@ -736,6 +738,12 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, return 0; } +struct vfio_devices { + struct vfio_device **devices; + int cur_index; + int max_index; +}; + static long vfio_pci_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { @@ -809,7 +817,7 @@ static long vfio_pci_ioctl(void *device_data, { void __iomem *io; size_t size; - u16 orig_cmd; + u16 cmd; info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.flags = 0; @@ -829,10 +837,7 @@ static long vfio_pci_ioctl(void *device_data, * Is it really there? Enable memory decode for * implicit access in pci_map_rom(). */ - pci_read_config_word(pdev, PCI_COMMAND, &orig_cmd); - pci_write_config_word(pdev, PCI_COMMAND, - orig_cmd | PCI_COMMAND_MEMORY); - + cmd = vfio_pci_memory_lock_and_enable(vdev); io = pci_map_rom(pdev, &size); if (io) { info.flags = VFIO_REGION_INFO_FLAG_READ; @@ -840,8 +845,8 @@ static long vfio_pci_ioctl(void *device_data, } else { info.size = 0; } + vfio_pci_memory_unlock_and_restore(vdev, cmd); - pci_write_config_word(pdev, PCI_COMMAND, orig_cmd); break; } case VFIO_PCI_VGA_REGION_INDEX: @@ -984,8 +989,16 @@ static long vfio_pci_ioctl(void *device_data, return ret; } else if (cmd == VFIO_DEVICE_RESET) { - return vdev->reset_works ? - pci_try_reset_function(vdev->pdev) : -EINVAL; + int ret; + + if (!vdev->reset_works) + return -EINVAL; + + vfio_pci_zap_and_down_write_memory_lock(vdev); + ret = pci_try_reset_function(vdev->pdev); + up_write(&vdev->memory_lock); + + return ret; } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) { struct vfio_pci_hot_reset_info hdr; @@ -1065,8 +1078,9 @@ static long vfio_pci_ioctl(void *device_data, int32_t *group_fds; struct vfio_pci_group_entry *groups; struct vfio_pci_group_info info; + struct vfio_devices devs = { .cur_index = 0 }; bool slot = false; - int i, count = 0, ret = 0; + int i, group_idx, mem_idx = 0, count = 0, ret = 0; minsz = offsetofend(struct vfio_pci_hot_reset, count); @@ -1118,9 +1132,9 @@ static long vfio_pci_ioctl(void *device_data, * user interface and store the group and iommu ID. This * ensures the group is held across the reset. */ - for (i = 0; i < hdr.count; i++) { + for (group_idx = 0; group_idx < hdr.count; group_idx++) { struct vfio_group *group; - struct fd f = fdget(group_fds[i]); + struct fd f = fdget(group_fds[group_idx]); if (!f.file) { ret = -EBADF; break; @@ -1133,8 +1147,9 @@ static long vfio_pci_ioctl(void *device_data, break; } - groups[i].group = group; - groups[i].id = vfio_external_user_iommu_id(group); + groups[group_idx].group = group; + groups[group_idx].id = + vfio_external_user_iommu_id(group); } kfree(group_fds); @@ -1153,13 +1168,63 @@ static long vfio_pci_ioctl(void *device_data, ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_validate_devs, &info, slot); - if (!ret) - /* User has access, do the reset */ - ret = pci_reset_bus(vdev->pdev); + if (ret) + goto hot_reset_release; + + devs.max_index = count; + devs.devices = kcalloc(count, sizeof(struct vfio_device *), + GFP_KERNEL); + if (!devs.devices) { + ret = -ENOMEM; + goto hot_reset_release; + } + + /* + * We need to get memory_lock for each device, but devices + * can share mmap_sem, therefore we need to zap and hold + * the vma_lock for each device, and only then get each + * memory_lock. + */ + ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_try_zap_and_vma_lock_cb, + &devs, slot); + if (ret) + goto hot_reset_release; + + for (; mem_idx < devs.cur_index; mem_idx++) { + struct vfio_pci_device *tmp; + + tmp = vfio_device_data(devs.devices[mem_idx]); + + ret = down_write_trylock(&tmp->memory_lock); + if (!ret) { + ret = -EBUSY; + goto hot_reset_release; + } + mutex_unlock(&tmp->vma_lock); + } + + /* User has access, do the reset */ + ret = pci_reset_bus(vdev->pdev); hot_reset_release: - for (i--; i >= 0; i--) - vfio_group_put_external_user(groups[i].group); + for (i = 0; i < devs.cur_index; i++) { + struct vfio_device *device; + struct vfio_pci_device *tmp; + + device = devs.devices[i]; + tmp = vfio_device_data(device); + + if (i < mem_idx) + up_write(&tmp->memory_lock); + else + mutex_unlock(&tmp->vma_lock); + vfio_device_put(device); + } + kfree(devs.devices); + + for (group_idx--; group_idx >= 0; group_idx--) + vfio_group_put_external_user(groups[group_idx].group); kfree(groups); return ret; @@ -1299,8 +1364,126 @@ static ssize_t vfio_pci_write(void *device_data, const char __user *buf, return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true); } -static int vfio_pci_add_vma(struct vfio_pci_device *vdev, - struct vm_area_struct *vma) +/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ +static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try) +{ + struct vfio_pci_mmap_vma *mmap_vma, *tmp; + + /* + * Lock ordering: + * vma_lock is nested under mmap_sem for vm_ops callback paths. + * The memory_lock semaphore is used by both code paths calling + * into this function to zap vmas and the vm_ops.fault callback + * to protect the memory enable state of the device. + * + * When zapping vmas we need to maintain the mmap_sem => vma_lock + * ordering, which requires using vma_lock to walk vma_list to + * acquire an mm, then dropping vma_lock to get the mmap_sem and + * reacquiring vma_lock. This logic is derived from similar + * requirements in uverbs_user_mmap_disassociate(). + * + * mmap_sem must always be the top-level lock when it is taken. + * Therefore we can only hold the memory_lock write lock when + * vma_list is empty, as we'd need to take mmap_sem to clear + * entries. vma_list can only be guaranteed empty when holding + * vma_lock, thus memory_lock is nested under vma_lock. + * + * This enables the vm_ops.fault callback to acquire vma_lock, + * followed by memory_lock read lock, while already holding + * mmap_sem without risk of deadlock. + */ + while (1) { + struct mm_struct *mm = NULL; + + if (try) { + if (!mutex_trylock(&vdev->vma_lock)) + return 0; + } else { + mutex_lock(&vdev->vma_lock); + } + while (!list_empty(&vdev->vma_list)) { + mmap_vma = list_first_entry(&vdev->vma_list, + struct vfio_pci_mmap_vma, + vma_next); + mm = mmap_vma->vma->vm_mm; + if (mmget_not_zero(mm)) + break; + + list_del(&mmap_vma->vma_next); + kfree(mmap_vma); + mm = NULL; + } + if (!mm) + return 1; + mutex_unlock(&vdev->vma_lock); + + if (try) { + if (!down_read_trylock(&mm->mmap_sem)) { + mmput(mm); + return 0; + } + } else { + down_read(&mm->mmap_sem); + } + if (mmget_still_valid(mm)) { + if (try) { + if (!mutex_trylock(&vdev->vma_lock)) { + up_read(&mm->mmap_sem); + mmput(mm); + return 0; + } + } else { + mutex_lock(&vdev->vma_lock); + } + list_for_each_entry_safe(mmap_vma, tmp, + &vdev->vma_list, vma_next) { + struct vm_area_struct *vma = mmap_vma->vma; + + if (vma->vm_mm != mm) + continue; + + list_del(&mmap_vma->vma_next); + kfree(mmap_vma); + + zap_vma_ptes(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + } + mutex_unlock(&vdev->vma_lock); + } + up_read(&mm->mmap_sem); + mmput(mm); + } +} + +void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev) +{ + vfio_pci_zap_and_vma_lock(vdev, false); + down_write(&vdev->memory_lock); + mutex_unlock(&vdev->vma_lock); +} + +u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev) +{ + u16 cmd; + + down_write(&vdev->memory_lock); + pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd); + if (!(cmd & PCI_COMMAND_MEMORY)) + pci_write_config_word(vdev->pdev, PCI_COMMAND, + cmd | PCI_COMMAND_MEMORY); + + return cmd; +} + +void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd) +{ + pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd); + up_write(&vdev->memory_lock); +} + +/* Caller holds vma_lock */ +static int __vfio_pci_add_vma(struct vfio_pci_device *vdev, + struct vm_area_struct *vma) { struct vfio_pci_mmap_vma *mmap_vma; @@ -1309,10 +1492,7 @@ static int vfio_pci_add_vma(struct vfio_pci_device *vdev, return -ENOMEM; mmap_vma->vma = vma; - - mutex_lock(&vdev->vma_lock); list_add(&mmap_vma->vma_next, &vdev->vma_list); - mutex_unlock(&vdev->vma_lock); return 0; } @@ -1346,15 +1526,32 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_device *vdev = vma->vm_private_data; + vm_fault_t ret = VM_FAULT_NOPAGE; - if (vfio_pci_add_vma(vdev, vma)) - return VM_FAULT_OOM; + mutex_lock(&vdev->vma_lock); + down_read(&vdev->memory_lock); + + if (!__vfio_pci_memory_enabled(vdev)) { + ret = VM_FAULT_SIGBUS; + mutex_unlock(&vdev->vma_lock); + goto up_out; + } + + if (__vfio_pci_add_vma(vdev, vma)) { + ret = VM_FAULT_OOM; + mutex_unlock(&vdev->vma_lock); + goto up_out; + } + + mutex_unlock(&vdev->vma_lock); if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; - return VM_FAULT_NOPAGE; +up_out: + up_read(&vdev->memory_lock); + return ret; } static const struct vm_operations_struct vfio_pci_mmap_ops = { @@ -1680,6 +1877,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&vdev->ioeventfds_list); mutex_init(&vdev->vma_lock); INIT_LIST_HEAD(&vdev->vma_list); + init_rwsem(&vdev->memory_lock); ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) @@ -1933,12 +2131,6 @@ static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck) kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock); } -struct vfio_devices { - struct vfio_device **devices; - int cur_index; - int max_index; -}; - static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) { struct vfio_devices *devs = data; @@ -1969,6 +2161,39 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) return 0; } +static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) +{ + struct vfio_devices *devs = data; + struct vfio_device *device; + struct vfio_pci_device *vdev; + + if (devs->cur_index == devs->max_index) + return -ENOSPC; + + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) + return -EINVAL; + + if (pci_dev_driver(pdev) != &vfio_pci_driver) { + vfio_device_put(device); + return -EBUSY; + } + + vdev = vfio_device_data(device); + + /* + * Locking multiple devices is prone to deadlock, runaway and + * unwind if we hit contention. + */ + if (!vfio_pci_zap_and_vma_lock(vdev, true)) { + vfio_device_put(device); + return -EBUSY; + } + + devs->devices[devs->cur_index++] = device; + return 0; +} + /* * If a bus or slot reset is available for the provided device and: * - All of the devices affected by that bus or slot reset are unused diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 90c0b80f8acf..3dcddbd572e6 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -395,6 +395,14 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write) *(__le32 *)(&p->write[off]) = cpu_to_le32(write); } +/* Caller should hold memory_lock semaphore */ +bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev) +{ + u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]); + + return cmd & PCI_COMMAND_MEMORY; +} + /* * Restore the *real* BARs after we detect a FLR or backdoor reset. * (backdoor = some device specific technique that we didn't catch) @@ -556,13 +564,18 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, new_cmd = le32_to_cpu(val); + phys_io = !!(phys_cmd & PCI_COMMAND_IO); + virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO); + new_io = !!(new_cmd & PCI_COMMAND_IO); + phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY); virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY); new_mem = !!(new_cmd & PCI_COMMAND_MEMORY); - phys_io = !!(phys_cmd & PCI_COMMAND_IO); - virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO); - new_io = !!(new_cmd & PCI_COMMAND_IO); + if (!new_mem) + vfio_pci_zap_and_down_write_memory_lock(vdev); + else + down_write(&vdev->memory_lock); /* * If the user is writing mem/io enable (new_mem/io) and we @@ -579,8 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, } count = vfio_default_config_write(vdev, pos, count, perm, offset, val); - if (count < 0) + if (count < 0) { + if (offset == PCI_COMMAND) + up_write(&vdev->memory_lock); return count; + } /* * Save current memory/io enable bits in vconfig to allow for @@ -591,6 +607,8 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, *virt_cmd &= cpu_to_le16(~mask); *virt_cmd |= cpu_to_le16(new_cmd & mask); + + up_write(&vdev->memory_lock); } /* Emulate INTx disable */ @@ -828,8 +846,11 @@ static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, pos - offset + PCI_EXP_DEVCAP, &cap); - if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) + if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) { + vfio_pci_zap_and_down_write_memory_lock(vdev); pci_try_reset_function(vdev->pdev); + up_write(&vdev->memory_lock); + } } /* @@ -907,8 +928,11 @@ static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos, pos - offset + PCI_AF_CAP, &cap); - if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) + if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) { + vfio_pci_zap_and_down_write_memory_lock(vdev); pci_try_reset_function(vdev->pdev); + up_write(&vdev->memory_lock); + } } return count; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 2056f3f85f59..1d9fb2592945 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -249,6 +249,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) struct pci_dev *pdev = vdev->pdev; unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI; int ret; + u16 cmd; if (!is_irq_none(vdev)) return -EINVAL; @@ -258,13 +259,16 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) return -ENOMEM; /* return the number of supported vectors if we can't get all: */ + cmd = vfio_pci_memory_lock_and_enable(vdev); ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag); if (ret < nvec) { if (ret > 0) pci_free_irq_vectors(pdev); + vfio_pci_memory_unlock_and_restore(vdev, cmd); kfree(vdev->ctx); return ret; } + vfio_pci_memory_unlock_and_restore(vdev, cmd); vdev->num_ctx = nvec; vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX : @@ -287,6 +291,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, struct pci_dev *pdev = vdev->pdev; struct eventfd_ctx *trigger; int irq, ret; + u16 cmd; if (vector < 0 || vector >= vdev->num_ctx) return -EINVAL; @@ -295,7 +300,11 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, if (vdev->ctx[vector].trigger) { irq_bypass_unregister_producer(&vdev->ctx[vector].producer); + + cmd = vfio_pci_memory_lock_and_enable(vdev); free_irq(irq, vdev->ctx[vector].trigger); + vfio_pci_memory_unlock_and_restore(vdev, cmd); + kfree(vdev->ctx[vector].name); eventfd_ctx_put(vdev->ctx[vector].trigger); vdev->ctx[vector].trigger = NULL; @@ -323,6 +332,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, * such a reset it would be unsuccessful. To avoid this, restore the * cached value of the message prior to enabling. */ + cmd = vfio_pci_memory_lock_and_enable(vdev); if (msix) { struct msi_msg msg; @@ -332,6 +342,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, ret = request_irq(irq, vfio_msihandler, 0, vdev->ctx[vector].name, trigger); + vfio_pci_memory_unlock_and_restore(vdev, cmd); if (ret) { kfree(vdev->ctx[vector].name); eventfd_ctx_put(trigger); @@ -376,6 +387,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) { struct pci_dev *pdev = vdev->pdev; int i; + u16 cmd; for (i = 0; i < vdev->num_ctx; i++) { vfio_virqfd_disable(&vdev->ctx[i].unmask); @@ -384,7 +396,9 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); + cmd = vfio_pci_memory_lock_and_enable(vdev); pci_free_irq_vectors(pdev); + vfio_pci_memory_unlock_and_restore(vdev, cmd); /* * Both disable paths above use pci_intx_for_msi() to clear DisINTx diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 9b25f9f6ce1d..86a02aff8735 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -139,6 +139,7 @@ struct vfio_pci_device { struct notifier_block nb; struct mutex vma_lock; struct list_head vma_list; + struct rw_semaphore memory_lock; }; #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) @@ -181,6 +182,13 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state); +extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev); +extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device + *vdev); +extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev); +extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, + u16 cmd); + #ifdef CONFIG_VFIO_PCI_IGD extern int vfio_pci_igd_init(struct vfio_pci_device *vdev); #else diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index a87992892a9f..916b184df3a5 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -162,6 +162,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, size_t x_start = 0, x_end = 0; resource_size_t end; void __iomem *io; + struct resource *res = &vdev->pdev->resource[bar]; ssize_t done; if (pci_resource_start(pdev, bar)) @@ -177,6 +178,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, count = min(count, (size_t)(end - pos)); + if (res->flags & IORESOURCE_MEM) { + down_read(&vdev->memory_lock); + if (!__vfio_pci_memory_enabled(vdev)) { + up_read(&vdev->memory_lock); + return -EIO; + } + } + if (bar == PCI_ROM_RESOURCE) { /* * The ROM can fill less space than the BAR, so we start the @@ -184,13 +193,17 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, * filling large ROM BARs much faster. */ io = pci_map_rom(pdev, &x_start); - if (!io) - return -ENOMEM; + if (!io) { + done = -ENOMEM; + goto out; + } x_end = end; } else { int ret = vfio_pci_setup_barmap(vdev, bar); - if (ret) - return ret; + if (ret) { + done = ret; + goto out; + } io = vdev->barmap[bar]; } @@ -207,6 +220,9 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, if (bar == PCI_ROM_RESOURCE) pci_unmap_rom(pdev, io); +out: + if (res->flags & IORESOURCE_MEM) + up_read(&vdev->memory_lock); return done; } From bc138db1b96264b9c1779cf18d5a3b186aa90066 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 8 Apr 2020 11:45:28 -0600 Subject: [PATCH 381/562] vfio-pci: Mask cap zero The PCI Code and ID Assignment Specification changed capability ID 0 from reserved to a NULL capability in the v1.1 revision. The NULL capability is defined to include only the 16-bit capability header, ie. only the ID and next pointer. Unfortunately vfio-pci creates a map of config space, where ID 0 is used to reserve the standard type 0 header. Finding an actual capability with this ID therefore results in a bogus range marked in that map and conflicts with subsequent capabilities. As this seems to be a dummy capability anyway and we already support dropping capabilities, let's hide this one rather than delving into the potentially subtle dependencies within our map. Seen on an NVIDIA Tesla T4. Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 90c0b80f8acf..8fc27393bf7a 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1462,7 +1462,12 @@ static int vfio_cap_init(struct vfio_pci_device *vdev) if (ret) return ret; - if (cap <= PCI_CAP_ID_MAX) { + /* + * ID 0 is a NULL capability, conflicting with our fake + * PCI_CAP_ID_BASIC. As it has no content, consider it + * hidden for now. + */ + if (cap && cap <= PCI_CAP_ID_MAX) { len = pci_cap_length[cap]; if (len == 0xFF) { /* Variable length */ len = vfio_cap_len(vdev, cap, pos); From 3e63b94b6274324ff2e7d8615df31586de827c4e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sun, 10 May 2020 12:16:56 -0400 Subject: [PATCH 382/562] vfio/pci: fix memory leaks in alloc_perm_bits() vfio_pci_disable() calls vfio_config_free() but forgets to call free_perm_bits() resulting in memory leaks, unreferenced object 0xc000000c4db2dee0 (size 16): comm "qemu-kvm", pid 4305, jiffies 4295020272 (age 3463.780s) hex dump (first 16 bytes): 00 00 ff 00 ff ff ff ff ff ff ff ff ff ff 00 00 ................ backtrace: [<00000000a6a4552d>] alloc_perm_bits+0x58/0xe0 [vfio_pci] [<00000000ac990549>] vfio_config_init+0xdf0/0x11b0 [vfio_pci] init_pci_cap_msi_perm at drivers/vfio/pci/vfio_pci_config.c:1125 (inlined by) vfio_msi_cap_len at drivers/vfio/pci/vfio_pci_config.c:1180 (inlined by) vfio_cap_len at drivers/vfio/pci/vfio_pci_config.c:1241 (inlined by) vfio_cap_init at drivers/vfio/pci/vfio_pci_config.c:1468 (inlined by) vfio_config_init at drivers/vfio/pci/vfio_pci_config.c:1707 [<000000006db873a1>] vfio_pci_open+0x234/0x700 [vfio_pci] [<00000000630e1906>] vfio_group_fops_unl_ioctl+0x8e0/0xb84 [vfio] [<000000009e34c54f>] ksys_ioctl+0xd8/0x130 [<000000006577923d>] sys_ioctl+0x28/0x40 [<000000006d7b1cf2>] system_call_exception+0x114/0x1e0 [<0000000008ea7dd5>] system_call_common+0xf0/0x278 unreferenced object 0xc000000c4db2e330 (size 16): comm "qemu-kvm", pid 4305, jiffies 4295020272 (age 3463.780s) hex dump (first 16 bytes): 00 ff ff 00 ff ff ff ff ff ff ff ff ff ff 00 00 ................ backtrace: [<000000004c71914f>] alloc_perm_bits+0x44/0xe0 [vfio_pci] [<00000000ac990549>] vfio_config_init+0xdf0/0x11b0 [vfio_pci] [<000000006db873a1>] vfio_pci_open+0x234/0x700 [vfio_pci] [<00000000630e1906>] vfio_group_fops_unl_ioctl+0x8e0/0xb84 [vfio] [<000000009e34c54f>] ksys_ioctl+0xd8/0x130 [<000000006577923d>] sys_ioctl+0x28/0x40 [<000000006d7b1cf2>] system_call_exception+0x114/0x1e0 [<0000000008ea7dd5>] system_call_common+0xf0/0x278 Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Signed-off-by: Qian Cai [aw: rolled in follow-up patch] Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 90c0b80f8acf..43b95f9cdaf7 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1728,8 +1728,11 @@ void vfio_config_free(struct vfio_pci_device *vdev) vdev->vconfig = NULL; kfree(vdev->pci_config_map); vdev->pci_config_map = NULL; - kfree(vdev->msi_perm); - vdev->msi_perm = NULL; + if (vdev->msi_perm) { + free_perm_bits(vdev->msi_perm); + kfree(vdev->msi_perm); + vdev->msi_perm = NULL; + } } /* From b386cd65d961e29710ef6ad84bc788f0a7e9d64e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 May 2020 16:32:23 +0300 Subject: [PATCH 383/562] RDMA/rtrs: Fix some signedness bugs in error handling The problem is that "req->sg_cnt" is an unsigned int so if "nr" is negative, it gets type promoted to a high positive value and the condition is false. This patch fixes it by handling negatives separately. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20200519133223.GN2078@kadam Signed-off-by: Dan Carpenter Reviewed-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 7 +++---- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 468fdd0d8713..96cba06e8ba7 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1047,11 +1047,10 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) /* Align the MR to a 4K page size to match the block virt boundary */ nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); - if (unlikely(nr < req->sg_cnt)) { - if (nr < 0) - return nr; + if (nr < 0) + return nr; + if (unlikely(nr < req->sg_cnt)) return -EINVAL; - } ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); return nr; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index ba8ab33b94a2..eefd149ce7a4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -649,7 +649,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) } nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents, NULL, max_chunk_size); - if (nr < sgt->nents) { + if (nr < 0 || nr < sgt->nents) { err = nr < 0 ? nr : -EINVAL; goto dereg_mr; } From bf1d8edb38bbf0628c1f2de7d13ab98533c1fe60 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 May 2020 18:45:25 +0300 Subject: [PATCH 384/562] RDMA/rtrs: Fix a couple off by one bugs in rtrs_srv_rdma_done() These > comparisons should be >= to prevent accessing one element beyond the end of the buffer. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20200519154525.GA66801@mwanda Signed-off-by: Dan Carpenter Acked-by: Danil Kipnis Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index eefd149ce7a4..863b3942e333 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1213,8 +1213,8 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) msg_id = imm_payload >> sess->mem_bits; off = imm_payload & ((1 << sess->mem_bits) - 1); - if (unlikely(msg_id > srv->queue_depth || - off > max_chunk_size)) { + if (unlikely(msg_id >= srv->queue_depth || + off >= max_chunk_size)) { rtrs_err(s, "Wrong msg_id %u, off %u\n", msg_id, off); close_sess(sess); From e19840867013f0f4081265fdb65b31e80b7bcb5b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 19 May 2020 11:36:12 -0500 Subject: [PATCH 385/562] RDMA/rtrs: client: Fix function return on success Remove the if-statement and return the value contained in _err_, unconditionally. Link: https://lore.kernel.org/r/20200519163612.GA6043@embeddedor Addresses-Coverity-ID: 1493753 ("Identical code for different branches") Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 96cba06e8ba7..1b98785fd8ac 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1590,9 +1590,6 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * In case of error we do not bother to clean previous allocations, * since destroy_con_cq_qp() must be called. */ - - if (err) - return err; return err; } From 6b31afcef51e578e936e66c347ab333c024963da Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 19 May 2020 09:19:12 +0000 Subject: [PATCH 386/562] RDMA/rtrs: server: Fix some error return code Fix to return negative error code -ENOMEM from the some error handling cases instead of 0, as done elsewhere in this function. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Fixes: 91b11610af8d ("RDMA/rtrs: server: sysfs interface functions") Link: https://lore.kernel.org/r/20200519091912.134358-1-weiyongjun1@huawei.com Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Danil Kipnis Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 1 + drivers/infiniband/ulp/rtrs/rtrs-srv.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 0cf015634338..3d7877534bcc 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -189,6 +189,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) } srv->kobj_paths = kobject_create_and_add("paths", &srv->dev.kobj); if (!srv->kobj_paths) { + err = -ENOMEM; pr_err("kobject_create_and_add(): %d\n", err); device_unregister(&srv->dev); goto unlock; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 863b3942e333..1fc6ece036ff 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -660,8 +660,8 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) GFP_KERNEL, sess->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_srv_rdma_done); if (!srv_mr->iu) { - rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", - -ENOMEM); + err = -ENOMEM; + rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err); goto free_iu; } } @@ -2150,8 +2150,10 @@ static int __init rtrs_server_init(void) goto out_chunk_pool; } rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0); - if (!rtrs_wq) + if (!rtrs_wq) { + err = -ENOMEM; goto out_dev_class; + } return 0; From d6ea395072457153f2120e2361657e00f3c0958d Mon Sep 17 00:00:00 2001 From: Danil Kipnis Date: Tue, 19 May 2020 13:14:19 +0200 Subject: [PATCH 387/562] rnbd/rtrs: Pass max segment size from blk user to the rdma library When Block Device Layer is disabled, BLK_MAX_SEGMENT_SIZE is undefined. The rtrs is a transport library and should compile independently of the block layer. The desired max segment size should be passed down by the user. Introduce max_segment_size parameter for the rtrs_clt_open() call. Fixes: f7a7a5c228d4 ("block/rnbd: client: main functionality") Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Fixes: cb80329c9434 ("RDMA/rtrs: client: private header with client structs and functions") Fixes: b5c27cdb094e ("RDMA/rtrs: public interface header to establish RDMA connections") Link: https://lore.kernel.org/r/20200519111419.924170-1-danil.kipnis@cloud.ionos.com Signed-off-by: Danil Kipnis Reported-by: Randy Dunlap Acked-by: Randy Dunlap # build-tested Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-clt.c | 1 + drivers/infiniband/ulp/rtrs/rtrs-clt.c | 17 +++++++++++------ drivers/infiniband/ulp/rtrs/rtrs-clt.h | 1 + drivers/infiniband/ulp/rtrs/rtrs.h | 1 + 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 55bff3b1be71..450a571e6a1e 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1216,6 +1216,7 @@ find_and_get_or_create_sess(const char *sessname, paths, path_cnt, port_nr, sizeof(struct rnbd_iu), RECONNECT_DELAY, BMAX_SEGMENTS, + BLK_MAX_SEGMENT_SIZE, MAX_RECONNECTS); if (IS_ERR(sess->rtrs)) { err = PTR_ERR(sess->rtrs); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 1b98785fd8ac..0ab7e5e912c0 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -12,7 +12,6 @@ #include #include -#include /* for BLK_MAX_SEGMENT_SIZE */ #include "rtrs-clt.h" #include "rtrs-log.h" @@ -1406,7 +1405,8 @@ static void rtrs_clt_close_work(struct work_struct *work); static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, const struct rtrs_addr *path, - size_t con_num, u16 max_segments) + size_t con_num, u16 max_segments, + size_t max_segment_size) { struct rtrs_clt_sess *sess; int err = -ENOMEM; @@ -1443,7 +1443,7 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); sess->s.con_num = con_num; sess->clt = clt; - sess->max_pages_per_mr = max_segments * BLK_MAX_SEGMENT_SIZE >> 12; + sess->max_pages_per_mr = max_segments * max_segment_size >> 12; init_waitqueue_head(&sess->state_wq); sess->state = RTRS_CLT_CONNECTING; atomic_set(&sess->connected_cnt, 0); @@ -2526,6 +2526,7 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev), unsigned int max_segments, + size_t max_segment_size, unsigned int reconnect_delay_sec, unsigned int max_reconnect_attempts) { @@ -2555,6 +2556,7 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, clt->port = port; clt->pdu_sz = pdu_sz; clt->max_segments = max_segments; + clt->max_segment_size = max_segment_size; clt->reconnect_delay_sec = reconnect_delay_sec; clt->max_reconnect_attempts = max_reconnect_attempts; clt->priv = priv; @@ -2636,6 +2638,7 @@ static void free_clt(struct rtrs_clt *clt) * @pdu_sz: Size of extra payload which can be accessed after permit allocation. * @reconnect_delay_sec: time between reconnect tries * @max_segments: Max. number of segments per IO request + * @max_segment_size: Max. size of one segment * @max_reconnect_attempts: Number of times to reconnect on error before giving * up, 0 for * disabled, -1 for forever * @@ -2650,6 +2653,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, size_t paths_num, u16 port, size_t pdu_sz, u8 reconnect_delay_sec, u16 max_segments, + size_t max_segment_size, s16 max_reconnect_attempts) { struct rtrs_clt_sess *sess, *tmp; @@ -2658,7 +2662,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, ops->link_ev, - max_segments, reconnect_delay_sec, + max_segments, max_segment_size, reconnect_delay_sec, max_reconnect_attempts); if (IS_ERR(clt)) { err = PTR_ERR(clt); @@ -2668,7 +2672,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, struct rtrs_clt_sess *sess; sess = alloc_sess(clt, &paths[i], nr_cpu_ids, - max_segments); + max_segments, max_segment_size); if (IS_ERR(sess)) { err = PTR_ERR(sess); goto close_all_sess; @@ -2917,7 +2921,8 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, struct rtrs_clt_sess *sess; int err; - sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments); + sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments, + clt->max_segment_size); if (IS_ERR(sess)) return PTR_ERR(sess); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 039a2ebba2f9..167acd3c90fc 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -164,6 +164,7 @@ struct rtrs_clt { unsigned int max_reconnect_attempts; unsigned int reconnect_delay_sec; unsigned int max_segments; + size_t max_segment_size; void *permits; unsigned long *permits_map; size_t queue_depth; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h index 9879d40467b6..9af750f4d783 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.h +++ b/drivers/infiniband/ulp/rtrs/rtrs.h @@ -58,6 +58,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, size_t path_cnt, u16 port, size_t pdu_sz, u8 reconnect_delay_sec, u16 max_segments, + size_t max_segment_size, s16 max_reconnect_attempts); void rtrs_clt_close(struct rtrs_clt *sess); From bd25c8066fc2e0868228b3cb0563d6c1b65505b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 19 May 2020 18:30:18 -0500 Subject: [PATCH 388/562] RDMA/siw: Replace one-element array and use struct_size() helper The current codebase makes use of one-element arrays in the following form: struct something { int length; u8 data[1]; }; struct something *instance; instance = kmalloc(sizeof(*instance) + size, GFP_KERNEL); instance->length = size; memcpy(instance->data, source, size); but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. So, replace the one-element array with a flexible-array member. Also, make use of the new struct_size() helper to properly calculate the size of struct siw_pbl. This issue was found with the help of Coccinelle and, audited and fixed _manually_. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Link: https://lore.kernel.org/r/20200519233018.GA6105@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 2 +- drivers/infiniband/sw/siw/siw_mem.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index af5e9f8c0fcd..5a58a1cc7a7e 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -139,7 +139,7 @@ struct siw_pble { struct siw_pbl { unsigned int num_buf; unsigned int max_buf; - struct siw_pble pbe[1]; + struct siw_pble pbe[]; }; /* diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index e2061dc0b043..87117781d637 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -349,14 +349,11 @@ dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) struct siw_pbl *siw_pbl_alloc(u32 num_buf) { struct siw_pbl *pbl; - int buf_size = sizeof(*pbl); if (num_buf == 0) return ERR_PTR(-EINVAL); - buf_size += ((num_buf - 1) * sizeof(struct siw_pble)); - - pbl = kzalloc(buf_size, GFP_KERNEL); + pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL); if (!pbl) return ERR_PTR(-ENOMEM); From 349be276509455ac2f19fa4051ed773082c6a27e Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 8 May 2020 17:45:51 +0800 Subject: [PATCH 389/562] RDMA/hns: Bugfix for querying qkey The qkey queried through the query ud qp verb is a fixed value and it should be read from qp context. Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Link: https://lore.kernel.org/r/1588931159-56875-2-git-send-email-liweihang@huawei.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ebe570aa2323..8c24ddfb76fe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4683,7 +4683,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->path_mig_state = IB_MIG_ARMED; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; if (hr_qp->ibqp.qp_type == IB_QPT_UD) - qp_attr->qkey = V2_QKEY_VAL; + qp_attr->qkey = le32_to_cpu(context.qkey_xrcd); qp_attr->rq_psn = roce_get_field(context.byte_108_rx_reqepsn, V2_QPC_BYTE_108_RX_REQ_EPSN_M, From 441c88d5b3ff80108ff536c6cf80591187015403 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 8 May 2020 17:45:52 +0800 Subject: [PATCH 390/562] RDMA/hns: Fix cmdq parameter of querying pf timer resource The firmware has reduced the number of descriptions of command HNS_ROCE_OPC_QUERY_PF_TIMER_RES to 1. The driver needs to adapt, otherwise the hardware will report error 4(CMD_NEXT_ERR). Fixes: 0e40dc2f70cd ("RDMA/hns: Add timer allocation support for hip08") Link: https://lore.kernel.org/r/1588931159-56875-3-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 32 ++++++++-------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8c24ddfb76fe..0e488de22355 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1450,34 +1450,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_pf_timer_res_a *req_a; - struct hns_roce_cmq_desc desc[2]; - int ret, i; + struct hns_roce_cmq_desc desc; + int ret; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_QUERY_PF_TIMER_RES, - true); + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_PF_TIMER_RES, + true); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } - - ret = hns_roce_cmq_send(hr_dev, desc, 2); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) return ret; - req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data; + req_a = (struct hns_roce_pf_timer_res_a *)desc.data; hr_dev->caps.qpc_timer_bt_num = - roce_get_field(req_a->qpc_timer_bt_idx_num, - PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, - PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); + roce_get_field(req_a->qpc_timer_bt_idx_num, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); hr_dev->caps.cqc_timer_bt_num = - roce_get_field(req_a->cqc_timer_bt_idx_num, - PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, - PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); + roce_get_field(req_a->cqc_timer_bt_idx_num, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); return 0; } From 053c0acf52edf97cae7d53c9f249f7c2eb565ed9 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Fri, 8 May 2020 17:45:53 +0800 Subject: [PATCH 391/562] RDMA/hns: Fix assignment to ba_pg_sz of eqe When allocating eq buffer, the size of base address page should be defined by eqe_ba_pg_sz instead of srqwqe_ba_pg_sz. Fixes: 477a0a387072 ("RDMA/hns: Optimize 0 hop addressing for EQE buffer") Link: https://lore.kernel.org/r/1588931159-56875-4-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0e488de22355..96a5ff565b2f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5615,7 +5615,7 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, - hr_dev->caps.srqwqe_ba_pg_sz + + hr_dev->caps.eqe_ba_pg_sz + PAGE_ADDR_SHIFT, NULL, 0); if (err) dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); From 6968aeb5aa64a46175f408fa91e49c19e9558428 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 8 May 2020 17:45:54 +0800 Subject: [PATCH 392/562] RDMA/hns: Fix wrong assignment of SRQ's max_wr srq's attribute max_wr should be 1 less than the total count of wqe. Fixes: ffb1308b88b6 ("RDMA/hns: Move SRQ code to the reasonable place") Link: https://lore.kernel.org/r/1588931159-56875-5-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 96a5ff565b2f..155c658ed3eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5044,8 +5044,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt; - attr->max_sge = srq->max_gs; + attr->max_wr = srq->wqe_cnt - 1; + attr->max_sge = srq->max_gs; memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); From d4d813874156063eae6542c66da2a6971592e46f Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 8 May 2020 17:45:55 +0800 Subject: [PATCH 393/562] RDMA/hns: Fix error with to_hr_hem_entries_count() For ilog2(x), if x is 0 and not a constant variable, it will return -1. And there will be an error as below: hns3 0000:7d:00.0 hns_0: Local work queue 0x8 catast error, sub_event type is: 2 So modify to_hr_hem_entries_shift() to return 0 if conut is 0. Fixes: 54d6638765b0 ("RDMA/hns: Optimize WQE buffer size calculating process") Link: https://lore.kernel.org/r/1588931159-56875-6-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4fcd608ee55f..770a6d5517d5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1112,6 +1112,9 @@ static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift) static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) { + if (!count) + return 0; + return ilog2(to_hr_hem_entries_count(count, buf_shift)); } From 7b611d2f6e8b99b699996c52b823454f4a74978f Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 8 May 2020 17:45:56 +0800 Subject: [PATCH 394/562] RDMA/hns: Store mr len information into mr obj The length information should be stored in the struct ib_mr object, otherwise the length value of a valid mr object would always be 0. Link: https://lore.kernel.org/r/1588931159-56875-7-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_mr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index ecd76759d47a..f727b1875af8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -285,6 +285,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_alloc_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; + mr->ibmr.length = length; return &mr->ibmr; @@ -451,6 +452,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; + mr->ibmr.length = length; return &mr->ibmr; From 252067e95035151372f21c0c8626bf6fed9c5f0b Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 8 May 2020 17:45:57 +0800 Subject: [PATCH 395/562] RDMA/hns: Remove redundant memcpy() srq_context is a local variables and is only used to get some fields from buffer of mailbox. It's meaningless to copy mailbox's buffer's contents back to it. Link: https://lore.kernel.org/r/1588931159-56875-8-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 155c658ed3eb..34c183181908 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5047,8 +5047,6 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) attr->max_wr = srq->wqe_cnt - 1; attr->max_sge = srq->max_gs; - memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); - out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; From 9581a356ccadb24d4a18c62c5c8327997e47241e Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 8 May 2020 17:45:58 +0800 Subject: [PATCH 396/562] RDMA/hns: Rename macro for defining hns hardware page size Rename the PAGE_ADDR_SHIFT as HNS_HW_PAGE_SHIFT to make code more readable. Link: https://lore.kernel.org/r/1588931159-56875-9-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_device.h | 10 ++++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_qp.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_srq.c | 8 ++++---- 7 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 365e7db6c498..742aee846676 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -189,8 +189,8 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, u32 page_size; int i; - /* The minimum shift of the page accessed by hw is PAGE_ADDR_SHIFT */ - buf->page_shift = max_t(int, PAGE_ADDR_SHIFT, page_shift); + /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ + buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift); page_size = 1 << buf->page_shift; buf->npages = DIV_ROUND_UP(size, page_size); @@ -261,7 +261,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int idx = 0; u64 addr; - if (page_shift < PAGE_ADDR_SHIFT) { + if (page_shift < HNS_HW_PAGE_SHIFT) { dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", page_shift); return -EINVAL; diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index d2d7074bbe69..6dd8deaffec8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -149,14 +149,14 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct hns_roce_buf_attr buf_attr = {}; int err; - buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, - hr_dev->caps.cqe_ba_pg_sz + PAGE_ADDR_SHIFT, + hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 770a6d5517d5..1befdbe00b89 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -262,7 +262,9 @@ enum { #define HNS_ROCE_PORT_DOWN 0 #define HNS_ROCE_PORT_UP 1 -#define PAGE_ADDR_SHIFT 12 +/* The minimum page size is 4K for hardware */ +#define HNS_HW_PAGE_SHIFT 12 +#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) /* The minimum page count for hardware access page directly. */ #define HNS_HW_DIRECT_PAGE_COUNT 2 @@ -1080,16 +1082,16 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) return buf->page_list[idx].map; } -#define hr_hw_page_align(x) ALIGN(x, 1 << PAGE_ADDR_SHIFT) +#define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) static inline u64 to_hr_hw_page_addr(u64 addr) { - return addr >> PAGE_ADDR_SHIFT; + return addr >> HNS_HW_PAGE_SHIFT; } static inline u32 to_hr_hw_page_shift(u32 page_shift) { - return page_shift - PAGE_ADDR_SHIFT; + return page_shift - HNS_HW_PAGE_SHIFT; } static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 34c183181908..d31ecaa7010c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5606,7 +5606,7 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) else eq->hop_num = hr_dev->caps.eqe_hop_num; - buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = eq->entries * eq->eqe_size; buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; @@ -5614,7 +5614,7 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, hr_dev->caps.eqe_ba_pg_sz + - PAGE_ADDR_SHIFT, NULL, 0); + HNS_HW_PAGE_SHIFT, NULL, 0); if (err) dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index f727b1875af8..3075e8450cda 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -120,7 +120,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; buf_attr.page_shift = is_fast ? PAGE_SHIFT : - hr_dev->caps.pbl_buf_pg_sz + PAGE_ADDR_SHIFT; + hr_dev->caps.pbl_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = length; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; @@ -130,7 +130,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, buf_attr.mtt_only = is_fast; err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr, - hr_dev->caps.pbl_ba_pg_sz + PAGE_ADDR_SHIFT, + hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, start); if (err) ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); @@ -819,7 +819,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } /* must bigger than minimum hardware page shift */ - if (best_pg_shift < PAGE_ADDR_SHIFT || all_pg_count < 1) { + if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { ret = -EINVAL; ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", best_pg_shift, all_pg_count); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index dca979d8c345..e6ecdce4d63e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -546,7 +546,7 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, if (hr_qp->buff_size < 1) return -EINVAL; - buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; buf_attr->fixed_page = true; buf_attr->region_count = idx; @@ -681,7 +681,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, goto err_inline; } ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, - PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, udata, addr); if (ret) { ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6e5a2adc2ab2..03b76e69a185 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -187,7 +187,7 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, HNS_ROCE_SGE_SIZE * srq->max_gs))); - buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift); buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; @@ -196,7 +196,7 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, hr_dev->caps.srqwqe_ba_pg_sz + - PAGE_ADDR_SHIFT, udata, addr); + HNS_HW_PAGE_SHIFT, udata, addr); if (err) ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); @@ -218,7 +218,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); - buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; @@ -226,7 +226,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, - hr_dev->caps.idx_ba_pg_sz + PAGE_ADDR_SHIFT, + hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) { ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); From 711195e57d341e58133d92cf8aaab1db24e4768d Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Fri, 8 May 2020 17:45:59 +0800 Subject: [PATCH 397/562] RDMA/hns: Reserve one sge in order to avoid local length error When rq/srq sge length is smaller than sq sge length, it will produce a local length error and may cause the bus to hang. Therefore, for rq wqe and srq wqe, one reserved sge pointing to a reserved mr is used to avoid this error. Link: https://lore.kernel.org/r/1588931159-56875-10-git-send-email-liweihang@huawei.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 +++- drivers/infiniband/hw/hns/hns_roce_qp.c | 5 +++-- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1befdbe00b89..bd6e295f4669 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -66,6 +66,8 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 +#define HNS_ROCE_RESERVED_SGE 1 + #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d31ecaa7010c..d2c58d395962 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -629,7 +629,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge >= hr_qp->rq.max_gs)) { ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; @@ -649,6 +649,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, if (i < hr_qp->rq.max_gs) { dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg->addr = 0; + dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); } /* rq support inline data */ @@ -782,8 +783,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } if (i < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); + dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg[i].addr = 0; } @@ -5045,7 +5046,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) attr->srq_limit = limit_wl; attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; + attr->max_sge = srq->max_gs - HNS_ROCE_RESERVED_SGE; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 938b7b522faf..532dcf6a05ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -92,7 +92,9 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x100 +#define HNS_ROCE_INVALID_LKEY 0x0 +#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 + #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e6ecdce4d63e..fb71755f6179 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -386,7 +386,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + + HNS_ROCE_RESERVED_SGE); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -401,7 +402,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs; + cap->max_recv_sge = hr_qp->rq.max_gs - HNS_ROCE_RESERVED_SGE; return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 03b76e69a185..3018c981f1d1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -297,7 +297,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, spin_lock_init(&srq->lock); srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge; + srq->max_gs = init_attr->attr.max_sge + HNS_ROCE_RESERVED_SGE; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); From 819f7427bafd494ef7ca4942ec6322db20722d7b Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Wed, 13 May 2020 12:55:50 +0300 Subject: [PATCH 398/562] RDMA/mlx5: Add init2init as a modify command Missing INIT2INIT entry in the list of modify commands caused DEVX applications to be unable to modify_qp for this transition state. Add the MLX5_CMD_OP_INIT2INIT_QP opcode to the list of allowed DEVX opcodes. Fixes: e662e14d801b ("IB/mlx5: Add DEVX support for modify and query commands") Link: https://lore.kernel.org/r/20200513095550.211345-1-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 1d7feed6d3cb..c339dd5ee694 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -820,6 +820,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: case MLX5_CMD_OP_RST2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: From d0b1e4a638d670a09f42017a3e567dc846931ba8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 28 Apr 2020 14:18:55 +0000 Subject: [PATCH 399/562] scsi: cxlflash: Fix error return code in cxlflash_probe() Fix to return negative error code -ENOMEM from create_afu error handling case instead of 0, as done elsewhere in this function. Link: https://lore.kernel.org/r/20200428141855.88704-1-weiyongjun1@huawei.com Acked-by: Matthew R. Ochs Signed-off-by: Wei Yongjun Signed-off-by: Martin K. Petersen --- drivers/scsi/cxlflash/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index fbd2ae40dab4..fcc5aa9f6014 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3744,6 +3744,7 @@ static int cxlflash_probe(struct pci_dev *pdev, cfg->afu_cookie = cfg->ops->create_afu(pdev); if (unlikely(!cfg->afu_cookie)) { dev_err(dev, "%s: create_afu failed\n", __func__); + rc = -ENOMEM; goto out_remove; } From 5cac1095cf289f7623f835a9212b9ec0ad3b85b3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 7 May 2020 15:27:50 -0700 Subject: [PATCH 400/562] scsi: ufs: Make ufshcd_wait_for_register() sleep instead of busy-waiting The ufshcd_wait_for_register() function either sleeps or spins until the specified register has reached the desired value. Busy-waiting is not only considered a bad practice but also has a bad impact on energy consumption. Always sleep instead of spinning by making sure that all ufshcd_wait_for_register() calls happen from a context where it is allowed to sleep. The only function call that has to be moved is the ufshcd_hba_stop() call in ufshcd_host_reset_and_restore(). Link: https://lore.kernel.org/r/20200507222750.19113-1-bvanassche@acm.org Cc: Can Guo Cc: Avri Altman Cc: Bean Huo Cc: Alim Akhtar Cc: Asutosh Das Tested-by: Bean Huo Reviewed-by: Stanley Chu Reviewed-by: Bean Huo Reviewed-by: Asutosh Das Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 51 +++++++++++++++++++++------------------ drivers/scsi/ufs/ufshcd.h | 2 +- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index aca50ed39844..c3389c9a4f29 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -562,21 +562,21 @@ void ufshcd_delay_us(unsigned long us, unsigned long tolerance) } EXPORT_SYMBOL_GPL(ufshcd_delay_us); -/* +/** * ufshcd_wait_for_register - wait for register value to change - * @hba - per-adapter interface - * @reg - mmio register offset - * @mask - mask to apply to read register value - * @val - wait condition - * @interval_us - polling interval in microsecs - * @timeout_ms - timeout in millisecs - * @can_sleep - perform sleep or just spin + * @hba: per-adapter interface + * @reg: mmio register offset + * @mask: mask to apply to the read register value + * @val: value to wait for + * @interval_us: polling interval in microseconds + * @timeout_ms: timeout in milliseconds * - * Returns -ETIMEDOUT on error, zero on success + * Return: + * -ETIMEDOUT on error, zero on success. */ int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask, u32 val, unsigned long interval_us, - unsigned long timeout_ms, bool can_sleep) + unsigned long timeout_ms) { int err = 0; unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); @@ -585,10 +585,7 @@ int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask, val = val & mask; while ((ufshcd_readl(hba, reg) & mask) != val) { - if (can_sleep) - usleep_range(interval_us, interval_us + 50); - else - udelay(interval_us); + usleep_range(interval_us, interval_us + 50); if (time_after(jiffies, timeout)) { if ((ufshcd_readl(hba, reg) & mask) != val) err = -ETIMEDOUT; @@ -2577,7 +2574,7 @@ ufshcd_clear_cmd(struct ufs_hba *hba, int tag) */ err = ufshcd_wait_for_register(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL, - mask, ~mask, 1000, 1000, true); + mask, ~mask, 1000, 1000); return err; } @@ -4247,16 +4244,23 @@ EXPORT_SYMBOL_GPL(ufshcd_make_hba_operational); /** * ufshcd_hba_stop - Send controller to reset state * @hba: per adapter instance - * @can_sleep: perform sleep or just spin */ -static inline void ufshcd_hba_stop(struct ufs_hba *hba, bool can_sleep) +static inline void ufshcd_hba_stop(struct ufs_hba *hba) { + unsigned long flags; int err; + /* + * Obtain the host lock to prevent that the controller is disabled + * while the UFS interrupt handler is active on another CPU. + */ + spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_writel(hba, CONTROLLER_DISABLE, REG_CONTROLLER_ENABLE); + spin_unlock_irqrestore(hba->host->host_lock, flags); + err = ufshcd_wait_for_register(hba, REG_CONTROLLER_ENABLE, CONTROLLER_ENABLE, CONTROLLER_DISABLE, - 10, 1, can_sleep); + 10, 1); if (err) dev_err(hba->dev, "%s: Controller disable failed\n", __func__); } @@ -4277,7 +4281,7 @@ int ufshcd_hba_enable(struct ufs_hba *hba) if (!ufshcd_is_hba_active(hba)) /* change controller state to "reset state" */ - ufshcd_hba_stop(hba, true); + ufshcd_hba_stop(hba); /* UniPro link is disabled at this point */ ufshcd_set_link_off(hba); @@ -5906,7 +5910,7 @@ static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag) /* poll for max. 1 sec to clear door bell register by h/w */ err = ufshcd_wait_for_register(hba, REG_UTP_TASK_REQ_DOOR_BELL, - mask, 0, 1000, 1000, true); + mask, 0, 1000, 1000); out: return err; } @@ -6482,8 +6486,9 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) * Stop the host controller and complete the requests * cleared by h/w */ + ufshcd_hba_stop(hba); + spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_hba_stop(hba, false); hba->silence_err_logs = true; ufshcd_complete_requests(hba); hba->silence_err_logs = false; @@ -8049,7 +8054,7 @@ static int ufshcd_link_state_transition(struct ufs_hba *hba, * Change controller state to "reset state" which * should also put the link in off/reset state */ - ufshcd_hba_stop(hba, true); + ufshcd_hba_stop(hba); /* * TODO: Check if we need any delay to make sure that * controller is reset @@ -8608,7 +8613,7 @@ void ufshcd_remove(struct ufs_hba *hba) scsi_remove_host(hba->host); /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); - ufshcd_hba_stop(hba, true); + ufshcd_hba_stop(hba); ufshcd_exit_clk_scaling(hba); ufshcd_exit_clk_gating(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index e3dfb48e669e..88d420242782 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -831,7 +831,7 @@ int ufshcd_uic_hibern8_exit(struct ufs_hba *hba); void ufshcd_delay_us(unsigned long us, unsigned long tolerance); int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask, u32 val, unsigned long interval_us, - unsigned long timeout_ms, bool can_sleep); + unsigned long timeout_ms); void ufshcd_parse_dev_ref_clk_freq(struct ufs_hba *hba, struct clk *refclk); void ufshcd_update_reg_hist(struct ufs_err_reg_hist *reg_hist, u32 reg); From c5f8852273dd7df45d3fe12cf0e2ec68cacfad80 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 15 May 2020 14:48:56 +0900 Subject: [PATCH 401/562] scsi: sd: Add zoned capabilities device attribute Export through sysfs as a scsi_disk attribute the zoned capabilities of a disk ("zoned_cap" attribute file). This new attribute indicates in human readable form (i.e. a string) the zoned block capabilities implemented by the disk as found in the ZONED field of the disk block device characteristics VPD page. The possible values are: - "none": ZONED=00b (not reported), regular disk - "host-aware": ZONED=01b, host-aware ZBC disk - "drive-managed": ZONED=10b, drive-managed ZBC disk (regular disk interface) For completeness, also add the following value which is detected using the device type rather than the ZONED field: - "host-managed": device type = 0x14 (TYPE_ZBC), host-managed ZBC disk This new sysfs attribute is purely informational and complementary to the "zoned" device request queue sysfs attribute as it allows applications and user daemons (e.g. udev) to easily differentiate regular disks from drive-managed SMR disks without the need for direct access tools such as provided by sg3utils. Link: https://lore.kernel.org/r/20200515054856.1408575-1-damien.lemoal@wdc.com Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8929e178c6f8..1bc2a061efa9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -528,6 +528,21 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(max_write_same_blocks); +static ssize_t +zoned_cap_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + + if (sdkp->device->type == TYPE_ZBC) + return sprintf(buf, "host-managed\n"); + if (sdkp->zoned == 1) + return sprintf(buf, "host-aware\n"); + if (sdkp->zoned == 2) + return sprintf(buf, "drive-managed\n"); + return sprintf(buf, "none\n"); +} +static DEVICE_ATTR_RO(zoned_cap); + static struct attribute *sd_disk_attrs[] = { &dev_attr_cache_type.attr, &dev_attr_FUA.attr, @@ -541,6 +556,7 @@ static struct attribute *sd_disk_attrs[] = { &dev_attr_zeroing_mode.attr, &dev_attr_max_write_same_blocks.attr, &dev_attr_max_medium_access_timeouts.attr, + &dev_attr_zoned_cap.attr, NULL, }; ATTRIBUTE_GROUPS(sd_disk); From e16b9ed61e078d836a0f24a82080cf29d7539c7e Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Fri, 15 May 2020 22:13:42 +0800 Subject: [PATCH 402/562] scsi: hisi_sas: Do not reset phy timer to wait for stray phy up We found out that after phy up, the hardware reports another oob interrupt but did not follow a phy up interrupt: oob ready -> phy up -> DEV found -> oob read -> wait phy up -> timeout We run link reset when wait phy up timeout, and it send a normal disk into reset processing. So we made some circumvention action in the code, so that this abnormal oob interrupt will not start the timer to wait for phy up. Link: https://lore.kernel.org/r/1589552025-165012-2-git-send-email-john.garry@huawei.com Signed-off-by: Luo Jiaxing Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 9a6deb21fe4d..11caa4b0d797 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -898,8 +898,11 @@ void hisi_sas_phy_oob_ready(struct hisi_hba *hisi_hba, int phy_no) struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; struct device *dev = hisi_hba->dev; + dev_dbg(dev, "phy%d OOB ready\n", phy_no); + if (phy->phy_attached) + return; + if (!timer_pending(&phy->timer)) { - dev_dbg(dev, "phy%d OOB ready\n", phy_no); phy->timer.expires = jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT * HZ; add_timer(&phy->timer); } From 1e954d1f002db802937deb87a868c3f62fe1badf Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Fri, 15 May 2020 22:13:43 +0800 Subject: [PATCH 403/562] scsi: hisi_sas: Modify the commit information for DSM method Make it clear that BIOS may modify some register settings. Link: https://lore.kernel.org/r/1589552025-165012-3-git-send-email-john.garry@huawei.com Signed-off-by: Luo Jiaxing Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 59b1421607dd..edab8383b485 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -912,11 +912,15 @@ static int hw_init_v3_hw(struct hisi_hba *hisi_hba) return -EINVAL; } - /* Switch over to MSI handling , from PCI AER default */ + /* + * This DSM handles some hardware-related configurations: + * 1. Switch over to MSI error handling in kernel + * 2. BIOS *may* reset some register values through this method + */ obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &guid, 0, DSM_FUNC_ERR_HANDLE_MSI, NULL); if (!obj) - dev_warn(dev, "Switch over to MSI handling failed\n"); + dev_warn(dev, "can not find DSM method, ignore\n"); else ACPI_FREE(obj); From 1a0efb55b2bb9e970b8842030ce65d645ddba90c Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Fri, 15 May 2020 22:13:44 +0800 Subject: [PATCH 404/562] scsi: hisi_sas: Add SAS_RAS_INTR0 to debugfs register name list Register SAS_RAS_INTR0 can help us to figure out which ECC error has occurred. This register is helpful to identify RAS issue, so we add it to the list of debugfs register name list for easier retrieval. Link: https://lore.kernel.org/r/1589552025-165012-4-git-send-email-john.garry@huawei.com Signed-off-by: Luo Jiaxing Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index edab8383b485..ec5014baffeb 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2901,6 +2901,7 @@ static const struct hisi_sas_debugfs_reg debugfs_axi_reg = { }; static const struct hisi_sas_debugfs_reg_lu debugfs_ras_reg_lu[] = { + HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR0), HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR1), HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR0_MASK), HISI_SAS_DEBUGFS_REG(SAS_RAS_INTR1_MASK), From 1cdee004426164d1b00b66d3f6e7308c3714def6 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 15 May 2020 22:13:45 +0800 Subject: [PATCH 405/562] scsi: hisi_sas: Stop returning error code from slot_complete_vX_hw() The error codes are never checked, stop returning them. Link: https://lore.kernel.org/r/1589552025-165012-5-git-send-email-john.garry@huawei.com Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 12 ++++-------- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 16 ++++++---------- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 16 ++++++---------- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index c205bff20943..2e1718f9ade2 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1175,15 +1175,14 @@ static void slot_err_v1_hw(struct hisi_hba *hisi_hba, } -static int slot_complete_v1_hw(struct hisi_hba *hisi_hba, - struct hisi_sas_slot *slot) +static void slot_complete_v1_hw(struct hisi_hba *hisi_hba, + struct hisi_sas_slot *slot) { struct sas_task *task = slot->task; struct hisi_sas_device *sas_dev; struct device *dev = hisi_hba->dev; struct task_status_struct *ts; struct domain_device *device; - enum exec_status sts; struct hisi_sas_complete_v1_hdr *complete_queue = hisi_hba->complete_hdr[slot->cmplt_queue]; struct hisi_sas_complete_v1_hdr *complete_hdr; @@ -1194,7 +1193,7 @@ static int slot_complete_v1_hw(struct hisi_hba *hisi_hba, cmplt_hdr_data = le32_to_cpu(complete_hdr->data); if (unlikely(!task || !task->lldd_task || !task->dev)) - return -EINVAL; + return; ts = &task->task_status; device = task->dev; @@ -1260,7 +1259,7 @@ static int slot_complete_v1_hw(struct hisi_hba *hisi_hba, slot_err_v1_hw(hisi_hba, task, slot); if (unlikely(slot->abort)) - return ts->stat; + return; goto out; } @@ -1309,12 +1308,9 @@ static int slot_complete_v1_hw(struct hisi_hba *hisi_hba, out: hisi_sas_slot_task_free(hisi_hba, task, slot); - sts = ts->stat; if (task->task_done) task->task_done(task); - - return sts; } /* Interrupts */ diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index c725cffe141e..e7e7849a4c14 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2318,8 +2318,8 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba, } } -static int -slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) +static void slot_complete_v2_hw(struct hisi_hba *hisi_hba, + struct hisi_sas_slot *slot) { struct sas_task *task = slot->task; struct hisi_sas_device *sas_dev; @@ -2327,7 +2327,6 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) struct task_status_struct *ts; struct domain_device *device; struct sas_ha_struct *ha; - enum exec_status sts; struct hisi_sas_complete_v2_hdr *complete_queue = hisi_hba->complete_hdr[slot->cmplt_queue]; struct hisi_sas_complete_v2_hdr *complete_hdr = @@ -2337,7 +2336,7 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) u32 dw0; if (unlikely(!task || !task->lldd_task || !task->dev)) - return -EINVAL; + return; ts = &task->task_status; device = task->dev; @@ -2406,7 +2405,7 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) error_info[2], error_info[3]); if (unlikely(slot->abort)) - return ts->stat; + return; goto out; } @@ -2456,12 +2455,11 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) } out: - sts = ts->stat; spin_lock_irqsave(&task->task_state_lock, flags); if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { spin_unlock_irqrestore(&task->task_state_lock, flags); dev_info(dev, "slot complete: task(%pK) aborted\n", task); - return SAS_ABORTED_TASK; + return; } task->task_state_flags |= SAS_TASK_STATE_DONE; spin_unlock_irqrestore(&task->task_state_lock, flags); @@ -2473,15 +2471,13 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) spin_unlock_irqrestore(&device->done_lock, flags); dev_info(dev, "slot complete: task(%pK) ignored\n", task); - return sts; + return; } spin_unlock_irqrestore(&device->done_lock, flags); } if (task->task_done) task->task_done(task); - - return sts; } static void prep_ata_v2_hw(struct hisi_hba *hisi_hba, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index ec5014baffeb..3e6b78a1f993 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2156,8 +2156,8 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, } } -static int -slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) +static void slot_complete_v3_hw(struct hisi_hba *hisi_hba, + struct hisi_sas_slot *slot) { struct sas_task *task = slot->task; struct hisi_sas_device *sas_dev; @@ -2165,7 +2165,6 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) struct task_status_struct *ts; struct domain_device *device; struct sas_ha_struct *ha; - enum exec_status sts; struct hisi_sas_complete_v3_hdr *complete_queue = hisi_hba->complete_hdr[slot->cmplt_queue]; struct hisi_sas_complete_v3_hdr *complete_hdr = @@ -2175,7 +2174,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) u32 dw0, dw1, dw3; if (unlikely(!task || !task->lldd_task || !task->dev)) - return -EINVAL; + return; ts = &task->task_status; device = task->dev; @@ -2237,7 +2236,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) error_info[0], error_info[1], error_info[2], error_info[3]); if (unlikely(slot->abort)) - return ts->stat; + return; goto out; } @@ -2282,12 +2281,11 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) } out: - sts = ts->stat; spin_lock_irqsave(&task->task_state_lock, flags); if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { spin_unlock_irqrestore(&task->task_state_lock, flags); dev_info(dev, "slot complete: task(%pK) aborted\n", task); - return SAS_ABORTED_TASK; + return; } task->task_state_flags |= SAS_TASK_STATE_DONE; spin_unlock_irqrestore(&task->task_state_lock, flags); @@ -2299,15 +2297,13 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) spin_unlock_irqrestore(&device->done_lock, flags); dev_info(dev, "slot complete: task(%pK) ignored\n ", task); - return sts; + return; } spin_unlock_irqrestore(&device->done_lock, flags); } if (task->task_done) task->task_done(task); - - return sts; } static irqreturn_t cq_thread_v3_hw(int irq_no, void *p) From 5482d56bfedf1a2ce6450076e844676e20ab4e0a Mon Sep 17 00:00:00 2001 From: Lance Digby Date: Mon, 18 May 2020 11:02:16 +1000 Subject: [PATCH 406/562] scsi: target: core: Add initiatorname to NON_EXISTENT_LUN error The NON_EXISTENT_LUN error can be written without an error condition on the initiator responsible. Adding the initiatorname to this message will reduce the effort required to fix this when many initiators are supported by a target. This version ensures the initiator name is also printed on the same message in transport_lookup_tmr_lun for consistency. Link: https://lore.kernel.org/r/9b13bb2e1f52f1792cd81850ee95bf3781bb5363.1589759816.git.lance.digby@gmail.com Reviewed-by: Mike Christie Signed-off-by: Lance Digby Signed-off-by: Martin K. Petersen --- drivers/target/target_core_device.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index d5800cf22df7..46b0e1ceb77f 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -100,9 +100,10 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) */ if (unpacked_lun != 0) { pr_err("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" - " Access for 0x%08llx\n", + " Access for 0x%08llx from %s\n", se_cmd->se_tfo->fabric_name, - unpacked_lun); + unpacked_lun, + nacl->initiatorname); return TCM_NON_EXISTENT_LUN; } @@ -174,9 +175,10 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) if (!se_lun) { pr_debug("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" - " Access for 0x%08llx\n", + " Access for 0x%08llx for %s\n", se_cmd->se_tfo->fabric_name, - unpacked_lun); + unpacked_lun, + nacl->initiatorname); return -ENODEV; } se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); From 61fb2482216679b9e1e797440c148bb143a5040a Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Mon, 18 May 2020 18:48:33 +0200 Subject: [PATCH 407/562] scsi: target: tcmu: Userspace must not complete queued commands When tcmu queues a new command - no matter whether in command ring or in qfull_queue - a cmd_id from IDR udev->commands is assigned to the command. If userspace sends a wrong command completion containing the cmd_id of a command on the qfull_queue, tcmu_handle_completions() finds the command in the IDR and calls tcmu_handle_completion() for it. This might do some nasty things because commands in qfull_queue do not have a valid dbi list. To fix this bug, we no longer add queued commands to the idr. Instead the cmd_id is assign when a command is written to the command ring. Due to this change I had to adapt the source code at several places where up to now an idr_for_each had been done. [mkp: fix checkpatch warnings] Link: https://lore.kernel.org/r/20200518164833.12775-1-bstroesser@ts.fujitsu.com Acked-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 154 ++++++++++++++---------------- 1 file changed, 71 insertions(+), 83 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 72ffd359fc4d..1d030d57c28c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -882,41 +882,24 @@ static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd, return command_size; } -static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd, unsigned int tmo, - struct timer_list *timer) +static void tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd, unsigned int tmo, + struct timer_list *timer) { - struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; - int cmd_id; - - if (tcmu_cmd->cmd_id) - goto setup_timer; - - cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 1, USHRT_MAX, GFP_NOWAIT); - if (cmd_id < 0) { - pr_err("tcmu: Could not allocate cmd id.\n"); - return cmd_id; - } - tcmu_cmd->cmd_id = cmd_id; - - pr_debug("allocated cmd %u for dev %s tmo %lu\n", tcmu_cmd->cmd_id, - udev->name, tmo / MSEC_PER_SEC); - -setup_timer: if (!tmo) - return 0; + return; tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo)); if (!timer_pending(timer)) mod_timer(timer, tcmu_cmd->deadline); - return 0; + pr_debug("Timeout set up for cmd %p, dev = %s, tmo = %lu\n", tcmu_cmd, + tcmu_cmd->tcmu_dev->name, tmo / MSEC_PER_SEC); } static int add_to_qfull_queue(struct tcmu_cmd *tcmu_cmd) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; unsigned int tmo; - int ret; /* * For backwards compat if qfull_time_out is not set use @@ -931,13 +914,11 @@ static int add_to_qfull_queue(struct tcmu_cmd *tcmu_cmd) else tmo = TCMU_TIME_OUT; - ret = tcmu_setup_cmd_timer(tcmu_cmd, tmo, &udev->qfull_timer); - if (ret) - return ret; + tcmu_setup_cmd_timer(tcmu_cmd, tmo, &udev->qfull_timer); list_add_tail(&tcmu_cmd->queue_entry, &udev->qfull_queue); - pr_debug("adding cmd %u on dev %s to ring space wait queue\n", - tcmu_cmd->cmd_id, udev->name); + pr_debug("adding cmd %p on dev %s to ring space wait queue\n", + tcmu_cmd, udev->name); return 0; } @@ -959,7 +940,7 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err) struct tcmu_mailbox *mb; struct tcmu_cmd_entry *entry; struct iovec *iov; - int iov_cnt, ret; + int iov_cnt, cmd_id; uint32_t cmd_head; uint64_t cdb_off; bool copy_to_data_area; @@ -1060,14 +1041,21 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err) } entry->req.iov_bidi_cnt = iov_cnt; - ret = tcmu_setup_cmd_timer(tcmu_cmd, udev->cmd_time_out, - &udev->cmd_timer); - if (ret) { - tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); + cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 1, USHRT_MAX, GFP_NOWAIT); + if (cmd_id < 0) { + pr_err("tcmu: Could not allocate cmd id.\n"); + tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); *scsi_err = TCM_OUT_OF_RESOURCES; return -1; } + tcmu_cmd->cmd_id = cmd_id; + + pr_debug("allocated cmd id %u for cmd %p dev %s\n", tcmu_cmd->cmd_id, + tcmu_cmd, udev->name); + + tcmu_setup_cmd_timer(tcmu_cmd, udev->cmd_time_out, &udev->cmd_timer); + entry->hdr.cmd_id = tcmu_cmd->cmd_id; /* @@ -1279,50 +1267,39 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) return handled; } -static int tcmu_check_expired_cmd(int id, void *p, void *data) +static void tcmu_check_expired_ring_cmd(struct tcmu_cmd *cmd) { - struct tcmu_cmd *cmd = p; - struct tcmu_dev *udev = cmd->tcmu_dev; - u8 scsi_status; struct se_cmd *se_cmd; - bool is_running; - - if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) - return 0; if (!time_after(jiffies, cmd->deadline)) - return 0; + return; - is_running = test_bit(TCMU_CMD_BIT_INFLIGHT, &cmd->flags); + set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); + list_del_init(&cmd->queue_entry); se_cmd = cmd->se_cmd; + cmd->se_cmd = NULL; - if (is_running) { - /* - * If cmd_time_out is disabled but qfull is set deadline - * will only reflect the qfull timeout. Ignore it. - */ - if (!udev->cmd_time_out) - return 0; + pr_debug("Timing out inflight cmd %u on dev %s.\n", + cmd->cmd_id, cmd->tcmu_dev->name); - set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); - /* - * target_complete_cmd will translate this to LUN COMM FAILURE - */ - scsi_status = SAM_STAT_CHECK_CONDITION; - list_del_init(&cmd->queue_entry); - cmd->se_cmd = NULL; - } else { - list_del_init(&cmd->queue_entry); - idr_remove(&udev->commands, id); - tcmu_free_cmd(cmd); - scsi_status = SAM_STAT_TASK_SET_FULL; - } + target_complete_cmd(se_cmd, SAM_STAT_CHECK_CONDITION); +} - pr_debug("Timing out cmd %u on dev %s that is %s.\n", - id, udev->name, is_running ? "inflight" : "queued"); +static void tcmu_check_expired_queue_cmd(struct tcmu_cmd *cmd) +{ + struct se_cmd *se_cmd; - target_complete_cmd(se_cmd, scsi_status); - return 0; + if (!time_after(jiffies, cmd->deadline)) + return; + + list_del_init(&cmd->queue_entry); + se_cmd = cmd->se_cmd; + tcmu_free_cmd(cmd); + + pr_debug("Timing out queued cmd %p on dev %s.\n", + cmd, cmd->tcmu_dev->name); + + target_complete_cmd(se_cmd, SAM_STAT_TASK_SET_FULL); } static void tcmu_device_timedout(struct tcmu_dev *udev) @@ -1407,16 +1384,15 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) return &udev->se_dev; } -static bool run_qfull_queue(struct tcmu_dev *udev, bool fail) +static void run_qfull_queue(struct tcmu_dev *udev, bool fail) { struct tcmu_cmd *tcmu_cmd, *tmp_cmd; LIST_HEAD(cmds); - bool drained = true; sense_reason_t scsi_ret; int ret; if (list_empty(&udev->qfull_queue)) - return true; + return; pr_debug("running %s's cmdr queue forcefail %d\n", udev->name, fail); @@ -1425,11 +1401,10 @@ static bool run_qfull_queue(struct tcmu_dev *udev, bool fail) list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, queue_entry) { list_del_init(&tcmu_cmd->queue_entry); - pr_debug("removing cmd %u on dev %s from queue\n", - tcmu_cmd->cmd_id, udev->name); + pr_debug("removing cmd %p on dev %s from queue\n", + tcmu_cmd, udev->name); if (fail) { - idr_remove(&udev->commands, tcmu_cmd->cmd_id); /* * We were not able to even start the command, so * fail with busy to allow a retry in case runner @@ -1444,10 +1419,8 @@ static bool run_qfull_queue(struct tcmu_dev *udev, bool fail) ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); if (ret < 0) { - pr_debug("cmd %u on dev %s failed with %u\n", - tcmu_cmd->cmd_id, udev->name, scsi_ret); - - idr_remove(&udev->commands, tcmu_cmd->cmd_id); + pr_debug("cmd %p on dev %s failed with %u\n", + tcmu_cmd, udev->name, scsi_ret); /* * Ignore scsi_ret for now. target_complete_cmd * drops it. @@ -1462,13 +1435,11 @@ static bool run_qfull_queue(struct tcmu_dev *udev, bool fail) * the queue */ list_splice_tail(&cmds, &udev->qfull_queue); - drained = false; break; } } tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer); - return drained; } static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on) @@ -1652,6 +1623,8 @@ static void tcmu_dev_kref_release(struct kref *kref) if (tcmu_check_and_free_pending_cmd(cmd) != 0) all_expired = false; } + if (!list_empty(&udev->qfull_queue)) + all_expired = false; idr_destroy(&udev->commands); WARN_ON(!all_expired); @@ -2037,9 +2010,6 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) mutex_lock(&udev->cmdr_lock); idr_for_each_entry(&udev->commands, cmd, i) { - if (!test_bit(TCMU_CMD_BIT_INFLIGHT, &cmd->flags)) - continue; - pr_debug("removing cmd %u on dev %s from ring (is expired %d)\n", cmd->cmd_id, udev->name, test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)); @@ -2076,6 +2046,8 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) del_timer(&udev->cmd_timer); + run_qfull_queue(udev, false); + mutex_unlock(&udev->cmdr_lock); } @@ -2699,6 +2671,7 @@ static void find_free_blocks(void) static void check_timedout_devices(void) { struct tcmu_dev *udev, *tmp_dev; + struct tcmu_cmd *cmd, *tmp_cmd; LIST_HEAD(devs); spin_lock_bh(&timed_out_udevs_lock); @@ -2709,9 +2682,24 @@ static void check_timedout_devices(void) spin_unlock_bh(&timed_out_udevs_lock); mutex_lock(&udev->cmdr_lock); - idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); - tcmu_set_next_deadline(&udev->inflight_queue, &udev->cmd_timer); + /* + * If cmd_time_out is disabled but qfull is set deadline + * will only reflect the qfull timeout. Ignore it. + */ + if (udev->cmd_time_out) { + list_for_each_entry_safe(cmd, tmp_cmd, + &udev->inflight_queue, + queue_entry) { + tcmu_check_expired_ring_cmd(cmd); + } + tcmu_set_next_deadline(&udev->inflight_queue, + &udev->cmd_timer); + } + list_for_each_entry_safe(cmd, tmp_cmd, &udev->qfull_queue, + queue_entry) { + tcmu_check_expired_queue_cmd(cmd); + } tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer); mutex_unlock(&udev->cmdr_lock); From 246ee22583ed4847dcdd9a2f307eeca856f42882 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:16:58 -0700 Subject: [PATCH 408/562] scsi: qla2xxx: Fix spelling of a variable name Change "offet" into "offset" in a variable name. Link: https://lore.kernel.org/r/20200518211712.11395-2-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Reviewed-by: Roman Bolshakov Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_fw.h | 2 +- drivers/scsi/qla2xxx/qla_init.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index f9bad5bd7198..b364a497e33d 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -1292,7 +1292,7 @@ struct device_reg_24xx { }; /* RISC-RISC semaphore register PCI offet */ #define RISC_REGISTER_BASE_OFFSET 0x7010 -#define RISC_REGISTER_WINDOW_OFFET 0x6 +#define RISC_REGISTER_WINDOW_OFFSET 0x6 /* RISC-RISC semaphore/flag register (risc address 0x7016) */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 95b6166ae0cc..f8fe0334571f 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2861,7 +2861,7 @@ qla25xx_read_risc_sema_reg(scsi_qla_host_t *vha, uint32_t *data) struct device_reg_24xx __iomem *reg = &vha->hw->iobase->isp24; WRT_REG_DWORD(®->iobase_addr, RISC_REGISTER_BASE_OFFSET); - *data = RD_REG_DWORD(®->iobase_window + RISC_REGISTER_WINDOW_OFFET); + *data = RD_REG_DWORD(®->iobase_window + RISC_REGISTER_WINDOW_OFFSET); } @@ -2871,7 +2871,7 @@ qla25xx_write_risc_sema_reg(scsi_qla_host_t *vha, uint32_t data) struct device_reg_24xx __iomem *reg = &vha->hw->iobase->isp24; WRT_REG_DWORD(®->iobase_addr, RISC_REGISTER_BASE_OFFSET); - WRT_REG_DWORD(®->iobase_window + RISC_REGISTER_WINDOW_OFFET, data); + WRT_REG_DWORD(®->iobase_window + RISC_REGISTER_WINDOW_OFFSET, data); } static void From fbbc95a49d5b00a66d82ea34b7f029963b6c91f3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:16:59 -0700 Subject: [PATCH 409/562] scsi: qla2xxx: Suppress two recently introduced compiler warnings Suppress the following two compiler warnings because these are not useful: In file included from ./include/trace/define_trace.h:102, from ./include/trace/events/qla.h:39, from drivers/scsi/qla2xxx/qla_dbg.c:77: ./include/trace/events/qla.h: In function 'trace_event_raw_event_qla_log_event': ./include/trace/trace_events.h:691:9: warning: function 'trace_event_raw_event_qla_log_event' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format] 691 | struct trace_event_raw_##call *entry; \ | ^~~~~~~~~~~~~~~~ ./include/trace/events/qla.h:12:1: note: in expansion of macro 'DECLARE_EVENT_CLASS' 12 | DECLARE_EVENT_CLASS(qla_log_event, | ^~~~~~~~~~~~~~~~~~~ In file included from ./include/trace/define_trace.h:103, from ./include/trace/events/qla.h:39, from drivers/scsi/qla2xxx/qla_dbg.c:77: ./include/trace/events/qla.h: In function 'perf_trace_qla_log_event': ./include/trace/perf.h:41:9: warning: function 'perf_trace_qla_log_event' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format] 41 | struct hlist_head *head; \ | ^~~~~~~~~~ ./include/trace/events/qla.h:12:1: note: in expansion of macro 'DECLARE_EVENT_CLASS' Link: https://lore.kernel.org/r/20200518211712.11395-3-bvanassche@acm.org Fixes: 598a90f2002c ("scsi: qla2xxx: add ring buffer for tracing debug logs") Cc: Rajan Shanmugavelu Cc: Joe Jin Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/trace/events/qla.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/trace/events/qla.h b/include/trace/events/qla.h index b71f680968eb..5857cf682ee7 100644 --- a/include/trace/events/qla.h +++ b/include/trace/events/qla.h @@ -9,6 +9,11 @@ #define QLA_MSG_MAX 256 +#pragma GCC diagnostic push +#ifndef __clang__ +#pragma GCC diagnostic ignored "-Wsuggest-attribute=format" +#endif + DECLARE_EVENT_CLASS(qla_log_event, TP_PROTO(const char *buf, struct va_format *vaf), @@ -27,6 +32,8 @@ DECLARE_EVENT_CLASS(qla_log_event, TP_printk("%s %s", __get_str(buf), __get_str(msg)) ); +#pragma GCC diagnostic pop + DEFINE_EVENT(qla_log_event, ql_dbg_log, TP_PROTO(const char *buf, struct va_format *vaf), TP_ARGS(buf, vaf) From 8ae178760b23e2055aecf8b8a54629cec7ccc58e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:00 -0700 Subject: [PATCH 410/562] scsi: qla2xxx: Simplify the functions for dumping firmware Instead of passing an argument to the firmware dumping functions that tells these functions whether or not to obtain the hardware lock, obtain that lock before calling these functions. This patch fixes the following recently introduced C=2 build error: CHECK drivers/scsi/qla2xxx/qla_tmpl.c drivers/scsi/qla2xxx/qla_tmpl.c:1133:1: error: Expected ; at end of statement drivers/scsi/qla2xxx/qla_tmpl.c:1133:1: error: got } drivers/scsi/qla2xxx/qla_tmpl.h:247:0: error: Expected } at end of function drivers/scsi/qla2xxx/qla_tmpl.h:247:0: error: got end-of-input Link: https://lore.kernel.org/r/20200518211712.11395-4-bvanassche@acm.org Fixes: cbb01c2f2f63 ("scsi: qla2xxx: Fix MPI failure AEN (8200) handling") Cc: Arun Easi Cc: Nilesh Javali Cc: Himanshu Madhani Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_bsg.c | 4 +- drivers/scsi/qla2xxx/qla_dbg.c | 153 ++++++++---------------------- drivers/scsi/qla2xxx/qla_def.h | 2 +- drivers/scsi/qla2xxx/qla_gbl.h | 19 ++-- drivers/scsi/qla2xxx/qla_isr.c | 12 +-- drivers/scsi/qla2xxx/qla_mbx.c | 6 +- drivers/scsi/qla2xxx/qla_nx.c | 2 +- drivers/scsi/qla2xxx/qla_nx2.c | 2 +- drivers/scsi/qla2xxx/qla_os.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 4 +- drivers/scsi/qla2xxx/qla_tmpl.c | 19 +--- 11 files changed, 70 insertions(+), 155 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 97b51c477972..3af7ca68ec44 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -691,7 +691,7 @@ qla81xx_set_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, * dump and reset the chip. */ if (ret) { - ha->isp_ops->fw_dump(vha, 0); + qla2xxx_dump_fw(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); } rval = -EINVAL; @@ -896,7 +896,7 @@ qla2x00_process_loopback(struct bsg_job *bsg_job) * doesn't work take FCoE dump and then * reset the chip. */ - ha->isp_ops->fw_dump(vha, 0); + qla2xxx_dump_fw(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); } diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 1206f7c1ce6a..07a8c674b741 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -716,35 +716,37 @@ qla2xxx_dump_post_process(scsi_qla_host_t *vha, int rval) } } +void qla2xxx_dump_fw(scsi_qla_host_t *vha) +{ + unsigned long flags; + + spin_lock_irqsave(&vha->hw->hardware_lock, flags); + vha->hw->isp_ops->fw_dump(vha); + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); +} + /** * qla2300_fw_dump() - Dumps binary data from the 2300 firmware. * @vha: HA context - * @hardware_locked: Called with the hardware_lock */ void -qla2300_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla2300_fw_dump(scsi_qla_host_t *vha) { int rval; uint32_t cnt; struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; uint16_t __iomem *dmp_reg; - unsigned long flags; struct qla2300_fw_dump *fw; void *nxt; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); - flags = 0; - -#ifndef __CHECKER__ - if (!hardware_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif + lockdep_assert_held(&ha->hardware_lock); if (!ha->fw_dump) { ql_log(ql_log_warn, vha, 0xd002, "No buffer available for dump.\n"); - goto qla2300_fw_dump_failed; + return; } if (ha->fw_dumped) { @@ -752,7 +754,7 @@ qla2300_fw_dump(scsi_qla_host_t *vha, int hardware_locked) "Firmware has been previously dumped (%p) " "-- ignoring request.\n", ha->fw_dump); - goto qla2300_fw_dump_failed; + return; } fw = &ha->fw_dump->isp.isp23; qla2xxx_prep_dump(ha, ha->fw_dump); @@ -876,48 +878,31 @@ qla2300_fw_dump(scsi_qla_host_t *vha, int hardware_locked) qla2xxx_copy_queues(ha, nxt); qla2xxx_dump_post_process(base_vha, rval); - -qla2300_fw_dump_failed: -#ifndef __CHECKER__ - if (!hardware_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#else - ; -#endif } /** * qla2100_fw_dump() - Dumps binary data from the 2100/2200 firmware. * @vha: HA context - * @hardware_locked: Called with the hardware_lock */ void -qla2100_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla2100_fw_dump(scsi_qla_host_t *vha) { int rval; uint32_t cnt, timer; - uint16_t risc_address; - uint16_t mb0, mb2; + uint16_t risc_address = 0; + uint16_t mb0 = 0, mb2 = 0; struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; uint16_t __iomem *dmp_reg; - unsigned long flags; struct qla2100_fw_dump *fw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); - risc_address = 0; - mb0 = mb2 = 0; - flags = 0; - -#ifndef __CHECKER__ - if (!hardware_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif + lockdep_assert_held(&ha->hardware_lock); if (!ha->fw_dump) { ql_log(ql_log_warn, vha, 0xd004, "No buffer available for dump.\n"); - goto qla2100_fw_dump_failed; + return; } if (ha->fw_dumped) { @@ -925,7 +910,7 @@ qla2100_fw_dump(scsi_qla_host_t *vha, int hardware_locked) "Firmware has been previously dumped (%p) " "-- ignoring request.\n", ha->fw_dump); - goto qla2100_fw_dump_failed; + return; } fw = &ha->fw_dump->isp.isp21; qla2xxx_prep_dump(ha, ha->fw_dump); @@ -1080,18 +1065,10 @@ qla2100_fw_dump(scsi_qla_host_t *vha, int hardware_locked) qla2xxx_copy_queues(ha, &fw->risc_ram[cnt]); qla2xxx_dump_post_process(base_vha, rval); - -qla2100_fw_dump_failed: -#ifndef __CHECKER__ - if (!hardware_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#else - ; -#endif } void -qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla24xx_fw_dump(scsi_qla_host_t *vha) { int rval; uint32_t cnt; @@ -1100,28 +1077,23 @@ qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) uint32_t __iomem *dmp_reg; uint32_t *iter_reg; uint16_t __iomem *mbx_reg; - unsigned long flags; struct qla24xx_fw_dump *fw; void *nxt; void *nxt_chain; uint32_t *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + lockdep_assert_held(&ha->hardware_lock); + if (IS_P3P_TYPE(ha)) return; - flags = 0; ha->fw_dump_cap_flags = 0; -#ifndef __CHECKER__ - if (!hardware_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif - if (!ha->fw_dump) { ql_log(ql_log_warn, vha, 0xd006, "No buffer available for dump.\n"); - goto qla24xx_fw_dump_failed; + return; } if (ha->fw_dumped) { @@ -1129,7 +1101,7 @@ qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) "Firmware has been previously dumped (%p) " "-- ignoring request.\n", ha->fw_dump); - goto qla24xx_fw_dump_failed; + return; } QLA_FW_STOPPED(ha); fw = &ha->fw_dump->isp.isp24; @@ -1339,18 +1311,10 @@ qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) qla24xx_fw_dump_failed_0: qla2xxx_dump_post_process(base_vha, rval); - -qla24xx_fw_dump_failed: -#ifndef __CHECKER__ - if (!hardware_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#else - ; -#endif } void -qla25xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla25xx_fw_dump(scsi_qla_host_t *vha) { int rval; uint32_t cnt; @@ -1359,24 +1323,19 @@ qla25xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) uint32_t __iomem *dmp_reg; uint32_t *iter_reg; uint16_t __iomem *mbx_reg; - unsigned long flags; struct qla25xx_fw_dump *fw; void *nxt, *nxt_chain; uint32_t *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); - flags = 0; - ha->fw_dump_cap_flags = 0; + lockdep_assert_held(&ha->hardware_lock); -#ifndef __CHECKER__ - if (!hardware_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif + ha->fw_dump_cap_flags = 0; if (!ha->fw_dump) { ql_log(ql_log_warn, vha, 0xd008, "No buffer available for dump.\n"); - goto qla25xx_fw_dump_failed; + return; } if (ha->fw_dumped) { @@ -1384,7 +1343,7 @@ qla25xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) "Firmware has been previously dumped (%p) " "-- ignoring request.\n", ha->fw_dump); - goto qla25xx_fw_dump_failed; + return; } QLA_FW_STOPPED(ha); fw = &ha->fw_dump->isp.isp25; @@ -1665,18 +1624,10 @@ qla25xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) qla25xx_fw_dump_failed_0: qla2xxx_dump_post_process(base_vha, rval); - -qla25xx_fw_dump_failed: -#ifndef __CHECKER__ - if (!hardware_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#else - ; -#endif } void -qla81xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla81xx_fw_dump(scsi_qla_host_t *vha) { int rval; uint32_t cnt; @@ -1685,24 +1636,19 @@ qla81xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) uint32_t __iomem *dmp_reg; uint32_t *iter_reg; uint16_t __iomem *mbx_reg; - unsigned long flags; struct qla81xx_fw_dump *fw; void *nxt, *nxt_chain; uint32_t *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); - flags = 0; - ha->fw_dump_cap_flags = 0; + lockdep_assert_held(&ha->hardware_lock); -#ifndef __CHECKER__ - if (!hardware_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif + ha->fw_dump_cap_flags = 0; if (!ha->fw_dump) { ql_log(ql_log_warn, vha, 0xd00a, "No buffer available for dump.\n"); - goto qla81xx_fw_dump_failed; + return; } if (ha->fw_dumped) { @@ -1710,7 +1656,7 @@ qla81xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) "Firmware has been previously dumped (%p) " "-- ignoring request.\n", ha->fw_dump); - goto qla81xx_fw_dump_failed; + return; } fw = &ha->fw_dump->isp.isp81; qla2xxx_prep_dump(ha, ha->fw_dump); @@ -1993,18 +1939,10 @@ qla81xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) qla81xx_fw_dump_failed_0: qla2xxx_dump_post_process(base_vha, rval); - -qla81xx_fw_dump_failed: -#ifndef __CHECKER__ - if (!hardware_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#else - ; -#endif } void -qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla83xx_fw_dump(scsi_qla_host_t *vha) { int rval; uint32_t cnt; @@ -2013,31 +1951,26 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) uint32_t __iomem *dmp_reg; uint32_t *iter_reg; uint16_t __iomem *mbx_reg; - unsigned long flags; struct qla83xx_fw_dump *fw; void *nxt, *nxt_chain; uint32_t *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); - flags = 0; - ha->fw_dump_cap_flags = 0; + lockdep_assert_held(&ha->hardware_lock); -#ifndef __CHECKER__ - if (!hardware_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif + ha->fw_dump_cap_flags = 0; if (!ha->fw_dump) { ql_log(ql_log_warn, vha, 0xd00c, "No buffer available for dump!!!\n"); - goto qla83xx_fw_dump_failed; + return; } if (ha->fw_dumped) { ql_log(ql_log_warn, vha, 0xd00d, "Firmware has been previously dumped (%p) -- ignoring " "request...\n", ha->fw_dump); - goto qla83xx_fw_dump_failed; + return; } QLA_FW_STOPPED(ha); fw = &ha->fw_dump->isp.isp83; @@ -2507,14 +2440,6 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) qla83xx_fw_dump_failed_0: qla2xxx_dump_post_process(base_vha, rval); - -qla83xx_fw_dump_failed: -#ifndef __CHECKER__ - if (!hardware_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#else - ; -#endif } /****************************************************************************/ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 172ea4e5887d..5ca46b15ca3c 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3222,7 +3222,7 @@ struct isp_operations { int (*write_nvram)(struct scsi_qla_host *, void *, uint32_t, uint32_t); - void (*fw_dump) (struct scsi_qla_host *, int); + void (*fw_dump)(struct scsi_qla_host *vha); void (*mpi_fw_dump)(struct scsi_qla_host *, int); int (*beacon_on) (struct scsi_qla_host *); diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index f62b71e47581..061f91b521b3 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -637,15 +637,16 @@ extern int qla24xx_read_fcp_prio_cfg(scsi_qla_host_t *); /* * Global Function Prototypes in qla_dbg.c source file. */ -extern void qla2100_fw_dump(scsi_qla_host_t *, int); -extern void qla2300_fw_dump(scsi_qla_host_t *, int); -extern void qla24xx_fw_dump(scsi_qla_host_t *, int); -extern void qla25xx_fw_dump(scsi_qla_host_t *, int); -extern void qla81xx_fw_dump(scsi_qla_host_t *, int); -extern void qla82xx_fw_dump(scsi_qla_host_t *, int); -extern void qla8044_fw_dump(scsi_qla_host_t *, int); +void qla2xxx_dump_fw(scsi_qla_host_t *vha); +void qla2100_fw_dump(scsi_qla_host_t *vha); +void qla2300_fw_dump(scsi_qla_host_t *vha); +void qla24xx_fw_dump(scsi_qla_host_t *vha); +void qla25xx_fw_dump(scsi_qla_host_t *vha); +void qla81xx_fw_dump(scsi_qla_host_t *vha); +void qla82xx_fw_dump(scsi_qla_host_t *vha); +void qla8044_fw_dump(scsi_qla_host_t *vha); -extern void qla27xx_fwdump(scsi_qla_host_t *, int); +void qla27xx_fwdump(scsi_qla_host_t *vha); extern void qla27xx_mpi_fwdump(scsi_qla_host_t *, int); extern ulong qla27xx_fwdt_calculate_dump_size(struct scsi_qla_host *, void *); extern int qla27xx_fwdt_template_valid(void *); @@ -873,7 +874,7 @@ extern int qla2x00_get_idma_speed(scsi_qla_host_t *, uint16_t, uint16_t *, uint16_t *); /* 83xx related functions */ -extern void qla83xx_fw_dump(scsi_qla_host_t *, int); +void qla83xx_fw_dump(scsi_qla_host_t *vha); /* Minidump related functions */ extern int qla82xx_md_get_template_size(scsi_qla_host_t *); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index a9e8513e1cf1..54e1ecdc0cdb 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -220,7 +220,7 @@ qla2100_intr_handler(int irq, void *dev_id) WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); RD_REG_WORD(®->hccr); - ha->isp_ops->fw_dump(vha, 1); + ha->isp_ops->fw_dump(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); break; } else if ((RD_REG_WORD(®->istatus) & ISR_RISC_INT) == 0) @@ -351,7 +351,7 @@ qla2300_intr_handler(int irq, void *dev_id) WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); RD_REG_WORD(®->hccr); - ha->isp_ops->fw_dump(vha, 1); + ha->isp_ops->fw_dump(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); break; } else if ((stat & HSR_RISC_INT) == 0) @@ -777,7 +777,7 @@ qla27xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb) "MPI Heartbeat stop. FW dump needed\n"); if (ql2xfulldump_on_mpifail) { - ha->isp_ops->fw_dump(vha, 1); + ha->isp_ops->fw_dump(vha); reset_isp_needed = 1; } @@ -908,7 +908,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) if ((IS_QLA27XX(ha) || IS_QLA28XX(ha)) && RD_REG_WORD(®24->mailbox7) & BIT_8) ha->isp_ops->mpi_fw_dump(vha, 1); - ha->isp_ops->fw_dump(vha, 1); + ha->isp_ops->fw_dump(vha); ha->flags.fw_init_done = 0; QLA_FW_STOPPED(ha); @@ -3473,7 +3473,7 @@ qla24xx_intr_handler(int irq, void *dev_id) qla2xxx_check_risc_status(vha); - ha->isp_ops->fw_dump(vha, 1); + ha->isp_ops->fw_dump(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); break; } else if ((stat & HSRX_RISC_INT) == 0) @@ -3602,7 +3602,7 @@ qla24xx_msix_default(int irq, void *dev_id) qla2xxx_check_risc_status(vha); - ha->isp_ops->fw_dump(vha, 1); + ha->isp_ops->fw_dump(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); break; } else if ((stat & HSRX_RISC_INT) == 0) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 9fd83d1bffe0..fb3e481bfa0c 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -462,7 +462,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) * a dump */ if (mcp->mb[0] != MBC_GEN_SYSTEM_ERROR) - ha->isp_ops->fw_dump(vha, 0); + qla2xxx_dump_fw(vha); rval = QLA_FUNCTION_TIMEOUT; } } @@ -6213,7 +6213,7 @@ qla83xx_restart_nic_firmware(scsi_qla_host_t *vha) ql_dbg(ql_dbg_mbx, vha, 0x1144, "Failed=%x mb[0]=%x mb[1]=%x.\n", rval, mcp->mb[0], mcp->mb[1]); - ha->isp_ops->fw_dump(vha, 0); + qla2xxx_dump_fw(vha); } else { ql_dbg(ql_dbg_mbx, vha, 0x1145, "Done %s.\n", __func__); } @@ -6258,7 +6258,7 @@ qla83xx_access_control(scsi_qla_host_t *vha, uint16_t options, "Failed=%x mb[0]=%x mb[1]=%x mb[2]=%x mb[3]=%x mb[4]=%x.\n", rval, mcp->mb[0], mcp->mb[1], mcp->mb[2], mcp->mb[3], mcp->mb[4]); - ha->isp_ops->fw_dump(vha, 0); + qla2xxx_dump_fw(vha); } else { if (subcode & BIT_5) *sector_size = mcp->mb[1]; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index d2037253e2d7..ec4d6675c62f 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -4514,7 +4514,7 @@ qla82xx_beacon_off(struct scsi_qla_host *vha) } void -qla82xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla82xx_fw_dump(scsi_qla_host_t *vha) { struct qla_hw_data *ha = vha->hw; diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c index b5c3e56edaba..df9429428316 100644 --- a/drivers/scsi/qla2xxx/qla_nx2.c +++ b/drivers/scsi/qla2xxx/qla_nx2.c @@ -4070,7 +4070,7 @@ qla8044_abort_isp(scsi_qla_host_t *vha) } void -qla8044_fw_dump(scsi_qla_host_t *vha, int hardware_locked) +qla8044_fw_dump(scsi_qla_host_t *vha) { struct qla_hw_data *ha = vha->hw; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 382e1f977d01..8f96d37a866c 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7575,7 +7575,7 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) if (risc_paused) { ql_log(ql_log_info, base_vha, 0x9003, "RISC paused -- mmio_enabled, Dumping firmware.\n"); - ha->isp_ops->fw_dump(base_vha, 0); + qla2xxx_dump_fw(base_vha); return PCI_ERS_RESULT_NEED_RESET; } else diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f3255aa70dcc..3af8a8a7f997 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5670,9 +5670,9 @@ static int qlt_chk_unresolv_exchg(struct scsi_qla_host *vha, vha, 0xffff, (uint8_t *)entry, sizeof(*entry)); if (qpair == ha->base_qpair) - ha->isp_ops->fw_dump(vha, 1); + ha->isp_ops->fw_dump(vha); else - ha->isp_ops->fw_dump(vha, 0); + qla2xxx_dump_fw(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 281973b317a8..4a4d92046cbf 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -1081,14 +1081,9 @@ qla27xx_mpi_fwdump(scsi_qla_host_t *vha, int hardware_locked) } void -qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) +qla27xx_fwdump(scsi_qla_host_t *vha) { - ulong flags = 0; - -#ifndef __CHECKER__ - if (!hardware_locked) - spin_lock_irqsave(&vha->hw->hardware_lock, flags); -#endif + lockdep_assert_held(&vha->hw->hardware_lock); if (!vha->hw->fw_dump) { ql_log(ql_log_warn, vha, 0xd01e, "-> fwdump no buffer\n"); @@ -1105,11 +1100,11 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) if (!fwdt->template) { ql_log(ql_log_warn, vha, 0xd012, "-> fwdt0 no template\n"); - goto bailout; + return; } len = qla27xx_execute_fwdt_template(vha, fwdt->template, buf); if (len == 0) { - goto bailout; + return; } else if (len != fwdt->dump_size) { ql_log(ql_log_warn, vha, 0xd013, "-> fwdt0 fwdump residual=%+ld\n", @@ -1124,10 +1119,4 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) vha->host_no, vha->hw->fw_dump, vha->hw->fw_dump_cap_flags); qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP); } - -bailout: -#ifndef __CHECKER__ - if (!hardware_locked) - spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); -#endif } From 59d23cf3f2e4c100ac21f7e00e5817d5e8a3a2e2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:01 -0700 Subject: [PATCH 411/562] scsi: qla2xxx: Sort BUILD_BUG_ON() statements alphabetically Before adding more BUILD_BUG_ON() statements, sort the existing statements alphabetically. Link: https://lore.kernel.org/r/20200518211712.11395-5-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 8f96d37a866c..bc8ae0c81980 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7842,11 +7842,11 @@ qla2x00_module_init(void) BUILD_BUG_ON(sizeof(struct init_cb_24xx) != 128); BUILD_BUG_ON(sizeof(struct init_cb_81xx) != 128); BUILD_BUG_ON(sizeof(struct pt_ls4_request) != 64); + BUILD_BUG_ON(sizeof(struct qla_flt_header) != 8); + BUILD_BUG_ON(sizeof(struct qla_flt_region) != 16); BUILD_BUG_ON(sizeof(struct sns_cmd_pkt) != 2064); BUILD_BUG_ON(sizeof(struct verify_chip_entry_84xx) != 64); BUILD_BUG_ON(sizeof(struct vf_evfp_entry_24xx) != 56); - BUILD_BUG_ON(sizeof(struct qla_flt_region) != 16); - BUILD_BUG_ON(sizeof(struct qla_flt_header) != 8); /* Allocate cache for SRBs. */ srb_cachep = kmem_cache_create("qla2xxx_srbs", sizeof(srb_t), 0, From 8a73a0e002b318d8ad79fe0e6b48e27ca30e527a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:02 -0700 Subject: [PATCH 412/562] scsi: qla2xxx: Add more BUILD_BUG_ON() statements Before fixing the endianness annotations in data structures, make the compiler verify the size of FC protocol and firmware data structures. Link: https://lore.kernel.org/r/20200518211712.11395-6-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 58 ++++++++++++++++++++++++++++++ drivers/scsi/qla2xxx/tcm_qla2xxx.c | 14 ++++++++ 2 files changed, 72 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index bc8ae0c81980..5199169c4ce0 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7822,13 +7822,19 @@ qla2x00_module_init(void) { int ret = 0; + BUILD_BUG_ON(sizeof(cmd_a64_entry_t) != 64); BUILD_BUG_ON(sizeof(cmd_entry_t) != 64); BUILD_BUG_ON(sizeof(cont_a64_entry_t) != 64); BUILD_BUG_ON(sizeof(cont_entry_t) != 64); BUILD_BUG_ON(sizeof(init_cb_t) != 96); + BUILD_BUG_ON(sizeof(mrk_entry_t) != 64); BUILD_BUG_ON(sizeof(ms_iocb_entry_t) != 64); BUILD_BUG_ON(sizeof(request_t) != 64); + BUILD_BUG_ON(sizeof(struct abort_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct abort_iocb_entry_fx00) != 64); + BUILD_BUG_ON(sizeof(struct abts_entry_24xx) != 64); BUILD_BUG_ON(sizeof(struct access_chip_84xx) != 64); + BUILD_BUG_ON(sizeof(struct access_chip_rsp_84xx) != 64); BUILD_BUG_ON(sizeof(struct cmd_bidir) != 64); BUILD_BUG_ON(sizeof(struct cmd_nvme) != 64); BUILD_BUG_ON(sizeof(struct cmd_type_6) != 64); @@ -7836,17 +7842,69 @@ qla2x00_module_init(void) BUILD_BUG_ON(sizeof(struct cmd_type_7_fx00) != 64); BUILD_BUG_ON(sizeof(struct cmd_type_crc_2) != 64); BUILD_BUG_ON(sizeof(struct ct_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct ct_fdmi1_hba_attributes) != 2344); + BUILD_BUG_ON(sizeof(struct ct_fdmi2_hba_attributes) != 4424); + BUILD_BUG_ON(sizeof(struct ct_fdmi2_port_attributes) != 4164); + BUILD_BUG_ON(sizeof(struct ct_fdmi_hba_attr) != 260); + BUILD_BUG_ON(sizeof(struct ct_fdmi_port_attr) != 260); + BUILD_BUG_ON(sizeof(struct ct_rsp_hdr) != 16); BUILD_BUG_ON(sizeof(struct ctio_crc2_to_fw) != 64); + BUILD_BUG_ON(sizeof(struct device_reg_24xx) != 256); + BUILD_BUG_ON(sizeof(struct device_reg_25xxmq) != 24); + BUILD_BUG_ON(sizeof(struct device_reg_2xxx) != 256); + BUILD_BUG_ON(sizeof(struct device_reg_82xx) != 1288); + BUILD_BUG_ON(sizeof(struct device_reg_fx00) != 216); BUILD_BUG_ON(sizeof(struct els_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct els_sts_entry_24xx) != 64); BUILD_BUG_ON(sizeof(struct fxdisc_entry_fx00) != 64); + BUILD_BUG_ON(sizeof(struct imm_ntfy_from_isp) != 64); BUILD_BUG_ON(sizeof(struct init_cb_24xx) != 128); BUILD_BUG_ON(sizeof(struct init_cb_81xx) != 128); + BUILD_BUG_ON(sizeof(struct logio_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct mbx_entry) != 64); + BUILD_BUG_ON(sizeof(struct mid_init_cb_24xx) != 5252); + BUILD_BUG_ON(sizeof(struct mrk_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct nvram_24xx) != 512); + BUILD_BUG_ON(sizeof(struct nvram_81xx) != 512); BUILD_BUG_ON(sizeof(struct pt_ls4_request) != 64); + BUILD_BUG_ON(sizeof(struct pt_ls4_rx_unsol) != 64); + BUILD_BUG_ON(sizeof(struct purex_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct qla2100_fw_dump) != 123634); + BUILD_BUG_ON(sizeof(struct qla2300_fw_dump) != 136100); + BUILD_BUG_ON(sizeof(struct qla24xx_fw_dump) != 37976); + BUILD_BUG_ON(sizeof(struct qla25xx_fw_dump) != 39228); + BUILD_BUG_ON(sizeof(struct qla2xxx_fce_chain) != 52); + BUILD_BUG_ON(sizeof(struct qla2xxx_fw_dump) != 136172); + BUILD_BUG_ON(sizeof(struct qla2xxx_mq_chain) != 524); + BUILD_BUG_ON(sizeof(struct qla2xxx_mqueue_chain) != 8); + BUILD_BUG_ON(sizeof(struct qla2xxx_mqueue_header) != 12); + BUILD_BUG_ON(sizeof(struct qla2xxx_offld_chain) != 24); + BUILD_BUG_ON(sizeof(struct qla81xx_fw_dump) != 39420); + BUILD_BUG_ON(sizeof(struct qla82xx_uri_data_desc) != 28); + BUILD_BUG_ON(sizeof(struct qla82xx_uri_table_desc) != 32); + BUILD_BUG_ON(sizeof(struct qla83xx_fw_dump) != 51196); + BUILD_BUG_ON(sizeof(struct qla_fdt_layout) != 128); BUILD_BUG_ON(sizeof(struct qla_flt_header) != 8); BUILD_BUG_ON(sizeof(struct qla_flt_region) != 16); + BUILD_BUG_ON(sizeof(struct qla_npiv_entry) != 24); + BUILD_BUG_ON(sizeof(struct qla_npiv_header) != 16); + BUILD_BUG_ON(sizeof(struct rdp_rsp_payload) != 336); BUILD_BUG_ON(sizeof(struct sns_cmd_pkt) != 2064); + BUILD_BUG_ON(sizeof(struct sts_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct tsk_mgmt_entry) != 64); + BUILD_BUG_ON(sizeof(struct tsk_mgmt_entry_fx00) != 64); BUILD_BUG_ON(sizeof(struct verify_chip_entry_84xx) != 64); + BUILD_BUG_ON(sizeof(struct verify_chip_rsp_84xx) != 52); BUILD_BUG_ON(sizeof(struct vf_evfp_entry_24xx) != 56); + BUILD_BUG_ON(sizeof(struct vp_config_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct vp_ctrl_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(struct vp_rpt_id_entry_24xx) != 64); + BUILD_BUG_ON(sizeof(sts21_entry_t) != 64); + BUILD_BUG_ON(sizeof(sts22_entry_t) != 64); + BUILD_BUG_ON(sizeof(sts_cont_entry_t) != 64); + BUILD_BUG_ON(sizeof(sts_entry_t) != 64); + BUILD_BUG_ON(sizeof(sw_info_t) != 32); + BUILD_BUG_ON(sizeof(target_id_t) != 2); /* Allocate cache for SRBs. */ srb_cachep = kmem_cache_create("qla2xxx_srbs", sizeof(srb_t), 0, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index bf00ae16b487..68183a96a417 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1960,6 +1960,20 @@ static int __init tcm_qla2xxx_init(void) { int ret; + BUILD_BUG_ON(sizeof(struct abts_recv_from_24xx) != 64); + BUILD_BUG_ON(sizeof(struct abts_resp_from_24xx_fw) != 64); + BUILD_BUG_ON(sizeof(struct atio7_fcp_cmnd) != 32); + BUILD_BUG_ON(sizeof(struct atio_from_isp) != 64); + BUILD_BUG_ON(sizeof(struct ba_acc_le) != 12); + BUILD_BUG_ON(sizeof(struct ba_rjt_le) != 4); + BUILD_BUG_ON(sizeof(struct ctio7_from_24xx) != 64); + BUILD_BUG_ON(sizeof(struct ctio7_to_24xx) != 64); + BUILD_BUG_ON(sizeof(struct ctio_crc2_to_fw) != 64); + BUILD_BUG_ON(sizeof(struct ctio_crc_from_fw) != 64); + BUILD_BUG_ON(sizeof(struct ctio_to_2xxx) != 64); + BUILD_BUG_ON(sizeof(struct fcp_hdr_le) != 24); + BUILD_BUG_ON(sizeof(struct nack_to_isp) != 64); + ret = tcm_qla2xxx_register_configfs(); if (ret < 0) return ret; From 66f86367771507c88a970644d13646f6b57e82e5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:03 -0700 Subject: [PATCH 413/562] scsi: qla2xxx: Make a gap in struct qla2xxx_offld_chain explicit This patch makes struct qla2xxx_offld_chain compatible with ARCH=i386. Link: https://lore.kernel.org/r/20200518211712.11395-7-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Reviewed-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 433e95502808..b106b6808d34 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -238,6 +238,7 @@ struct qla2xxx_offld_chain { uint32_t chain_size; uint32_t size; + uint32_t reserved; u64 addr; }; From d9ab5f1f05fc147682d4fd1f1f1c5af00b766e33 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:04 -0700 Subject: [PATCH 414/562] scsi: qla2xxx: Increase the size of struct qla_fcp_prio_cfg to FCP_PRIO_CFG_SIZE This patch fixes the following Coverity complaint without changing any functionality: CID 337793 (#1 of 1): Wrong size argument (SIZEOF_MISMATCH) suspicious_sizeof: Passing argument ha->fcp_prio_cfg of type struct qla_fcp_prio_cfg * and argument 32768UL to function memset is suspicious because a multiple of sizeof (struct qla_fcp_prio_cfg) /*48*/ is expected. memset(ha->fcp_prio_cfg, 0, FCP_PRIO_CFG_SIZE); Link: https://lore.kernel.org/r/20200518211712.11395-8-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_fw.h | 3 ++- drivers/scsi/qla2xxx/qla_os.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index b364a497e33d..4fa34374f34f 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -2217,8 +2217,9 @@ struct qla_fcp_prio_cfg { #define FCP_PRIO_ATTR_PERSIST 0x2 uint8_t reserved; /* Reserved for future use */ #define FCP_PRIO_CFG_HDR_SIZE 0x10 - struct qla_fcp_prio_entry entry[1]; /* fcp priority entries */ + struct qla_fcp_prio_entry entry[1023]; /* fcp priority entries */ #define FCP_PRIO_CFG_ENTRY_SIZE 0x20 + uint8_t reserved2[16]; }; #define FCP_PRIO_CFG_SIZE (32*1024) /* fcp prio data per port*/ diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5199169c4ce0..743c0df18fa0 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7883,6 +7883,7 @@ qla2x00_module_init(void) BUILD_BUG_ON(sizeof(struct qla82xx_uri_data_desc) != 28); BUILD_BUG_ON(sizeof(struct qla82xx_uri_table_desc) != 32); BUILD_BUG_ON(sizeof(struct qla83xx_fw_dump) != 51196); + BUILD_BUG_ON(sizeof(struct qla_fcp_prio_cfg) != FCP_PRIO_CFG_SIZE); BUILD_BUG_ON(sizeof(struct qla_fdt_layout) != 128); BUILD_BUG_ON(sizeof(struct qla_flt_header) != 8); BUILD_BUG_ON(sizeof(struct qla_flt_region) != 16); From e544b720ef3191cdb6d3ea2915f82973d6372bca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:05 -0700 Subject: [PATCH 415/562] scsi: qla2xxx: Change two hardcoded constants into offsetof() / sizeof() expressions This patch does not change any functionality. Link: https://lore.kernel.org/r/20200518211712.11395-9-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_fw.h | 3 +-- drivers/scsi/qla2xxx/qla_sup.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index 4fa34374f34f..f18d2d00d28c 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -2216,9 +2216,8 @@ struct qla_fcp_prio_cfg { #define FCP_PRIO_ATTR_ENABLE 0x1 #define FCP_PRIO_ATTR_PERSIST 0x2 uint8_t reserved; /* Reserved for future use */ -#define FCP_PRIO_CFG_HDR_SIZE 0x10 +#define FCP_PRIO_CFG_HDR_SIZE offsetof(struct qla_fcp_prio_cfg, entry) struct qla_fcp_prio_entry entry[1023]; /* fcp priority entries */ -#define FCP_PRIO_CFG_ENTRY_SIZE 0x20 uint8_t reserved2[16]; }; diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 3da79ee1d88e..57ffbf9d7dbf 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -3617,7 +3617,7 @@ qla24xx_read_fcp_prio_cfg(scsi_qla_host_t *vha) /* read remaining FCP CMD config data from flash */ fcp_prio_addr += (FCP_PRIO_CFG_HDR_SIZE >> 2); - len = ha->fcp_prio_cfg->num_entries * FCP_PRIO_CFG_ENTRY_SIZE; + len = ha->fcp_prio_cfg->num_entries * sizeof(struct qla_fcp_prio_entry); max_len = FCP_PRIO_CFG_SIZE - FCP_PRIO_CFG_HDR_SIZE; ha->isp_ops->read_optrom(vha, &ha->fcp_prio_cfg->entry[0], From c3888416221849ed46fd35413c7a1d00ee291cbe Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:06 -0700 Subject: [PATCH 416/562] scsi: qla2xxx: Use register names instead of register offsets Make qla27xx_write_remote_reg() easier to read by using register names instead of register offsets. The 'pahole' tool has been used to convert register offsets into register names. See also commit cbb01c2f2f63 ("scsi: qla2xxx: Fix MPI failure AEN (8200) handling"). Link: https://lore.kernel.org/r/20200518211712.11395-10-bvanassche@acm.org Cc: Arun Easi Cc: Nilesh Javali Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_tmpl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 4a4d92046cbf..645496091186 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -17,14 +17,14 @@ static void qla27xx_write_remote_reg(struct scsi_qla_host *vha, u32 addr, u32 data) { - char *reg = (char *)ISPREG(vha); + struct device_reg_24xx __iomem *reg = &vha->hw->iobase->isp24; ql_dbg(ql_dbg_misc, vha, 0xd300, "%s: addr/data = %xh/%xh\n", __func__, addr, data); - WRT_REG_DWORD(reg + IOBASE(vha), 0x40); - WRT_REG_DWORD(reg + 0xc4, data); - WRT_REG_DWORD(reg + 0xc0, addr); + WRT_REG_DWORD(®->iobase_addr, 0x40); + WRT_REG_DWORD(®->iobase_c4, data); + WRT_REG_DWORD(®->iobase_window, addr); } void From 37139da1b097e06841d40a6055db64c78755aea9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:07 -0700 Subject: [PATCH 417/562] scsi: qla2xxx: Fix the code that reads from mailbox registers Make the MMIO accessors strongly typed such that the compiler checks whether the accessor function is used that matches the register width. Fix those MMIO accesses where another number of bits was read or written than the size of the register. Link: https://lore.kernel.org/r/20200518211712.11395-11-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 53 +++++++++++++++++++++++++++------ drivers/scsi/qla2xxx/qla_init.c | 6 ++-- drivers/scsi/qla2xxx/qla_iocb.c | 2 +- drivers/scsi/qla2xxx/qla_isr.c | 4 +-- drivers/scsi/qla2xxx/qla_mbx.c | 2 +- drivers/scsi/qla2xxx/qla_mr.c | 26 ++++++++-------- drivers/scsi/qla2xxx/qla_nx.c | 4 +-- drivers/scsi/qla2xxx/qla_os.c | 2 +- 8 files changed, 67 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 5ca46b15ca3c..4b02b48af85d 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -128,15 +128,50 @@ static inline uint32_t make_handle(uint16_t x, uint16_t y) * I/O register */ -#define RD_REG_BYTE(addr) readb(addr) -#define RD_REG_WORD(addr) readw(addr) -#define RD_REG_DWORD(addr) readl(addr) -#define RD_REG_BYTE_RELAXED(addr) readb_relaxed(addr) -#define RD_REG_WORD_RELAXED(addr) readw_relaxed(addr) -#define RD_REG_DWORD_RELAXED(addr) readl_relaxed(addr) -#define WRT_REG_BYTE(addr, data) writeb(data, addr) -#define WRT_REG_WORD(addr, data) writew(data, addr) -#define WRT_REG_DWORD(addr, data) writel(data, addr) +static inline u8 RD_REG_BYTE(const volatile u8 __iomem *addr) +{ + return readb(addr); +} + +static inline u16 RD_REG_WORD(const volatile __le16 __iomem *addr) +{ + return readw(addr); +} + +static inline u32 RD_REG_DWORD(const volatile __le32 __iomem *addr) +{ + return readl(addr); +} + +static inline u8 RD_REG_BYTE_RELAXED(const volatile u8 __iomem *addr) +{ + return readb_relaxed(addr); +} + +static inline u16 RD_REG_WORD_RELAXED(const volatile __le16 __iomem *addr) +{ + return readw_relaxed(addr); +} + +static inline u32 RD_REG_DWORD_RELAXED(const volatile __le32 __iomem *addr) +{ + return readl_relaxed(addr); +} + +static inline void WRT_REG_BYTE(volatile u8 __iomem *addr, u8 data) +{ + return writeb(data, addr); +} + +static inline void WRT_REG_WORD(volatile __le16 __iomem *addr, u16 data) +{ + return writew(data, addr); +} + +static inline void WRT_REG_DWORD(volatile __le32 __iomem *addr, u32 data) +{ + return writel(data, addr); +} /* * ISP83XX specific remote register addresses diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f8fe0334571f..a1018f5f53de 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2219,7 +2219,7 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) /* Check for secure flash support */ if (IS_QLA28XX(ha)) { - if (RD_REG_DWORD(®->mailbox12) & BIT_0) + if (RD_REG_WORD(®->mailbox12) & BIT_0) ha->flags.secure_adapter = 1; ql_log(ql_log_info, vha, 0xffff, "Secure Adapter: %s\n", (ha->flags.secure_adapter) ? "Yes" : "No"); @@ -2780,7 +2780,7 @@ qla24xx_reset_risc(scsi_qla_host_t *vha) ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x017f, "HCCR: 0x%x, MailBox0 Status 0x%x\n", RD_REG_DWORD(®->hccr), - RD_REG_DWORD(®->mailbox0)); + RD_REG_WORD(®->mailbox0)); /* Wait for soft-reset to complete. */ RD_REG_DWORD(®->ctrl_status); @@ -4098,7 +4098,7 @@ qla24xx_config_rings(struct scsi_qla_host *vha) } /* PCI posting */ - RD_REG_DWORD(&ioreg->hccr); + RD_REG_WORD(&ioreg->hccr); } /** diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 182bd68c79ac..4d8039fc02e7 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2268,7 +2268,7 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp) IS_QLA28XX(ha)) cnt = RD_REG_DWORD(®->isp25mq.req_q_out); else if (IS_P3P_TYPE(ha)) - cnt = RD_REG_DWORD(®->isp82.req_q_out); + cnt = RD_REG_DWORD(reg->isp82.req_q_out); else if (IS_FWI2_CAPABLE(ha)) cnt = RD_REG_DWORD(®->isp24.req_q_out); else if (IS_QLAFX00(ha)) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 54e1ecdc0cdb..e7d94ac7e073 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -452,7 +452,7 @@ qla81xx_idc_event(scsi_qla_host_t *vha, uint16_t aen, uint16_t descr) int rval; struct device_reg_24xx __iomem *reg24 = &vha->hw->iobase->isp24; struct device_reg_82xx __iomem *reg82 = &vha->hw->iobase->isp82; - uint16_t __iomem *wptr; + __le16 __iomem *wptr; uint16_t cnt, timeout, mb[QLA_IDC_ACK_REGS]; /* Seed data -- mailbox1 -> mailbox7. */ @@ -3164,7 +3164,7 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) { uint16_t cnt; uint32_t mboxes; - uint16_t __iomem *wptr; + __le16 __iomem *wptr; struct qla_hw_data *ha = vha->hw; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index fb3e481bfa0c..357fc5aaecd8 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -106,7 +106,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) uint8_t io_lock_on; uint16_t command = 0; uint16_t *iptr; - uint16_t __iomem *optr; + __le16 __iomem *optr; uint32_t cnt; uint32_t mboxes; unsigned long wait_time; diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index ce98189c7872..0e15bce82fc1 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -46,7 +46,7 @@ qlafx00_mailbox_command(scsi_qla_host_t *vha, struct mbx_cmd_32 *mcp) uint8_t io_lock_on; uint16_t command = 0; uint32_t *iptr; - uint32_t __iomem *optr; + __le32 __iomem *optr; uint32_t cnt; uint32_t mboxes; unsigned long wait_time; @@ -109,7 +109,7 @@ qlafx00_mailbox_command(scsi_qla_host_t *vha, struct mbx_cmd_32 *mcp) spin_lock_irqsave(&ha->hardware_lock, flags); /* Load mailbox registers. */ - optr = (uint32_t __iomem *)®->ispfx00.mailbox0; + optr = ®->ispfx00.mailbox0; iptr = mcp->mb; command = mcp->mb[0]; @@ -2843,13 +2843,13 @@ qlafx00_async_event(scsi_qla_host_t *vha) break; default: - ha->aenmb[1] = RD_REG_WORD(®->aenmailbox1); - ha->aenmb[2] = RD_REG_WORD(®->aenmailbox2); - ha->aenmb[3] = RD_REG_WORD(®->aenmailbox3); - ha->aenmb[4] = RD_REG_WORD(®->aenmailbox4); - ha->aenmb[5] = RD_REG_WORD(®->aenmailbox5); - ha->aenmb[6] = RD_REG_WORD(®->aenmailbox6); - ha->aenmb[7] = RD_REG_WORD(®->aenmailbox7); + ha->aenmb[1] = RD_REG_DWORD(®->aenmailbox1); + ha->aenmb[2] = RD_REG_DWORD(®->aenmailbox2); + ha->aenmb[3] = RD_REG_DWORD(®->aenmailbox3); + ha->aenmb[4] = RD_REG_DWORD(®->aenmailbox4); + ha->aenmb[5] = RD_REG_DWORD(®->aenmailbox5); + ha->aenmb[6] = RD_REG_DWORD(®->aenmailbox6); + ha->aenmb[7] = RD_REG_DWORD(®->aenmailbox7); ql_dbg(ql_dbg_async, vha, 0x5078, "AEN:%04x %04x %04x %04x :%04x %04x %04x %04x\n", ha->aenmb[0], ha->aenmb[1], ha->aenmb[2], ha->aenmb[3], @@ -2869,7 +2869,7 @@ static void qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0) { uint16_t cnt; - uint32_t __iomem *wptr; + __le32 __iomem *wptr; struct qla_hw_data *ha = vha->hw; struct device_reg_fx00 __iomem *reg = &ha->iobase->ispfx00; @@ -2879,7 +2879,7 @@ qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0) /* Load return mailbox registers. */ ha->flags.mbox_int = 1; ha->mailbox_out32[0] = mb0; - wptr = (uint32_t __iomem *)®->mailbox17; + wptr = ®->mailbox17; for (cnt = 1; cnt < ha->mbx_count; cnt++) { ha->mailbox_out32[cnt] = RD_REG_DWORD(wptr); @@ -2936,13 +2936,13 @@ qlafx00_intr_handler(int irq, void *dev_id) break; if (stat & QLAFX00_INTR_MB_CMPLT) { - mb[0] = RD_REG_WORD(®->mailbox16); + mb[0] = RD_REG_DWORD(®->mailbox16); qlafx00_mbx_completion(vha, mb[0]); status |= MBX_INTERRUPT; clr_intr |= QLAFX00_INTR_MB_CMPLT; } if (intr_stat & QLAFX00_INTR_ASYNC_CMPLT) { - ha->aenmb[0] = RD_REG_WORD(®->aenmailbox0); + ha->aenmb[0] = RD_REG_DWORD(®->aenmailbox0); qlafx00_async_event(vha); clr_intr |= QLAFX00_INTR_ASYNC_CMPLT; } diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index ec4d6675c62f..9cc6ad62265c 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1996,11 +1996,11 @@ void qla82xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) { uint16_t cnt; - uint16_t __iomem *wptr; + __le16 __iomem *wptr; struct qla_hw_data *ha = vha->hw; struct device_reg_82xx __iomem *reg = &ha->iobase->isp82; - wptr = (uint16_t __iomem *)®->mailbox_out[1]; + wptr = ®->mailbox_out[1]; /* Load return mailbox registers. */ ha->flags.mbox_int = 1; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 743c0df18fa0..017f4e0f1b58 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7558,7 +7558,7 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) spin_lock_irqsave(&ha->hardware_lock, flags); if (IS_QLA2100(ha) || IS_QLA2200(ha)){ - stat = RD_REG_DWORD(®->hccr); + stat = RD_REG_WORD(®->hccr); if (stat & HCCR_RISC_PAUSE) risc_paused = 1; } else if (IS_QLA23XX(ha)) { From 04474d3a1c968119e7214c312b273dee01258cad Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:08 -0700 Subject: [PATCH 418/562] scsi: qla2xxx: Change {RD,WRT}_REG_*() function names from upper case into lower case This was suggested by Daniel Wagner. Link: https://lore.kernel.org/r/20200518211712.11395-12-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Arun Easi Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 582 +++++++++++++++--------------- drivers/scsi/qla2xxx/qla_def.h | 26 +- drivers/scsi/qla2xxx/qla_init.c | 205 ++++++----- drivers/scsi/qla2xxx/qla_inline.h | 6 +- drivers/scsi/qla2xxx/qla_iocb.c | 64 ++-- drivers/scsi/qla2xxx/qla_isr.c | 128 +++---- drivers/scsi/qla2xxx/qla_mbx.c | 74 ++-- drivers/scsi/qla2xxx/qla_mr.c | 94 ++--- drivers/scsi/qla2xxx/qla_mr.h | 24 +- drivers/scsi/qla2xxx/qla_nvme.c | 4 +- drivers/scsi/qla2xxx/qla_nx.c | 68 ++-- drivers/scsi/qla2xxx/qla_nx2.c | 12 +- drivers/scsi/qla2xxx/qla_os.c | 26 +- drivers/scsi/qla2xxx/qla_sup.c | 244 ++++++------- drivers/scsi/qla2xxx/qla_target.c | 10 +- drivers/scsi/qla2xxx/qla_tmpl.c | 14 +- 16 files changed, 790 insertions(+), 791 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 07a8c674b741..fbd8cb5647b6 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -126,26 +126,26 @@ qla27xx_dump_mpi_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, if (i + dwords > ram_dwords) dwords = ram_dwords - i; - WRT_REG_WORD(®->mailbox0, MBC_LOAD_DUMP_MPI_RAM); - WRT_REG_WORD(®->mailbox1, LSW(addr)); - WRT_REG_WORD(®->mailbox8, MSW(addr)); + wrt_reg_word(®->mailbox0, MBC_LOAD_DUMP_MPI_RAM); + wrt_reg_word(®->mailbox1, LSW(addr)); + wrt_reg_word(®->mailbox8, MSW(addr)); - WRT_REG_WORD(®->mailbox2, MSW(LSD(dump_dma))); - WRT_REG_WORD(®->mailbox3, LSW(LSD(dump_dma))); - WRT_REG_WORD(®->mailbox6, MSW(MSD(dump_dma))); - WRT_REG_WORD(®->mailbox7, LSW(MSD(dump_dma))); + wrt_reg_word(®->mailbox2, MSW(LSD(dump_dma))); + wrt_reg_word(®->mailbox3, LSW(LSD(dump_dma))); + wrt_reg_word(®->mailbox6, MSW(MSD(dump_dma))); + wrt_reg_word(®->mailbox7, LSW(MSD(dump_dma))); - WRT_REG_WORD(®->mailbox4, MSW(dwords)); - WRT_REG_WORD(®->mailbox5, LSW(dwords)); + wrt_reg_word(®->mailbox4, MSW(dwords)); + wrt_reg_word(®->mailbox5, LSW(dwords)); - WRT_REG_WORD(®->mailbox9, 0); - WRT_REG_DWORD(®->hccr, HCCRX_SET_HOST_INT); + wrt_reg_word(®->mailbox9, 0); + wrt_reg_dword(®->hccr, HCCRX_SET_HOST_INT); ha->flags.mbox_int = 0; while (timer--) { udelay(5); - stat = RD_REG_DWORD(®->host_status); + stat = rd_reg_dword(®->host_status); /* Check for pending interrupts. */ if (!(stat & HSRX_RISC_INT)) continue; @@ -155,15 +155,15 @@ qla27xx_dump_mpi_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, stat != 0x10 && stat != 0x11) { /* Clear this intr; it wasn't a mailbox intr */ - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); + rd_reg_dword(®->hccr); continue; } set_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); - rval = RD_REG_WORD(®->mailbox0) & MBS_MASK; - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); - RD_REG_DWORD(®->hccr); + rval = rd_reg_word(®->mailbox0) & MBS_MASK; + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); + rd_reg_dword(®->hccr); break; } ha->flags.mbox_int = 1; @@ -206,23 +206,23 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, if (i + dwords > ram_dwords) dwords = ram_dwords - i; - WRT_REG_WORD(®->mailbox0, MBC_DUMP_RISC_RAM_EXTENDED); - WRT_REG_WORD(®->mailbox1, LSW(addr)); - WRT_REG_WORD(®->mailbox8, MSW(addr)); + wrt_reg_word(®->mailbox0, MBC_DUMP_RISC_RAM_EXTENDED); + wrt_reg_word(®->mailbox1, LSW(addr)); + wrt_reg_word(®->mailbox8, MSW(addr)); - WRT_REG_WORD(®->mailbox2, MSW(LSD(dump_dma))); - WRT_REG_WORD(®->mailbox3, LSW(LSD(dump_dma))); - WRT_REG_WORD(®->mailbox6, MSW(MSD(dump_dma))); - WRT_REG_WORD(®->mailbox7, LSW(MSD(dump_dma))); + wrt_reg_word(®->mailbox2, MSW(LSD(dump_dma))); + wrt_reg_word(®->mailbox3, LSW(LSD(dump_dma))); + wrt_reg_word(®->mailbox6, MSW(MSD(dump_dma))); + wrt_reg_word(®->mailbox7, LSW(MSD(dump_dma))); - WRT_REG_WORD(®->mailbox4, MSW(dwords)); - WRT_REG_WORD(®->mailbox5, LSW(dwords)); - WRT_REG_DWORD(®->hccr, HCCRX_SET_HOST_INT); + wrt_reg_word(®->mailbox4, MSW(dwords)); + wrt_reg_word(®->mailbox5, LSW(dwords)); + wrt_reg_dword(®->hccr, HCCRX_SET_HOST_INT); ha->flags.mbox_int = 0; while (timer--) { udelay(5); - stat = RD_REG_DWORD(®->host_status); + stat = rd_reg_dword(®->host_status); /* Check for pending interrupts. */ if (!(stat & HSRX_RISC_INT)) @@ -231,15 +231,15 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, stat &= 0xff; if (stat != 0x1 && stat != 0x2 && stat != 0x10 && stat != 0x11) { - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); + rd_reg_dword(®->hccr); continue; } set_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); - rval = RD_REG_WORD(®->mailbox0) & MBS_MASK; - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); - RD_REG_DWORD(®->hccr); + rval = rd_reg_word(®->mailbox0) & MBS_MASK; + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); + rd_reg_dword(®->hccr); break; } ha->flags.mbox_int = 1; @@ -292,10 +292,10 @@ qla24xx_read_window(struct device_reg_24xx __iomem *reg, uint32_t iobase, { uint32_t __iomem *dmp_reg; - WRT_REG_DWORD(®->iobase_addr, iobase); + wrt_reg_dword(®->iobase_addr, iobase); dmp_reg = ®->iobase_window; for ( ; count--; dmp_reg++) - *buf++ = htonl(RD_REG_DWORD(dmp_reg)); + *buf++ = htonl(rd_reg_dword(dmp_reg)); return buf; } @@ -303,11 +303,11 @@ qla24xx_read_window(struct device_reg_24xx __iomem *reg, uint32_t iobase, void qla24xx_pause_risc(struct device_reg_24xx __iomem *reg, struct qla_hw_data *ha) { - WRT_REG_DWORD(®->hccr, HCCRX_SET_RISC_PAUSE); + wrt_reg_dword(®->hccr, HCCRX_SET_RISC_PAUSE); /* 100 usec delay is sufficient enough for hardware to pause RISC */ udelay(100); - if (RD_REG_DWORD(®->host_status) & HSRX_RISC_PAUSED) + if (rd_reg_dword(®->host_status) & HSRX_RISC_PAUSED) set_bit(RISC_PAUSE_CMPL, &ha->fw_dump_cap_flags); } @@ -324,17 +324,17 @@ qla24xx_soft_reset(struct qla_hw_data *ha) * Driver can proceed with the reset sequence after waiting * for a timeout period. */ - WRT_REG_DWORD(®->ctrl_status, CSRX_DMA_SHUTDOWN|MWB_4096_BYTES); + wrt_reg_dword(®->ctrl_status, CSRX_DMA_SHUTDOWN|MWB_4096_BYTES); for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_DWORD(®->ctrl_status) & CSRX_DMA_ACTIVE) == 0) + if ((rd_reg_dword(®->ctrl_status) & CSRX_DMA_ACTIVE) == 0) break; udelay(10); } - if (!(RD_REG_DWORD(®->ctrl_status) & CSRX_DMA_ACTIVE)) + if (!(rd_reg_dword(®->ctrl_status) & CSRX_DMA_ACTIVE)) set_bit(DMA_SHUTDOWN_CMPL, &ha->fw_dump_cap_flags); - WRT_REG_DWORD(®->ctrl_status, + wrt_reg_dword(®->ctrl_status, CSRX_ISP_SOFT_RESET|CSRX_DMA_SHUTDOWN|MWB_4096_BYTES); pci_read_config_word(ha->pdev, PCI_COMMAND, &wd); @@ -342,19 +342,19 @@ qla24xx_soft_reset(struct qla_hw_data *ha) /* Wait for soft-reset to complete. */ for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_DWORD(®->ctrl_status) & + if ((rd_reg_dword(®->ctrl_status) & CSRX_ISP_SOFT_RESET) == 0) break; udelay(10); } - if (!(RD_REG_DWORD(®->ctrl_status) & CSRX_ISP_SOFT_RESET)) + if (!(rd_reg_dword(®->ctrl_status) & CSRX_ISP_SOFT_RESET)) set_bit(ISP_RESET_CMPL, &ha->fw_dump_cap_flags); - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_RESET); - RD_REG_DWORD(®->hccr); /* PCI Posting. */ + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_RESET); + rd_reg_dword(®->hccr); /* PCI Posting. */ - for (cnt = 10000; RD_REG_WORD(®->mailbox0) != 0 && + for (cnt = 10000; rd_reg_word(®->mailbox0) != 0 && rval == QLA_SUCCESS; cnt--) { if (cnt) udelay(10); @@ -399,11 +399,11 @@ qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint16_t *ram, WRT_MAILBOX_REG(ha, reg, 7, LSW(MSD(dump_dma))); WRT_MAILBOX_REG(ha, reg, 4, words); - WRT_REG_WORD(®->hccr, HCCR_SET_HOST_INT); + wrt_reg_word(®->hccr, HCCR_SET_HOST_INT); for (timer = 6000000; timer; timer--) { /* Check for pending interrupts. */ - stat = RD_REG_DWORD(®->u.isp2300.host_status); + stat = rd_reg_dword(®->u.isp2300.host_status); if (stat & HSR_RISC_INT) { stat &= 0xff; @@ -414,10 +414,10 @@ qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint16_t *ram, mb0 = RD_MAILBOX_REG(ha, reg, 0); /* Release mailbox registers. */ - WRT_REG_WORD(®->semaphore, 0); - WRT_REG_WORD(®->hccr, + wrt_reg_word(®->semaphore, 0); + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD(®->hccr); + rd_reg_word(®->hccr); break; } else if (stat == 0x10 || stat == 0x11) { set_bit(MBX_INTERRUPT, @@ -425,15 +425,15 @@ qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint16_t *ram, mb0 = RD_MAILBOX_REG(ha, reg, 0); - WRT_REG_WORD(®->hccr, + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD(®->hccr); + rd_reg_word(®->hccr); break; } /* clear this intr; it wasn't a mailbox intr */ - WRT_REG_WORD(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); + rd_reg_word(®->hccr); } udelay(5); } @@ -458,7 +458,7 @@ qla2xxx_read_window(struct device_reg_2xxx __iomem *reg, uint32_t count, uint16_t __iomem *dmp_reg = ®->u.isp2300.fb_cmd; for ( ; count--; dmp_reg++) - *buf++ = htons(RD_REG_WORD(dmp_reg)); + *buf++ = htons(rd_reg_word(dmp_reg)); } static inline void * @@ -685,13 +685,13 @@ qla25xx_copy_mq(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) reg = ISP_QUE_REG(ha, cnt); que_idx = cnt * 4; mq->qregs[que_idx] = - htonl(RD_REG_DWORD(®->isp25mq.req_q_in)); + htonl(rd_reg_dword(®->isp25mq.req_q_in)); mq->qregs[que_idx+1] = - htonl(RD_REG_DWORD(®->isp25mq.req_q_out)); + htonl(rd_reg_dword(®->isp25mq.req_q_out)); mq->qregs[que_idx+2] = - htonl(RD_REG_DWORD(®->isp25mq.rsp_q_in)); + htonl(rd_reg_dword(®->isp25mq.rsp_q_in)); mq->qregs[que_idx+3] = - htonl(RD_REG_DWORD(®->isp25mq.rsp_q_out)); + htonl(rd_reg_dword(®->isp25mq.rsp_q_out)); } return ptr + sizeof(struct qla2xxx_mq_chain); @@ -760,13 +760,13 @@ qla2300_fw_dump(scsi_qla_host_t *vha) qla2xxx_prep_dump(ha, ha->fw_dump); rval = QLA_SUCCESS; - fw->hccr = htons(RD_REG_WORD(®->hccr)); + fw->hccr = htons(rd_reg_word(®->hccr)); /* Pause RISC. */ - WRT_REG_WORD(®->hccr, HCCR_PAUSE_RISC); + wrt_reg_word(®->hccr, HCCR_PAUSE_RISC); if (IS_QLA2300(ha)) { for (cnt = 30000; - (RD_REG_WORD(®->hccr) & HCCR_RISC_PAUSE) == 0 && + (rd_reg_word(®->hccr) & HCCR_RISC_PAUSE) == 0 && rval == QLA_SUCCESS; cnt--) { if (cnt) udelay(100); @@ -774,74 +774,74 @@ qla2300_fw_dump(scsi_qla_host_t *vha) rval = QLA_FUNCTION_TIMEOUT; } } else { - RD_REG_WORD(®->hccr); /* PCI Posting. */ + rd_reg_word(®->hccr); /* PCI Posting. */ udelay(10); } if (rval == QLA_SUCCESS) { dmp_reg = ®->flash_address; for (cnt = 0; cnt < ARRAY_SIZE(fw->pbiu_reg); cnt++, dmp_reg++) - fw->pbiu_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->pbiu_reg[cnt] = htons(rd_reg_word(dmp_reg)); dmp_reg = ®->u.isp2300.req_q_in; for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_host_reg); cnt++, dmp_reg++) - fw->risc_host_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->risc_host_reg[cnt] = htons(rd_reg_word(dmp_reg)); dmp_reg = ®->u.isp2300.mailbox0; for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, dmp_reg++) - fw->mailbox_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->mailbox_reg[cnt] = htons(rd_reg_word(dmp_reg)); - WRT_REG_WORD(®->ctrl_status, 0x40); + wrt_reg_word(®->ctrl_status, 0x40); qla2xxx_read_window(reg, 32, fw->resp_dma_reg); - WRT_REG_WORD(®->ctrl_status, 0x50); + wrt_reg_word(®->ctrl_status, 0x50); qla2xxx_read_window(reg, 48, fw->dma_reg); - WRT_REG_WORD(®->ctrl_status, 0x00); + wrt_reg_word(®->ctrl_status, 0x00); dmp_reg = ®->risc_hw; for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_hdw_reg); cnt++, dmp_reg++) - fw->risc_hdw_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->risc_hdw_reg[cnt] = htons(rd_reg_word(dmp_reg)); - WRT_REG_WORD(®->pcr, 0x2000); + wrt_reg_word(®->pcr, 0x2000); qla2xxx_read_window(reg, 16, fw->risc_gp0_reg); - WRT_REG_WORD(®->pcr, 0x2200); + wrt_reg_word(®->pcr, 0x2200); qla2xxx_read_window(reg, 16, fw->risc_gp1_reg); - WRT_REG_WORD(®->pcr, 0x2400); + wrt_reg_word(®->pcr, 0x2400); qla2xxx_read_window(reg, 16, fw->risc_gp2_reg); - WRT_REG_WORD(®->pcr, 0x2600); + wrt_reg_word(®->pcr, 0x2600); qla2xxx_read_window(reg, 16, fw->risc_gp3_reg); - WRT_REG_WORD(®->pcr, 0x2800); + wrt_reg_word(®->pcr, 0x2800); qla2xxx_read_window(reg, 16, fw->risc_gp4_reg); - WRT_REG_WORD(®->pcr, 0x2A00); + wrt_reg_word(®->pcr, 0x2A00); qla2xxx_read_window(reg, 16, fw->risc_gp5_reg); - WRT_REG_WORD(®->pcr, 0x2C00); + wrt_reg_word(®->pcr, 0x2C00); qla2xxx_read_window(reg, 16, fw->risc_gp6_reg); - WRT_REG_WORD(®->pcr, 0x2E00); + wrt_reg_word(®->pcr, 0x2E00); qla2xxx_read_window(reg, 16, fw->risc_gp7_reg); - WRT_REG_WORD(®->ctrl_status, 0x10); + wrt_reg_word(®->ctrl_status, 0x10); qla2xxx_read_window(reg, 64, fw->frame_buf_hdw_reg); - WRT_REG_WORD(®->ctrl_status, 0x20); + wrt_reg_word(®->ctrl_status, 0x20); qla2xxx_read_window(reg, 64, fw->fpm_b0_reg); - WRT_REG_WORD(®->ctrl_status, 0x30); + wrt_reg_word(®->ctrl_status, 0x30); qla2xxx_read_window(reg, 64, fw->fpm_b1_reg); /* Reset RISC. */ - WRT_REG_WORD(®->ctrl_status, CSR_ISP_SOFT_RESET); + wrt_reg_word(®->ctrl_status, CSR_ISP_SOFT_RESET); for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_WORD(®->ctrl_status) & + if ((rd_reg_word(®->ctrl_status) & CSR_ISP_SOFT_RESET) == 0) break; @@ -916,11 +916,11 @@ qla2100_fw_dump(scsi_qla_host_t *vha) qla2xxx_prep_dump(ha, ha->fw_dump); rval = QLA_SUCCESS; - fw->hccr = htons(RD_REG_WORD(®->hccr)); + fw->hccr = htons(rd_reg_word(®->hccr)); /* Pause RISC. */ - WRT_REG_WORD(®->hccr, HCCR_PAUSE_RISC); - for (cnt = 30000; (RD_REG_WORD(®->hccr) & HCCR_RISC_PAUSE) == 0 && + wrt_reg_word(®->hccr, HCCR_PAUSE_RISC); + for (cnt = 30000; (rd_reg_word(®->hccr) & HCCR_RISC_PAUSE) == 0 && rval == QLA_SUCCESS; cnt--) { if (cnt) udelay(100); @@ -930,60 +930,60 @@ qla2100_fw_dump(scsi_qla_host_t *vha) if (rval == QLA_SUCCESS) { dmp_reg = ®->flash_address; for (cnt = 0; cnt < ARRAY_SIZE(fw->pbiu_reg); cnt++, dmp_reg++) - fw->pbiu_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->pbiu_reg[cnt] = htons(rd_reg_word(dmp_reg)); dmp_reg = ®->u.isp2100.mailbox0; for (cnt = 0; cnt < ha->mbx_count; cnt++, dmp_reg++) { if (cnt == 8) dmp_reg = ®->u_end.isp2200.mailbox8; - fw->mailbox_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->mailbox_reg[cnt] = htons(rd_reg_word(dmp_reg)); } dmp_reg = ®->u.isp2100.unused_2[0]; for (cnt = 0; cnt < ARRAY_SIZE(fw->dma_reg); cnt++, dmp_reg++) - fw->dma_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->dma_reg[cnt] = htons(rd_reg_word(dmp_reg)); - WRT_REG_WORD(®->ctrl_status, 0x00); + wrt_reg_word(®->ctrl_status, 0x00); dmp_reg = ®->risc_hw; for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_hdw_reg); cnt++, dmp_reg++) - fw->risc_hdw_reg[cnt] = htons(RD_REG_WORD(dmp_reg)); + fw->risc_hdw_reg[cnt] = htons(rd_reg_word(dmp_reg)); - WRT_REG_WORD(®->pcr, 0x2000); + wrt_reg_word(®->pcr, 0x2000); qla2xxx_read_window(reg, 16, fw->risc_gp0_reg); - WRT_REG_WORD(®->pcr, 0x2100); + wrt_reg_word(®->pcr, 0x2100); qla2xxx_read_window(reg, 16, fw->risc_gp1_reg); - WRT_REG_WORD(®->pcr, 0x2200); + wrt_reg_word(®->pcr, 0x2200); qla2xxx_read_window(reg, 16, fw->risc_gp2_reg); - WRT_REG_WORD(®->pcr, 0x2300); + wrt_reg_word(®->pcr, 0x2300); qla2xxx_read_window(reg, 16, fw->risc_gp3_reg); - WRT_REG_WORD(®->pcr, 0x2400); + wrt_reg_word(®->pcr, 0x2400); qla2xxx_read_window(reg, 16, fw->risc_gp4_reg); - WRT_REG_WORD(®->pcr, 0x2500); + wrt_reg_word(®->pcr, 0x2500); qla2xxx_read_window(reg, 16, fw->risc_gp5_reg); - WRT_REG_WORD(®->pcr, 0x2600); + wrt_reg_word(®->pcr, 0x2600); qla2xxx_read_window(reg, 16, fw->risc_gp6_reg); - WRT_REG_WORD(®->pcr, 0x2700); + wrt_reg_word(®->pcr, 0x2700); qla2xxx_read_window(reg, 16, fw->risc_gp7_reg); - WRT_REG_WORD(®->ctrl_status, 0x10); + wrt_reg_word(®->ctrl_status, 0x10); qla2xxx_read_window(reg, 16, fw->frame_buf_hdw_reg); - WRT_REG_WORD(®->ctrl_status, 0x20); + wrt_reg_word(®->ctrl_status, 0x20); qla2xxx_read_window(reg, 64, fw->fpm_b0_reg); - WRT_REG_WORD(®->ctrl_status, 0x30); + wrt_reg_word(®->ctrl_status, 0x30); qla2xxx_read_window(reg, 64, fw->fpm_b1_reg); /* Reset the ISP. */ - WRT_REG_WORD(®->ctrl_status, CSR_ISP_SOFT_RESET); + wrt_reg_word(®->ctrl_status, CSR_ISP_SOFT_RESET); } for (cnt = 30000; RD_MAILBOX_REG(ha, reg, 0) != 0 && @@ -996,11 +996,11 @@ qla2100_fw_dump(scsi_qla_host_t *vha) /* Pause RISC. */ if (rval == QLA_SUCCESS && (IS_QLA2200(ha) || (IS_QLA2100(ha) && - (RD_REG_WORD(®->mctr) & (BIT_1 | BIT_0)) != 0))) { + (rd_reg_word(®->mctr) & (BIT_1 | BIT_0)) != 0))) { - WRT_REG_WORD(®->hccr, HCCR_PAUSE_RISC); + wrt_reg_word(®->hccr, HCCR_PAUSE_RISC); for (cnt = 30000; - (RD_REG_WORD(®->hccr) & HCCR_RISC_PAUSE) == 0 && + (rd_reg_word(®->hccr) & HCCR_RISC_PAUSE) == 0 && rval == QLA_SUCCESS; cnt--) { if (cnt) udelay(100); @@ -1010,13 +1010,13 @@ qla2100_fw_dump(scsi_qla_host_t *vha) if (rval == QLA_SUCCESS) { /* Set memory configuration and timing. */ if (IS_QLA2100(ha)) - WRT_REG_WORD(®->mctr, 0xf1); + wrt_reg_word(®->mctr, 0xf1); else - WRT_REG_WORD(®->mctr, 0xf2); - RD_REG_WORD(®->mctr); /* PCI Posting. */ + wrt_reg_word(®->mctr, 0xf2); + rd_reg_word(®->mctr); /* PCI Posting. */ /* Release RISC. */ - WRT_REG_WORD(®->hccr, HCCR_RELEASE_RISC); + wrt_reg_word(®->hccr, HCCR_RELEASE_RISC); } } @@ -1029,26 +1029,26 @@ qla2100_fw_dump(scsi_qla_host_t *vha) for (cnt = 0; cnt < ARRAY_SIZE(fw->risc_ram) && rval == QLA_SUCCESS; cnt++, risc_address++) { WRT_MAILBOX_REG(ha, reg, 1, risc_address); - WRT_REG_WORD(®->hccr, HCCR_SET_HOST_INT); + wrt_reg_word(®->hccr, HCCR_SET_HOST_INT); for (timer = 6000000; timer != 0; timer--) { /* Check for pending interrupts. */ - if (RD_REG_WORD(®->istatus) & ISR_RISC_INT) { - if (RD_REG_WORD(®->semaphore) & BIT_0) { + if (rd_reg_word(®->istatus) & ISR_RISC_INT) { + if (rd_reg_word(®->semaphore) & BIT_0) { set_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); mb0 = RD_MAILBOX_REG(ha, reg, 0); mb2 = RD_MAILBOX_REG(ha, reg, 2); - WRT_REG_WORD(®->semaphore, 0); - WRT_REG_WORD(®->hccr, + wrt_reg_word(®->semaphore, 0); + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD(®->hccr); + rd_reg_word(®->hccr); break; } - WRT_REG_WORD(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); + rd_reg_word(®->hccr); } udelay(5); } @@ -1107,7 +1107,7 @@ qla24xx_fw_dump(scsi_qla_host_t *vha) fw = &ha->fw_dump->isp.isp24; qla2xxx_prep_dump(ha, ha->fw_dump); - fw->host_status = htonl(RD_REG_DWORD(®->host_status)); + fw->host_status = htonl(rd_reg_dword(®->host_status)); /* * Pause RISC. No need to track timeout, as resetting the chip @@ -1118,40 +1118,40 @@ qla24xx_fw_dump(scsi_qla_host_t *vha) /* Host interface registers. */ dmp_reg = ®->flash_addr; for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) - fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); + fw->host_reg[cnt] = htonl(rd_reg_dword(dmp_reg)); /* Disable interrupts. */ - WRT_REG_DWORD(®->ictrl, 0); - RD_REG_DWORD(®->ictrl); + wrt_reg_dword(®->ictrl, 0); + rd_reg_dword(®->ictrl); /* Shadow registers. */ - WRT_REG_DWORD(®->iobase_addr, 0x0F70); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_select, 0xB0000000); - fw->shadow_reg[0] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_addr, 0x0F70); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_select, 0xB0000000); + fw->shadow_reg[0] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0100000); - fw->shadow_reg[1] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0100000); + fw->shadow_reg[1] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0200000); - fw->shadow_reg[2] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0200000); + fw->shadow_reg[2] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0300000); - fw->shadow_reg[3] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0300000); + fw->shadow_reg[3] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0400000); - fw->shadow_reg[4] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0400000); + fw->shadow_reg[4] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0500000); - fw->shadow_reg[5] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0500000); + fw->shadow_reg[5] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0600000); - fw->shadow_reg[6] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0600000); + fw->shadow_reg[6] = htonl(rd_reg_dword(®->iobase_sdata)); /* Mailbox registers. */ mbx_reg = ®->mailbox0; for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) - fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); + fw->mailbox_reg[cnt] = htons(rd_reg_word(mbx_reg)); /* Transfer sequence registers. */ iter_reg = fw->xseq_gp_reg; @@ -1190,19 +1190,19 @@ qla24xx_fw_dump(scsi_qla_host_t *vha) iter_reg = qla24xx_read_window(reg, 0x7200, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->resp0_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7300, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->req1_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7400, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); /* Transmit DMA registers. */ iter_reg = fw->xmt0_dma_reg; @@ -1350,7 +1350,7 @@ qla25xx_fw_dump(scsi_qla_host_t *vha) qla2xxx_prep_dump(ha, ha->fw_dump); ha->fw_dump->version = htonl(2); - fw->host_status = htonl(RD_REG_DWORD(®->host_status)); + fw->host_status = htonl(rd_reg_dword(®->host_status)); /* * Pause RISC. No need to track timeout, as resetting the chip @@ -1364,73 +1364,73 @@ qla25xx_fw_dump(scsi_qla_host_t *vha) qla24xx_read_window(reg, 0x7010, 16, iter_reg); /* PCIe registers. */ - WRT_REG_DWORD(®->iobase_addr, 0x7C00); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_window, 0x01); + wrt_reg_dword(®->iobase_addr, 0x7C00); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_window, 0x01); dmp_reg = ®->iobase_c4; - fw->pcie_regs[0] = htonl(RD_REG_DWORD(dmp_reg)); + fw->pcie_regs[0] = htonl(rd_reg_dword(dmp_reg)); dmp_reg++; - fw->pcie_regs[1] = htonl(RD_REG_DWORD(dmp_reg)); + fw->pcie_regs[1] = htonl(rd_reg_dword(dmp_reg)); dmp_reg++; - fw->pcie_regs[2] = htonl(RD_REG_DWORD(dmp_reg)); - fw->pcie_regs[3] = htonl(RD_REG_DWORD(®->iobase_window)); + fw->pcie_regs[2] = htonl(rd_reg_dword(dmp_reg)); + fw->pcie_regs[3] = htonl(rd_reg_dword(®->iobase_window)); - WRT_REG_DWORD(®->iobase_window, 0x00); - RD_REG_DWORD(®->iobase_window); + wrt_reg_dword(®->iobase_window, 0x00); + rd_reg_dword(®->iobase_window); /* Host interface registers. */ dmp_reg = ®->flash_addr; for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) - fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); + fw->host_reg[cnt] = htonl(rd_reg_dword(dmp_reg)); /* Disable interrupts. */ - WRT_REG_DWORD(®->ictrl, 0); - RD_REG_DWORD(®->ictrl); + wrt_reg_dword(®->ictrl, 0); + rd_reg_dword(®->ictrl); /* Shadow registers. */ - WRT_REG_DWORD(®->iobase_addr, 0x0F70); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_select, 0xB0000000); - fw->shadow_reg[0] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_addr, 0x0F70); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_select, 0xB0000000); + fw->shadow_reg[0] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0100000); - fw->shadow_reg[1] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0100000); + fw->shadow_reg[1] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0200000); - fw->shadow_reg[2] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0200000); + fw->shadow_reg[2] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0300000); - fw->shadow_reg[3] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0300000); + fw->shadow_reg[3] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0400000); - fw->shadow_reg[4] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0400000); + fw->shadow_reg[4] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0500000); - fw->shadow_reg[5] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0500000); + fw->shadow_reg[5] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0600000); - fw->shadow_reg[6] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0600000); + fw->shadow_reg[6] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0700000); - fw->shadow_reg[7] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0700000); + fw->shadow_reg[7] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0800000); - fw->shadow_reg[8] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0800000); + fw->shadow_reg[8] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0900000); - fw->shadow_reg[9] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0900000); + fw->shadow_reg[9] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0A00000); - fw->shadow_reg[10] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0A00000); + fw->shadow_reg[10] = htonl(rd_reg_dword(®->iobase_sdata)); /* RISC I/O register. */ - WRT_REG_DWORD(®->iobase_addr, 0x0010); - fw->risc_io_reg = htonl(RD_REG_DWORD(®->iobase_window)); + wrt_reg_dword(®->iobase_addr, 0x0010); + fw->risc_io_reg = htonl(rd_reg_dword(®->iobase_window)); /* Mailbox registers. */ mbx_reg = ®->mailbox0; for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) - fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); + fw->mailbox_reg[cnt] = htons(rd_reg_word(mbx_reg)); /* Transfer sequence registers. */ iter_reg = fw->xseq_gp_reg; @@ -1494,19 +1494,19 @@ qla25xx_fw_dump(scsi_qla_host_t *vha) iter_reg = qla24xx_read_window(reg, 0x7200, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->resp0_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7300, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->req1_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7400, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); /* Transmit DMA registers. */ iter_reg = fw->xmt0_dma_reg; @@ -1661,7 +1661,7 @@ qla81xx_fw_dump(scsi_qla_host_t *vha) fw = &ha->fw_dump->isp.isp81; qla2xxx_prep_dump(ha, ha->fw_dump); - fw->host_status = htonl(RD_REG_DWORD(®->host_status)); + fw->host_status = htonl(rd_reg_dword(®->host_status)); /* * Pause RISC. No need to track timeout, as resetting the chip @@ -1675,73 +1675,73 @@ qla81xx_fw_dump(scsi_qla_host_t *vha) qla24xx_read_window(reg, 0x7010, 16, iter_reg); /* PCIe registers. */ - WRT_REG_DWORD(®->iobase_addr, 0x7C00); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_window, 0x01); + wrt_reg_dword(®->iobase_addr, 0x7C00); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_window, 0x01); dmp_reg = ®->iobase_c4; - fw->pcie_regs[0] = htonl(RD_REG_DWORD(dmp_reg)); + fw->pcie_regs[0] = htonl(rd_reg_dword(dmp_reg)); dmp_reg++; - fw->pcie_regs[1] = htonl(RD_REG_DWORD(dmp_reg)); + fw->pcie_regs[1] = htonl(rd_reg_dword(dmp_reg)); dmp_reg++; - fw->pcie_regs[2] = htonl(RD_REG_DWORD(dmp_reg)); - fw->pcie_regs[3] = htonl(RD_REG_DWORD(®->iobase_window)); + fw->pcie_regs[2] = htonl(rd_reg_dword(dmp_reg)); + fw->pcie_regs[3] = htonl(rd_reg_dword(®->iobase_window)); - WRT_REG_DWORD(®->iobase_window, 0x00); - RD_REG_DWORD(®->iobase_window); + wrt_reg_dword(®->iobase_window, 0x00); + rd_reg_dword(®->iobase_window); /* Host interface registers. */ dmp_reg = ®->flash_addr; for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) - fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); + fw->host_reg[cnt] = htonl(rd_reg_dword(dmp_reg)); /* Disable interrupts. */ - WRT_REG_DWORD(®->ictrl, 0); - RD_REG_DWORD(®->ictrl); + wrt_reg_dword(®->ictrl, 0); + rd_reg_dword(®->ictrl); /* Shadow registers. */ - WRT_REG_DWORD(®->iobase_addr, 0x0F70); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_select, 0xB0000000); - fw->shadow_reg[0] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_addr, 0x0F70); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_select, 0xB0000000); + fw->shadow_reg[0] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0100000); - fw->shadow_reg[1] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0100000); + fw->shadow_reg[1] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0200000); - fw->shadow_reg[2] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0200000); + fw->shadow_reg[2] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0300000); - fw->shadow_reg[3] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0300000); + fw->shadow_reg[3] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0400000); - fw->shadow_reg[4] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0400000); + fw->shadow_reg[4] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0500000); - fw->shadow_reg[5] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0500000); + fw->shadow_reg[5] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0600000); - fw->shadow_reg[6] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0600000); + fw->shadow_reg[6] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0700000); - fw->shadow_reg[7] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0700000); + fw->shadow_reg[7] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0800000); - fw->shadow_reg[8] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0800000); + fw->shadow_reg[8] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0900000); - fw->shadow_reg[9] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0900000); + fw->shadow_reg[9] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0A00000); - fw->shadow_reg[10] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0A00000); + fw->shadow_reg[10] = htonl(rd_reg_dword(®->iobase_sdata)); /* RISC I/O register. */ - WRT_REG_DWORD(®->iobase_addr, 0x0010); - fw->risc_io_reg = htonl(RD_REG_DWORD(®->iobase_window)); + wrt_reg_dword(®->iobase_addr, 0x0010); + fw->risc_io_reg = htonl(rd_reg_dword(®->iobase_window)); /* Mailbox registers. */ mbx_reg = ®->mailbox0; for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) - fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); + fw->mailbox_reg[cnt] = htons(rd_reg_word(mbx_reg)); /* Transfer sequence registers. */ iter_reg = fw->xseq_gp_reg; @@ -1805,19 +1805,19 @@ qla81xx_fw_dump(scsi_qla_host_t *vha) iter_reg = qla24xx_read_window(reg, 0x7200, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->resp0_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7300, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->req1_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7400, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); /* Transmit DMA registers. */ iter_reg = fw->xmt0_dma_reg; @@ -1976,7 +1976,7 @@ qla83xx_fw_dump(scsi_qla_host_t *vha) fw = &ha->fw_dump->isp.isp83; qla2xxx_prep_dump(ha, ha->fw_dump); - fw->host_status = htonl(RD_REG_DWORD(®->host_status)); + fw->host_status = htonl(rd_reg_dword(®->host_status)); /* * Pause RISC. No need to track timeout, as resetting the chip @@ -1984,24 +1984,24 @@ qla83xx_fw_dump(scsi_qla_host_t *vha) */ qla24xx_pause_risc(reg, ha); - WRT_REG_DWORD(®->iobase_addr, 0x6000); + wrt_reg_dword(®->iobase_addr, 0x6000); dmp_reg = ®->iobase_window; - RD_REG_DWORD(dmp_reg); - WRT_REG_DWORD(dmp_reg, 0); + rd_reg_dword(dmp_reg); + wrt_reg_dword(dmp_reg, 0); dmp_reg = ®->unused_4_1[0]; - RD_REG_DWORD(dmp_reg); - WRT_REG_DWORD(dmp_reg, 0); + rd_reg_dword(dmp_reg); + wrt_reg_dword(dmp_reg, 0); - WRT_REG_DWORD(®->iobase_addr, 0x6010); + wrt_reg_dword(®->iobase_addr, 0x6010); dmp_reg = ®->unused_4_1[2]; - RD_REG_DWORD(dmp_reg); - WRT_REG_DWORD(dmp_reg, 0); + rd_reg_dword(dmp_reg); + wrt_reg_dword(dmp_reg, 0); /* select PCR and disable ecc checking and correction */ - WRT_REG_DWORD(®->iobase_addr, 0x0F70); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_select, 0x60000000); /* write to F0h = PCR */ + wrt_reg_dword(®->iobase_addr, 0x0F70); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_select, 0x60000000); /* write to F0h = PCR */ /* Host/Risc registers. */ iter_reg = fw->host_risc_reg; @@ -2010,73 +2010,73 @@ qla83xx_fw_dump(scsi_qla_host_t *vha) qla24xx_read_window(reg, 0x7040, 16, iter_reg); /* PCIe registers. */ - WRT_REG_DWORD(®->iobase_addr, 0x7C00); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_window, 0x01); + wrt_reg_dword(®->iobase_addr, 0x7C00); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_window, 0x01); dmp_reg = ®->iobase_c4; - fw->pcie_regs[0] = htonl(RD_REG_DWORD(dmp_reg)); + fw->pcie_regs[0] = htonl(rd_reg_dword(dmp_reg)); dmp_reg++; - fw->pcie_regs[1] = htonl(RD_REG_DWORD(dmp_reg)); + fw->pcie_regs[1] = htonl(rd_reg_dword(dmp_reg)); dmp_reg++; - fw->pcie_regs[2] = htonl(RD_REG_DWORD(dmp_reg)); - fw->pcie_regs[3] = htonl(RD_REG_DWORD(®->iobase_window)); + fw->pcie_regs[2] = htonl(rd_reg_dword(dmp_reg)); + fw->pcie_regs[3] = htonl(rd_reg_dword(®->iobase_window)); - WRT_REG_DWORD(®->iobase_window, 0x00); - RD_REG_DWORD(®->iobase_window); + wrt_reg_dword(®->iobase_window, 0x00); + rd_reg_dword(®->iobase_window); /* Host interface registers. */ dmp_reg = ®->flash_addr; for (cnt = 0; cnt < ARRAY_SIZE(fw->host_reg); cnt++, dmp_reg++) - fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg)); + fw->host_reg[cnt] = htonl(rd_reg_dword(dmp_reg)); /* Disable interrupts. */ - WRT_REG_DWORD(®->ictrl, 0); - RD_REG_DWORD(®->ictrl); + wrt_reg_dword(®->ictrl, 0); + rd_reg_dword(®->ictrl); /* Shadow registers. */ - WRT_REG_DWORD(®->iobase_addr, 0x0F70); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_select, 0xB0000000); - fw->shadow_reg[0] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_addr, 0x0F70); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_select, 0xB0000000); + fw->shadow_reg[0] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0100000); - fw->shadow_reg[1] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0100000); + fw->shadow_reg[1] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0200000); - fw->shadow_reg[2] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0200000); + fw->shadow_reg[2] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0300000); - fw->shadow_reg[3] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0300000); + fw->shadow_reg[3] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0400000); - fw->shadow_reg[4] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0400000); + fw->shadow_reg[4] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0500000); - fw->shadow_reg[5] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0500000); + fw->shadow_reg[5] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0600000); - fw->shadow_reg[6] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0600000); + fw->shadow_reg[6] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0700000); - fw->shadow_reg[7] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0700000); + fw->shadow_reg[7] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0800000); - fw->shadow_reg[8] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0800000); + fw->shadow_reg[8] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0900000); - fw->shadow_reg[9] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0900000); + fw->shadow_reg[9] = htonl(rd_reg_dword(®->iobase_sdata)); - WRT_REG_DWORD(®->iobase_select, 0xB0A00000); - fw->shadow_reg[10] = htonl(RD_REG_DWORD(®->iobase_sdata)); + wrt_reg_dword(®->iobase_select, 0xB0A00000); + fw->shadow_reg[10] = htonl(rd_reg_dword(®->iobase_sdata)); /* RISC I/O register. */ - WRT_REG_DWORD(®->iobase_addr, 0x0010); - fw->risc_io_reg = htonl(RD_REG_DWORD(®->iobase_window)); + wrt_reg_dword(®->iobase_addr, 0x0010); + fw->risc_io_reg = htonl(rd_reg_dword(®->iobase_window)); /* Mailbox registers. */ mbx_reg = ®->mailbox0; for (cnt = 0; cnt < ARRAY_SIZE(fw->mailbox_reg); cnt++, mbx_reg++) - fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg)); + fw->mailbox_reg[cnt] = htons(rd_reg_word(mbx_reg)); /* Transfer sequence registers. */ iter_reg = fw->xseq_gp_reg; @@ -2172,19 +2172,19 @@ qla83xx_fw_dump(scsi_qla_host_t *vha) iter_reg = qla24xx_read_window(reg, 0x7200, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->resp0_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7300, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); iter_reg = fw->req1_dma_reg; iter_reg = qla24xx_read_window(reg, 0x7400, 8, iter_reg); dmp_reg = ®->iobase_q; for (cnt = 0; cnt < 7; cnt++, dmp_reg++) - *iter_reg++ = htonl(RD_REG_DWORD(dmp_reg)); + *iter_reg++ = htonl(rd_reg_dword(dmp_reg)); /* Transmit DMA registers. */ iter_reg = fw->xmt0_dma_reg; @@ -2390,16 +2390,16 @@ qla83xx_fw_dump(scsi_qla_host_t *vha) ql_log(ql_log_warn, vha, 0xd00f, "try a bigger hammer!!!\n"); - WRT_REG_DWORD(®->hccr, HCCRX_SET_RISC_RESET); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_SET_RISC_RESET); + rd_reg_dword(®->hccr); - WRT_REG_DWORD(®->hccr, HCCRX_REL_RISC_PAUSE); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_REL_RISC_PAUSE); + rd_reg_dword(®->hccr); - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_RESET); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_RESET); + rd_reg_dword(®->hccr); - for (cnt = 30000; cnt && (RD_REG_WORD(®->mailbox0)); cnt--) + for (cnt = 30000; cnt && (rd_reg_word(®->mailbox0)); cnt--) udelay(5); if (!cnt) { @@ -2674,7 +2674,7 @@ ql_dump_regs(uint level, scsi_qla_host_t *vha, uint id) ql_dbg(level, vha, id, "Mailbox registers:\n"); for (i = 0; i < 6; i++, mbx_reg++) ql_dbg(level, vha, id, - "mbox[%d] %#04x\n", i, RD_REG_WORD(mbx_reg)); + "mbox[%d] %#04x\n", i, rd_reg_word(mbx_reg)); } diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 4b02b48af85d..3368fdf8b2dd 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -128,47 +128,47 @@ static inline uint32_t make_handle(uint16_t x, uint16_t y) * I/O register */ -static inline u8 RD_REG_BYTE(const volatile u8 __iomem *addr) +static inline u8 rd_reg_byte(const volatile u8 __iomem *addr) { return readb(addr); } -static inline u16 RD_REG_WORD(const volatile __le16 __iomem *addr) +static inline u16 rd_reg_word(const volatile __le16 __iomem *addr) { return readw(addr); } -static inline u32 RD_REG_DWORD(const volatile __le32 __iomem *addr) +static inline u32 rd_reg_dword(const volatile __le32 __iomem *addr) { return readl(addr); } -static inline u8 RD_REG_BYTE_RELAXED(const volatile u8 __iomem *addr) +static inline u8 rd_reg_byte_relaxed(const volatile u8 __iomem *addr) { return readb_relaxed(addr); } -static inline u16 RD_REG_WORD_RELAXED(const volatile __le16 __iomem *addr) +static inline u16 rd_reg_word_relaxed(const volatile __le16 __iomem *addr) { return readw_relaxed(addr); } -static inline u32 RD_REG_DWORD_RELAXED(const volatile __le32 __iomem *addr) +static inline u32 rd_reg_dword_relaxed(const volatile __le32 __iomem *addr) { return readl_relaxed(addr); } -static inline void WRT_REG_BYTE(volatile u8 __iomem *addr, u8 data) +static inline void wrt_reg_byte(volatile u8 __iomem *addr, u8 data) { return writeb(data, addr); } -static inline void WRT_REG_WORD(volatile __le16 __iomem *addr, u16 data) +static inline void wrt_reg_word(volatile __le16 __iomem *addr, u16 data) { return writew(data, addr); } -static inline void WRT_REG_DWORD(volatile __le32 __iomem *addr, u32 data) +static inline void wrt_reg_dword(volatile __le32 __iomem *addr, u32 data) { return writel(data, addr); } @@ -956,18 +956,18 @@ typedef union { &(reg)->u_end.isp2200.mailbox8 + (num) - 8) : \ &(reg)->u.isp2300.mailbox0 + (num)) #define RD_MAILBOX_REG(ha, reg, num) \ - RD_REG_WORD(MAILBOX_REG(ha, reg, num)) + rd_reg_word(MAILBOX_REG(ha, reg, num)) #define WRT_MAILBOX_REG(ha, reg, num, data) \ - WRT_REG_WORD(MAILBOX_REG(ha, reg, num), data) + wrt_reg_word(MAILBOX_REG(ha, reg, num), data) #define FB_CMD_REG(ha, reg) \ (IS_QLA2100(ha) || IS_QLA2200(ha) ? \ &(reg)->fb_cmd_2100 : \ &(reg)->u.isp2300.fb_cmd) #define RD_FB_CMD_REG(ha, reg) \ - RD_REG_WORD(FB_CMD_REG(ha, reg)) + rd_reg_word(FB_CMD_REG(ha, reg)) #define WRT_FB_CMD_REG(ha, reg, data) \ - WRT_REG_WORD(FB_CMD_REG(ha, reg), data) + wrt_reg_word(FB_CMD_REG(ha, reg), data) typedef struct { uint32_t out_mb; /* outbound from driver */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index a1018f5f53de..02614e28451b 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2219,7 +2219,7 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) /* Check for secure flash support */ if (IS_QLA28XX(ha)) { - if (RD_REG_WORD(®->mailbox12) & BIT_0) + if (rd_reg_word(®->mailbox12) & BIT_0) ha->flags.secure_adapter = 1; ql_log(ql_log_info, vha, 0xffff, "Secure Adapter: %s\n", (ha->flags.secure_adapter) ? "Yes" : "No"); @@ -2357,7 +2357,7 @@ qla2100_pci_config(scsi_qla_host_t *vha) /* Get PCI bus information. */ spin_lock_irqsave(&ha->hardware_lock, flags); - ha->pci_attr = RD_REG_WORD(®->ctrl_status); + ha->pci_attr = rd_reg_word(®->ctrl_status); spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_SUCCESS; @@ -2399,17 +2399,17 @@ qla2300_pci_config(scsi_qla_host_t *vha) spin_lock_irqsave(&ha->hardware_lock, flags); /* Pause RISC. */ - WRT_REG_WORD(®->hccr, HCCR_PAUSE_RISC); + wrt_reg_word(®->hccr, HCCR_PAUSE_RISC); for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_WORD(®->hccr) & HCCR_RISC_PAUSE) != 0) + if ((rd_reg_word(®->hccr) & HCCR_RISC_PAUSE) != 0) break; udelay(10); } /* Select FPM registers. */ - WRT_REG_WORD(®->ctrl_status, 0x20); - RD_REG_WORD(®->ctrl_status); + wrt_reg_word(®->ctrl_status, 0x20); + rd_reg_word(®->ctrl_status); /* Get the fb rev level */ ha->fb_rev = RD_FB_CMD_REG(ha, reg); @@ -2418,13 +2418,13 @@ qla2300_pci_config(scsi_qla_host_t *vha) pci_clear_mwi(ha->pdev); /* Deselect FPM registers. */ - WRT_REG_WORD(®->ctrl_status, 0x0); - RD_REG_WORD(®->ctrl_status); + wrt_reg_word(®->ctrl_status, 0x0); + rd_reg_word(®->ctrl_status); /* Release RISC module. */ - WRT_REG_WORD(®->hccr, HCCR_RELEASE_RISC); + wrt_reg_word(®->hccr, HCCR_RELEASE_RISC); for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_WORD(®->hccr) & HCCR_RISC_PAUSE) == 0) + if ((rd_reg_word(®->hccr) & HCCR_RISC_PAUSE) == 0) break; udelay(10); @@ -2439,7 +2439,7 @@ qla2300_pci_config(scsi_qla_host_t *vha) /* Get PCI bus information. */ spin_lock_irqsave(&ha->hardware_lock, flags); - ha->pci_attr = RD_REG_WORD(®->ctrl_status); + ha->pci_attr = rd_reg_word(®->ctrl_status); spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_SUCCESS; @@ -2483,7 +2483,7 @@ qla24xx_pci_config(scsi_qla_host_t *vha) /* Get PCI bus information. */ spin_lock_irqsave(&ha->hardware_lock, flags); - ha->pci_attr = RD_REG_DWORD(®->ctrl_status); + ha->pci_attr = rd_reg_dword(®->ctrl_status); spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_SUCCESS; @@ -2587,36 +2587,36 @@ qla2x00_reset_chip(scsi_qla_host_t *vha) if (!IS_QLA2100(ha)) { /* Pause RISC. */ - WRT_REG_WORD(®->hccr, HCCR_PAUSE_RISC); + wrt_reg_word(®->hccr, HCCR_PAUSE_RISC); if (IS_QLA2200(ha) || IS_QLA2300(ha)) { for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_WORD(®->hccr) & + if ((rd_reg_word(®->hccr) & HCCR_RISC_PAUSE) != 0) break; udelay(100); } } else { - RD_REG_WORD(®->hccr); /* PCI Posting. */ + rd_reg_word(®->hccr); /* PCI Posting. */ udelay(10); } /* Select FPM registers. */ - WRT_REG_WORD(®->ctrl_status, 0x20); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, 0x20); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ /* FPM Soft Reset. */ - WRT_REG_WORD(®->fpm_diag_config, 0x100); - RD_REG_WORD(®->fpm_diag_config); /* PCI Posting. */ + wrt_reg_word(®->fpm_diag_config, 0x100); + rd_reg_word(®->fpm_diag_config); /* PCI Posting. */ /* Toggle Fpm Reset. */ if (!IS_QLA2200(ha)) { - WRT_REG_WORD(®->fpm_diag_config, 0x0); - RD_REG_WORD(®->fpm_diag_config); /* PCI Posting. */ + wrt_reg_word(®->fpm_diag_config, 0x0); + rd_reg_word(®->fpm_diag_config); /* PCI Posting. */ } /* Select frame buffer registers. */ - WRT_REG_WORD(®->ctrl_status, 0x10); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, 0x10); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ /* Reset frame buffer FIFOs. */ if (IS_QLA2200(ha)) { @@ -2634,23 +2634,23 @@ qla2x00_reset_chip(scsi_qla_host_t *vha) } /* Select RISC module registers. */ - WRT_REG_WORD(®->ctrl_status, 0); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, 0); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ /* Reset RISC processor. */ - WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); - RD_REG_WORD(®->hccr); /* PCI Posting. */ + wrt_reg_word(®->hccr, HCCR_RESET_RISC); + rd_reg_word(®->hccr); /* PCI Posting. */ /* Release RISC processor. */ - WRT_REG_WORD(®->hccr, HCCR_RELEASE_RISC); - RD_REG_WORD(®->hccr); /* PCI Posting. */ + wrt_reg_word(®->hccr, HCCR_RELEASE_RISC); + rd_reg_word(®->hccr); /* PCI Posting. */ } - WRT_REG_WORD(®->hccr, HCCR_CLR_RISC_INT); - WRT_REG_WORD(®->hccr, HCCR_CLR_HOST_INT); + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); + wrt_reg_word(®->hccr, HCCR_CLR_HOST_INT); /* Reset ISP chip. */ - WRT_REG_WORD(®->ctrl_status, CSR_ISP_SOFT_RESET); + wrt_reg_word(®->ctrl_status, CSR_ISP_SOFT_RESET); /* Wait for RISC to recover from reset. */ if (IS_QLA2100(ha) || IS_QLA2200(ha) || IS_QLA2300(ha)) { @@ -2661,7 +2661,7 @@ qla2x00_reset_chip(scsi_qla_host_t *vha) */ udelay(20); for (cnt = 30000; cnt; cnt--) { - if ((RD_REG_WORD(®->ctrl_status) & + if ((rd_reg_word(®->ctrl_status) & CSR_ISP_SOFT_RESET) == 0) break; udelay(100); @@ -2670,13 +2670,13 @@ qla2x00_reset_chip(scsi_qla_host_t *vha) udelay(10); /* Reset RISC processor. */ - WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); + wrt_reg_word(®->hccr, HCCR_RESET_RISC); - WRT_REG_WORD(®->semaphore, 0); + wrt_reg_word(®->semaphore, 0); /* Release RISC processor. */ - WRT_REG_WORD(®->hccr, HCCR_RELEASE_RISC); - RD_REG_WORD(®->hccr); /* PCI Posting. */ + wrt_reg_word(®->hccr, HCCR_RELEASE_RISC); + rd_reg_word(®->hccr); /* PCI Posting. */ if (IS_QLA2100(ha) || IS_QLA2200(ha) || IS_QLA2300(ha)) { for (cnt = 0; cnt < 30000; cnt++) { @@ -2694,8 +2694,8 @@ qla2x00_reset_chip(scsi_qla_host_t *vha) /* Disable RISC pause on FPM parity error. */ if (!IS_QLA2100(ha)) { - WRT_REG_WORD(®->hccr, HCCR_DISABLE_PARITY_PAUSE); - RD_REG_WORD(®->hccr); /* PCI Posting. */ + wrt_reg_word(®->hccr, HCCR_DISABLE_PARITY_PAUSE); + rd_reg_word(®->hccr); /* PCI Posting. */ } spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -2740,32 +2740,32 @@ qla24xx_reset_risc(scsi_qla_host_t *vha) spin_lock_irqsave(&ha->hardware_lock, flags); /* Reset RISC. */ - WRT_REG_DWORD(®->ctrl_status, CSRX_DMA_SHUTDOWN|MWB_4096_BYTES); + wrt_reg_dword(®->ctrl_status, CSRX_DMA_SHUTDOWN|MWB_4096_BYTES); for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_DWORD(®->ctrl_status) & CSRX_DMA_ACTIVE) == 0) + if ((rd_reg_dword(®->ctrl_status) & CSRX_DMA_ACTIVE) == 0) break; udelay(10); } - if (!(RD_REG_DWORD(®->ctrl_status) & CSRX_DMA_ACTIVE)) + if (!(rd_reg_dword(®->ctrl_status) & CSRX_DMA_ACTIVE)) set_bit(DMA_SHUTDOWN_CMPL, &ha->fw_dump_cap_flags); ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x017e, "HCCR: 0x%x, Control Status %x, DMA active status:0x%x\n", - RD_REG_DWORD(®->hccr), - RD_REG_DWORD(®->ctrl_status), - (RD_REG_DWORD(®->ctrl_status) & CSRX_DMA_ACTIVE)); + rd_reg_dword(®->hccr), + rd_reg_dword(®->ctrl_status), + (rd_reg_dword(®->ctrl_status) & CSRX_DMA_ACTIVE)); - WRT_REG_DWORD(®->ctrl_status, + wrt_reg_dword(®->ctrl_status, CSRX_ISP_SOFT_RESET|CSRX_DMA_SHUTDOWN|MWB_4096_BYTES); pci_read_config_word(ha->pdev, PCI_COMMAND, &wd); udelay(100); /* Wait for firmware to complete NVRAM accesses. */ - RD_REG_WORD(®->mailbox0); - for (cnt = 10000; RD_REG_WORD(®->mailbox0) != 0 && + rd_reg_word(®->mailbox0); + for (cnt = 10000; rd_reg_word(®->mailbox0) != 0 && rval == QLA_SUCCESS; cnt--) { barrier(); if (cnt) @@ -2779,26 +2779,26 @@ qla24xx_reset_risc(scsi_qla_host_t *vha) ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x017f, "HCCR: 0x%x, MailBox0 Status 0x%x\n", - RD_REG_DWORD(®->hccr), - RD_REG_WORD(®->mailbox0)); + rd_reg_dword(®->hccr), + rd_reg_word(®->mailbox0)); /* Wait for soft-reset to complete. */ - RD_REG_DWORD(®->ctrl_status); + rd_reg_dword(®->ctrl_status); for (cnt = 0; cnt < 60; cnt++) { barrier(); - if ((RD_REG_DWORD(®->ctrl_status) & + if ((rd_reg_dword(®->ctrl_status) & CSRX_ISP_SOFT_RESET) == 0) break; udelay(5); } - if (!(RD_REG_DWORD(®->ctrl_status) & CSRX_ISP_SOFT_RESET)) + if (!(rd_reg_dword(®->ctrl_status) & CSRX_ISP_SOFT_RESET)) set_bit(ISP_SOFT_RESET_CMPL, &ha->fw_dump_cap_flags); ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x015d, "HCCR: 0x%x, Soft Reset status: 0x%x\n", - RD_REG_DWORD(®->hccr), - RD_REG_DWORD(®->ctrl_status)); + rd_reg_dword(®->hccr), + rd_reg_dword(®->ctrl_status)); /* If required, do an MPI FW reset now */ if (test_and_clear_bit(MPI_RESET_NEEDED, &vha->dpc_flags)) { @@ -2817,17 +2817,17 @@ qla24xx_reset_risc(scsi_qla_host_t *vha) } } - WRT_REG_DWORD(®->hccr, HCCRX_SET_RISC_RESET); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_SET_RISC_RESET); + rd_reg_dword(®->hccr); - WRT_REG_DWORD(®->hccr, HCCRX_REL_RISC_PAUSE); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_REL_RISC_PAUSE); + rd_reg_dword(®->hccr); - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_RESET); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_RESET); + rd_reg_dword(®->hccr); - RD_REG_WORD(®->mailbox0); - for (cnt = 60; RD_REG_WORD(®->mailbox0) != 0 && + rd_reg_word(®->mailbox0); + for (cnt = 60; rd_reg_word(®->mailbox0) != 0 && rval == QLA_SUCCESS; cnt--) { barrier(); if (cnt) @@ -2840,8 +2840,8 @@ qla24xx_reset_risc(scsi_qla_host_t *vha) ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x015e, "Host Risc 0x%x, mailbox0 0x%x\n", - RD_REG_DWORD(®->hccr), - RD_REG_WORD(®->mailbox0)); + rd_reg_dword(®->hccr), + rd_reg_word(®->mailbox0)); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -2860,9 +2860,8 @@ qla25xx_read_risc_sema_reg(scsi_qla_host_t *vha, uint32_t *data) { struct device_reg_24xx __iomem *reg = &vha->hw->iobase->isp24; - WRT_REG_DWORD(®->iobase_addr, RISC_REGISTER_BASE_OFFSET); - *data = RD_REG_DWORD(®->iobase_window + RISC_REGISTER_WINDOW_OFFSET); - + wrt_reg_dword(®->iobase_addr, RISC_REGISTER_BASE_OFFSET); + *data = rd_reg_dword(®->iobase_window + RISC_REGISTER_WINDOW_OFFSET); } static void @@ -2870,8 +2869,8 @@ qla25xx_write_risc_sema_reg(scsi_qla_host_t *vha, uint32_t data) { struct device_reg_24xx __iomem *reg = &vha->hw->iobase->isp24; - WRT_REG_DWORD(®->iobase_addr, RISC_REGISTER_BASE_OFFSET); - WRT_REG_DWORD(®->iobase_window + RISC_REGISTER_WINDOW_OFFSET, data); + wrt_reg_dword(®->iobase_addr, RISC_REGISTER_BASE_OFFSET); + wrt_reg_dword(®->iobase_window + RISC_REGISTER_WINDOW_OFFSET, data); } static void @@ -2887,7 +2886,7 @@ qla25xx_manipulate_risc_semaphore(scsi_qla_host_t *vha) vha->hw->pdev->subsystem_device != 0x0240) return; - WRT_REG_DWORD(&vha->hw->iobase->isp24.hccr, HCCRX_SET_RISC_PAUSE); + wrt_reg_dword(&vha->hw->iobase->isp24.hccr, HCCRX_SET_RISC_PAUSE); udelay(100); attempt: @@ -2989,7 +2988,7 @@ qla2x00_chip_diag(scsi_qla_host_t *vha) spin_lock_irqsave(&ha->hardware_lock, flags); /* Reset ISP chip. */ - WRT_REG_WORD(®->ctrl_status, CSR_ISP_SOFT_RESET); + wrt_reg_word(®->ctrl_status, CSR_ISP_SOFT_RESET); /* * We need to have a delay here since the card will not respond while @@ -2999,7 +2998,7 @@ qla2x00_chip_diag(scsi_qla_host_t *vha) data = qla2x00_debounce_register(®->ctrl_status); for (cnt = 6000000 ; cnt && (data & CSR_ISP_SOFT_RESET); cnt--) { udelay(5); - data = RD_REG_WORD(®->ctrl_status); + data = rd_reg_word(®->ctrl_status); barrier(); } @@ -3010,8 +3009,8 @@ qla2x00_chip_diag(scsi_qla_host_t *vha) "Reset register cleared by chip reset.\n"); /* Reset RISC processor. */ - WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); - WRT_REG_WORD(®->hccr, HCCR_RELEASE_RISC); + wrt_reg_word(®->hccr, HCCR_RESET_RISC); + wrt_reg_word(®->hccr, HCCR_RELEASE_RISC); /* Workaround for QLA2312 PCI parity error */ if (IS_QLA2100(ha) || IS_QLA2200(ha) || IS_QLA2300(ha)) { @@ -3654,8 +3653,8 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) if (!IS_FWI2_CAPABLE(ha) && !IS_QLA2100(ha) && !IS_QLA2200(ha)) { /* Disable SRAM, Instruction RAM and GP RAM parity. */ spin_lock_irqsave(&ha->hardware_lock, flags); - WRT_REG_WORD(®->hccr, (HCCR_ENABLE_PARITY + 0x0)); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, (HCCR_ENABLE_PARITY + 0x0)); + rd_reg_word(®->hccr); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -3762,11 +3761,11 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) spin_lock_irqsave(&ha->hardware_lock, flags); if (IS_QLA2300(ha)) /* SRAM parity */ - WRT_REG_WORD(®->hccr, HCCR_ENABLE_PARITY + 0x1); + wrt_reg_word(®->hccr, HCCR_ENABLE_PARITY + 0x1); else /* SRAM, Instruction RAM and GP RAM parity */ - WRT_REG_WORD(®->hccr, HCCR_ENABLE_PARITY + 0x7); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, HCCR_ENABLE_PARITY + 0x7); + rd_reg_word(®->hccr); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -4010,11 +4009,11 @@ qla2x00_config_rings(struct scsi_qla_host *vha) put_unaligned_le64(req->dma, &ha->init_cb->request_q_address); put_unaligned_le64(rsp->dma, &ha->init_cb->response_q_address); - WRT_REG_WORD(ISP_REQ_Q_IN(ha, reg), 0); - WRT_REG_WORD(ISP_REQ_Q_OUT(ha, reg), 0); - WRT_REG_WORD(ISP_RSP_Q_IN(ha, reg), 0); - WRT_REG_WORD(ISP_RSP_Q_OUT(ha, reg), 0); - RD_REG_WORD(ISP_RSP_Q_OUT(ha, reg)); /* PCI Posting. */ + wrt_reg_word(ISP_REQ_Q_IN(ha, reg), 0); + wrt_reg_word(ISP_REQ_Q_OUT(ha, reg), 0); + wrt_reg_word(ISP_RSP_Q_IN(ha, reg), 0); + wrt_reg_word(ISP_RSP_Q_OUT(ha, reg), 0); + rd_reg_word(ISP_RSP_Q_OUT(ha, reg)); /* PCI Posting. */ } void @@ -4076,15 +4075,15 @@ qla24xx_config_rings(struct scsi_qla_host *vha) } icb->firmware_options_2 |= cpu_to_le32(BIT_23); - WRT_REG_DWORD(®->isp25mq.req_q_in, 0); - WRT_REG_DWORD(®->isp25mq.req_q_out, 0); - WRT_REG_DWORD(®->isp25mq.rsp_q_in, 0); - WRT_REG_DWORD(®->isp25mq.rsp_q_out, 0); + wrt_reg_dword(®->isp25mq.req_q_in, 0); + wrt_reg_dword(®->isp25mq.req_q_out, 0); + wrt_reg_dword(®->isp25mq.rsp_q_in, 0); + wrt_reg_dword(®->isp25mq.rsp_q_out, 0); } else { - WRT_REG_DWORD(®->isp24.req_q_in, 0); - WRT_REG_DWORD(®->isp24.req_q_out, 0); - WRT_REG_DWORD(®->isp24.rsp_q_in, 0); - WRT_REG_DWORD(®->isp24.rsp_q_out, 0); + wrt_reg_dword(®->isp24.req_q_in, 0); + wrt_reg_dword(®->isp24.req_q_out, 0); + wrt_reg_dword(®->isp24.rsp_q_in, 0); + wrt_reg_dword(®->isp24.rsp_q_out, 0); } qlt_24xx_config_rings(vha); @@ -4098,7 +4097,7 @@ qla24xx_config_rings(struct scsi_qla_host *vha) } /* PCI posting */ - RD_REG_WORD(&ioreg->hccr); + rd_reg_word(&ioreg->hccr); } /** @@ -4569,7 +4568,7 @@ qla2x00_nvram_config(scsi_qla_host_t *vha) ha->nvram_size = sizeof(*nv); ha->nvram_base = 0; if (!IS_QLA2100(ha) && !IS_QLA2200(ha) && !IS_QLA2300(ha)) - if ((RD_REG_WORD(®->ctrl_status) >> 14) == 1) + if ((rd_reg_word(®->ctrl_status) >> 14) == 1) ha->nvram_base = 0x80; /* Get NVRAM data and calculate checksum. */ @@ -7090,10 +7089,10 @@ qla2x00_reset_adapter(scsi_qla_host_t *vha) ha->isp_ops->disable_intrs(ha); spin_lock_irqsave(&ha->hardware_lock, flags); - WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); - RD_REG_WORD(®->hccr); /* PCI Posting. */ - WRT_REG_WORD(®->hccr, HCCR_RELEASE_RISC); - RD_REG_WORD(®->hccr); /* PCI Posting. */ + wrt_reg_word(®->hccr, HCCR_RESET_RISC); + rd_reg_word(®->hccr); /* PCI Posting. */ + wrt_reg_word(®->hccr, HCCR_RELEASE_RISC); + rd_reg_word(®->hccr); /* PCI Posting. */ spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_SUCCESS; @@ -7114,10 +7113,10 @@ qla24xx_reset_adapter(scsi_qla_host_t *vha) ha->isp_ops->disable_intrs(ha); spin_lock_irqsave(&ha->hardware_lock, flags); - WRT_REG_DWORD(®->hccr, HCCRX_SET_RISC_RESET); - RD_REG_DWORD(®->hccr); - WRT_REG_DWORD(®->hccr, HCCRX_REL_RISC_PAUSE); - RD_REG_DWORD(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_SET_RISC_RESET); + rd_reg_dword(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_REL_RISC_PAUSE); + rd_reg_dword(®->hccr); spin_unlock_irqrestore(&ha->hardware_lock, flags); if (IS_NOPOLLING_TYPE(ha)) diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 364b3db8b2dc..cd3c15086c70 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -46,10 +46,10 @@ qla2x00_debounce_register(volatile uint16_t __iomem *addr) volatile uint16_t second; do { - first = RD_REG_WORD(addr); + first = rd_reg_word(addr); barrier(); cpu_relax(); - second = RD_REG_WORD(addr); + second = rd_reg_word(addr); } while (first != second); return (first); @@ -329,7 +329,7 @@ qla_83xx_start_iocbs(struct qla_qpair *qpair) } else req->ring_ptr++; - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); } static inline int diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 4d8039fc02e7..3e31a175304c 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -376,7 +376,7 @@ qla2x00_start_scsi(srb_t *sp) /* Calculate the number of request entries needed. */ req_cnt = ha->isp_ops->calc_req_entries(tot_dsds); if (req->cnt < (req_cnt + 2)) { - cnt = RD_REG_WORD_RELAXED(ISP_REQ_Q_OUT(ha, reg)); + cnt = rd_reg_word_relaxed(ISP_REQ_Q_OUT(ha, reg)); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; else @@ -428,8 +428,8 @@ qla2x00_start_scsi(srb_t *sp) sp->flags |= SRB_DMA_VALID; /* Set chip new ring index. */ - WRT_REG_WORD(ISP_REQ_Q_IN(ha, reg), req->ring_index); - RD_REG_WORD_RELAXED(ISP_REQ_Q_IN(ha, reg)); /* PCI Posting. */ + wrt_reg_word(ISP_REQ_Q_IN(ha, reg), req->ring_index); + rd_reg_word_relaxed(ISP_REQ_Q_IN(ha, reg)); /* PCI Posting. */ /* Manage unprocessed RIO/ZIO commands in response queue. */ if (vha->flags.process_response_queue && @@ -472,21 +472,21 @@ qla2x00_start_iocbs(struct scsi_qla_host *vha, struct req_que *req) /* Set chip new ring index. */ if (ha->mqenable || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); } else if (IS_QLA83XX(ha)) { - WRT_REG_DWORD(req->req_q_in, req->ring_index); - RD_REG_DWORD_RELAXED(&ha->iobase->isp24.hccr); + wrt_reg_dword(req->req_q_in, req->ring_index); + rd_reg_dword_relaxed(&ha->iobase->isp24.hccr); } else if (IS_QLAFX00(ha)) { - WRT_REG_DWORD(®->ispfx00.req_q_in, req->ring_index); - RD_REG_DWORD_RELAXED(®->ispfx00.req_q_in); + wrt_reg_dword(®->ispfx00.req_q_in, req->ring_index); + rd_reg_dword_relaxed(®->ispfx00.req_q_in); QLAFX00_SET_HST_INTR(ha, ha->rqstq_intr_code); } else if (IS_FWI2_CAPABLE(ha)) { - WRT_REG_DWORD(®->isp24.req_q_in, req->ring_index); - RD_REG_DWORD_RELAXED(®->isp24.req_q_in); + wrt_reg_dword(®->isp24.req_q_in, req->ring_index); + rd_reg_dword_relaxed(®->isp24.req_q_in); } else { - WRT_REG_WORD(ISP_REQ_Q_IN(ha, ®->isp), + wrt_reg_word(ISP_REQ_Q_IN(ha, ®->isp), req->ring_index); - RD_REG_WORD_RELAXED(ISP_REQ_Q_IN(ha, ®->isp)); + rd_reg_word_relaxed(ISP_REQ_Q_IN(ha, ®->isp)); } } } @@ -1637,7 +1637,7 @@ qla24xx_start_scsi(srb_t *sp) req_cnt = qla24xx_calc_iocbs(vha, tot_dsds); if (req->cnt < (req_cnt + 2)) { cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : - RD_REG_DWORD_RELAXED(req->req_q_out); + rd_reg_dword_relaxed(req->req_q_out); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; else @@ -1698,7 +1698,7 @@ qla24xx_start_scsi(srb_t *sp) sp->flags |= SRB_DMA_VALID; /* Set chip new ring index. */ - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_SUCCESS; @@ -1822,7 +1822,7 @@ qla24xx_dif_start_scsi(srb_t *sp) tot_dsds += nseg; if (req->cnt < (req_cnt + 2)) { cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : - RD_REG_DWORD_RELAXED(req->req_q_out); + rd_reg_dword_relaxed(req->req_q_out); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; else @@ -1881,7 +1881,7 @@ qla24xx_dif_start_scsi(srb_t *sp) req->ring_ptr++; /* Set chip new ring index. */ - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -1957,7 +1957,7 @@ qla2xxx_start_scsi_mq(srb_t *sp) req_cnt = qla24xx_calc_iocbs(vha, tot_dsds); if (req->cnt < (req_cnt + 2)) { cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : - RD_REG_DWORD_RELAXED(req->req_q_out); + rd_reg_dword_relaxed(req->req_q_out); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; else @@ -2018,7 +2018,7 @@ qla2xxx_start_scsi_mq(srb_t *sp) sp->flags |= SRB_DMA_VALID; /* Set chip new ring index. */ - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); spin_unlock_irqrestore(&qpair->qp_lock, flags); return QLA_SUCCESS; @@ -2157,7 +2157,7 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp) tot_dsds += nseg; if (req->cnt < (req_cnt + 2)) { cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : - RD_REG_DWORD_RELAXED(req->req_q_out); + rd_reg_dword_relaxed(req->req_q_out); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; else @@ -2214,7 +2214,7 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp) req->ring_ptr++; /* Set chip new ring index. */ - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); /* Manage unprocessed RIO/ZIO commands in response queue. */ if (vha->flags.process_response_queue && @@ -2266,13 +2266,13 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp) cnt = *req->out_ptr; else if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) - cnt = RD_REG_DWORD(®->isp25mq.req_q_out); + cnt = rd_reg_dword(®->isp25mq.req_q_out); else if (IS_P3P_TYPE(ha)) - cnt = RD_REG_DWORD(reg->isp82.req_q_out); + cnt = rd_reg_dword(reg->isp82.req_q_out); else if (IS_FWI2_CAPABLE(ha)) - cnt = RD_REG_DWORD(®->isp24.req_q_out); + cnt = rd_reg_dword(®->isp24.req_q_out); else if (IS_QLAFX00(ha)) - cnt = RD_REG_DWORD(®->ispfx00.req_q_out); + cnt = rd_reg_dword(®->ispfx00.req_q_out); else cnt = qla2x00_debounce_register( ISP_REQ_Q_OUT(ha, ®->isp)); @@ -2305,8 +2305,8 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp) pkt = req->ring_ptr; memset(pkt, 0, REQUEST_ENTRY_SIZE); if (IS_QLAFX00(ha)) { - WRT_REG_BYTE((void __iomem *)&pkt->entry_count, req_cnt); - WRT_REG_WORD((void __iomem *)&pkt->handle, handle); + wrt_reg_byte((void __iomem *)&pkt->entry_count, req_cnt); + wrt_reg_word((void __iomem *)&pkt->handle, handle); } else { pkt->entry_count = req_cnt; pkt->handle = handle; @@ -3310,7 +3310,7 @@ qla82xx_start_scsi(srb_t *sp) req_cnt = 1; if (req->cnt < (req_cnt + 2)) { - cnt = (uint16_t)RD_REG_DWORD_RELAXED( + cnt = (uint16_t)rd_reg_dword_relaxed( ®->req_q_out[0]); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; @@ -3419,7 +3419,7 @@ qla82xx_start_scsi(srb_t *sp) req_cnt = qla24xx_calc_iocbs(vha, tot_dsds); if (req->cnt < (req_cnt + 2)) { - cnt = (uint16_t)RD_REG_DWORD_RELAXED( + cnt = (uint16_t)rd_reg_dword_relaxed( ®->req_q_out[0]); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; @@ -3495,10 +3495,10 @@ qla82xx_start_scsi(srb_t *sp) if (ql2xdbwr) qla82xx_wr_32(ha, (uintptr_t __force)ha->nxdb_wr_ptr, dbval); else { - WRT_REG_DWORD(ha->nxdb_wr_ptr, dbval); + wrt_reg_dword(ha->nxdb_wr_ptr, dbval); wmb(); - while (RD_REG_DWORD(ha->nxdb_rd_ptr) != dbval) { - WRT_REG_DWORD(ha->nxdb_wr_ptr, dbval); + while (rd_reg_dword(ha->nxdb_rd_ptr) != dbval) { + wrt_reg_dword(ha->nxdb_wr_ptr, dbval); wmb(); } } @@ -3894,7 +3894,7 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds) /* Check for room on request queue. */ if (req->cnt < req_cnt + 2) { cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : - RD_REG_DWORD_RELAXED(req->req_q_out); + rd_reg_dword_relaxed(req->req_q_out); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; else diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index e7d94ac7e073..87d0f5e4d81a 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -205,7 +205,7 @@ qla2100_intr_handler(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); for (iter = 50; iter--; ) { - hccr = RD_REG_WORD(®->hccr); + hccr = rd_reg_word(®->hccr); if (qla2x00_check_reg16_for_disconnect(vha, hccr)) break; if (hccr & HCCR_RISC_PAUSE) { @@ -217,18 +217,18 @@ qla2100_intr_handler(int irq, void *dev_id) * bit to be cleared. Schedule a big hammer to get * out of the RISC PAUSED state. */ - WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, HCCR_RESET_RISC); + rd_reg_word(®->hccr); ha->isp_ops->fw_dump(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); break; - } else if ((RD_REG_WORD(®->istatus) & ISR_RISC_INT) == 0) + } else if ((rd_reg_word(®->istatus) & ISR_RISC_INT) == 0) break; - if (RD_REG_WORD(®->semaphore) & BIT_0) { - WRT_REG_WORD(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD(®->hccr); + if (rd_reg_word(®->semaphore) & BIT_0) { + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); + rd_reg_word(®->hccr); /* Get mailbox data. */ mb[0] = RD_MAILBOX_REG(ha, reg, 0); @@ -247,13 +247,13 @@ qla2100_intr_handler(int irq, void *dev_id) mb[0]); } /* Release mailbox registers. */ - WRT_REG_WORD(®->semaphore, 0); - RD_REG_WORD(®->semaphore); + wrt_reg_word(®->semaphore, 0); + rd_reg_word(®->semaphore); } else { qla2x00_process_response_queue(rsp); - WRT_REG_WORD(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); + rd_reg_word(®->hccr); } } qla2x00_handle_mbx_completion(ha, status); @@ -325,14 +325,14 @@ qla2300_intr_handler(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); for (iter = 50; iter--; ) { - stat = RD_REG_DWORD(®->u.isp2300.host_status); + stat = rd_reg_dword(®->u.isp2300.host_status); if (qla2x00_check_reg32_for_disconnect(vha, stat)) break; if (stat & HSR_RISC_PAUSED) { if (unlikely(pci_channel_offline(ha->pdev))) break; - hccr = RD_REG_WORD(®->hccr); + hccr = rd_reg_word(®->hccr); if (hccr & (BIT_15 | BIT_13 | BIT_11 | BIT_8)) ql_log(ql_log_warn, vha, 0x5026, @@ -348,8 +348,8 @@ qla2300_intr_handler(int irq, void *dev_id) * interrupt bit to be cleared. Schedule a big * hammer to get out of the RISC PAUSED state. */ - WRT_REG_WORD(®->hccr, HCCR_RESET_RISC); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, HCCR_RESET_RISC); + rd_reg_word(®->hccr); ha->isp_ops->fw_dump(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); @@ -366,7 +366,7 @@ qla2300_intr_handler(int irq, void *dev_id) status |= MBX_INTERRUPT; /* Release mailbox registers. */ - WRT_REG_WORD(®->semaphore, 0); + wrt_reg_word(®->semaphore, 0); break; case 0x12: mb[0] = MSW(stat); @@ -394,8 +394,8 @@ qla2300_intr_handler(int irq, void *dev_id) "Unrecognized interrupt type (%d).\n", stat & 0xff); break; } - WRT_REG_WORD(®->hccr, HCCR_CLR_RISC_INT); - RD_REG_WORD_RELAXED(®->hccr); + wrt_reg_word(®->hccr, HCCR_CLR_RISC_INT); + rd_reg_word_relaxed(®->hccr); } qla2x00_handle_mbx_completion(ha, status); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -437,7 +437,7 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) if ((cnt == 4 || cnt == 5) && (mboxes & BIT_0)) ha->mailbox_out[cnt] = qla2x00_debounce_register(wptr); else if (mboxes & BIT_0) - ha->mailbox_out[cnt] = RD_REG_WORD(wptr); + ha->mailbox_out[cnt] = rd_reg_word(wptr); wptr++; mboxes >>= 1; @@ -464,7 +464,7 @@ qla81xx_idc_event(scsi_qla_host_t *vha, uint16_t aen, uint16_t descr) return; for (cnt = 0; cnt < QLA_IDC_ACK_REGS; cnt++, wptr++) - mb[cnt] = RD_REG_WORD(wptr); + mb[cnt] = rd_reg_word(wptr); ql_dbg(ql_dbg_async, vha, 0x5021, "Inter-Driver Communication %s -- " @@ -892,10 +892,10 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) IS_QLA27XX(ha) || IS_QLA28XX(ha)) { u16 m[4]; - m[0] = RD_REG_WORD(®24->mailbox4); - m[1] = RD_REG_WORD(®24->mailbox5); - m[2] = RD_REG_WORD(®24->mailbox6); - mbx = m[3] = RD_REG_WORD(®24->mailbox7); + m[0] = rd_reg_word(®24->mailbox4); + m[1] = rd_reg_word(®24->mailbox5); + m[2] = rd_reg_word(®24->mailbox6); + mbx = m[3] = rd_reg_word(®24->mailbox7); ql_log(ql_log_warn, vha, 0x5003, "ISP System Error - mbx1=%xh mbx2=%xh mbx3=%xh mbx4=%xh mbx5=%xh mbx6=%xh mbx7=%xh.\n", @@ -906,7 +906,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) mb[1], mb[2], mb[3]); if ((IS_QLA27XX(ha) || IS_QLA28XX(ha)) && - RD_REG_WORD(®24->mailbox7) & BIT_8) + rd_reg_word(®24->mailbox7) & BIT_8) ha->isp_ops->mpi_fw_dump(vha, 1); ha->isp_ops->fw_dump(vha); ha->flags.fw_init_done = 0; @@ -1013,8 +1013,8 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) ha->current_topology = 0; mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha)) - ? RD_REG_WORD(®24->mailbox4) : 0; - mbx = (IS_P3P_TYPE(ha)) ? RD_REG_WORD(®82->mailbox_out[4]) + ? rd_reg_word(®24->mailbox4) : 0; + mbx = (IS_P3P_TYPE(ha)) ? rd_reg_word(®82->mailbox_out[4]) : mbx; ql_log(ql_log_info, vha, 0x500b, "LOOP DOWN detected (%x %x %x %x).\n", @@ -1381,7 +1381,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) break; case MBA_IDC_NOTIFY: if (IS_QLA8031(vha->hw) || IS_QLA8044(ha)) { - mb[4] = RD_REG_WORD(®24->mailbox4); + mb[4] = rd_reg_word(®24->mailbox4); if (((mb[2] & 0x7fff) == MBC_PORT_RESET || (mb[2] & 0x7fff) == MBC_SET_PORT_CONFIG) && (mb[4] & INTERNAL_LOOPBACK_MASK) != 0) { @@ -1410,10 +1410,10 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) { qla27xx_handle_8200_aen(vha, mb); } else if (IS_QLA83XX(ha)) { - mb[4] = RD_REG_WORD(®24->mailbox4); - mb[5] = RD_REG_WORD(®24->mailbox5); - mb[6] = RD_REG_WORD(®24->mailbox6); - mb[7] = RD_REG_WORD(®24->mailbox7); + mb[4] = rd_reg_word(®24->mailbox4); + mb[5] = rd_reg_word(®24->mailbox5); + mb[6] = rd_reg_word(®24->mailbox6); + mb[7] = rd_reg_word(®24->mailbox7); qla83xx_handle_8200_aen(vha, mb); } else { ql_dbg(ql_dbg_async, vha, 0x5052, @@ -2321,7 +2321,7 @@ qla2x00_process_response_queue(struct rsp_que *rsp) } /* Adjust ring index */ - WRT_REG_WORD(ISP_RSP_Q_OUT(ha, reg), rsp->ring_index); + wrt_reg_word(ISP_RSP_Q_OUT(ha, reg), rsp->ring_index); } static inline void @@ -3184,7 +3184,7 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) for (cnt = 1; cnt < ha->mbx_count; cnt++) { if (mboxes & BIT_0) - ha->mailbox_out[cnt] = RD_REG_WORD(wptr); + ha->mailbox_out[cnt] = rd_reg_word(wptr); mboxes >>= 1; wptr++; @@ -3361,9 +3361,9 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, if (IS_P3P_TYPE(ha)) { struct device_reg_82xx __iomem *reg = &ha->iobase->isp82; - WRT_REG_DWORD(®->rsp_q_out[0], rsp->ring_index); + wrt_reg_dword(®->rsp_q_out[0], rsp->ring_index); } else { - WRT_REG_DWORD(rsp->rsp_q_out, rsp->ring_index); + wrt_reg_dword(rsp->rsp_q_out, rsp->ring_index); } } @@ -3380,13 +3380,13 @@ qla2xxx_check_risc_status(scsi_qla_host_t *vha) return; rval = QLA_SUCCESS; - WRT_REG_DWORD(®->iobase_addr, 0x7C00); - RD_REG_DWORD(®->iobase_addr); - WRT_REG_DWORD(®->iobase_window, 0x0001); - for (cnt = 10000; (RD_REG_DWORD(®->iobase_window) & BIT_0) == 0 && + wrt_reg_dword(®->iobase_addr, 0x7C00); + rd_reg_dword(®->iobase_addr); + wrt_reg_dword(®->iobase_window, 0x0001); + for (cnt = 10000; (rd_reg_dword(®->iobase_window) & BIT_0) == 0 && rval == QLA_SUCCESS; cnt--) { if (cnt) { - WRT_REG_DWORD(®->iobase_window, 0x0001); + wrt_reg_dword(®->iobase_window, 0x0001); udelay(10); } else rval = QLA_FUNCTION_TIMEOUT; @@ -3395,11 +3395,11 @@ qla2xxx_check_risc_status(scsi_qla_host_t *vha) goto next_test; rval = QLA_SUCCESS; - WRT_REG_DWORD(®->iobase_window, 0x0003); - for (cnt = 100; (RD_REG_DWORD(®->iobase_window) & BIT_0) == 0 && + wrt_reg_dword(®->iobase_window, 0x0003); + for (cnt = 100; (rd_reg_dword(®->iobase_window) & BIT_0) == 0 && rval == QLA_SUCCESS; cnt--) { if (cnt) { - WRT_REG_DWORD(®->iobase_window, 0x0003); + wrt_reg_dword(®->iobase_window, 0x0003); udelay(10); } else rval = QLA_FUNCTION_TIMEOUT; @@ -3408,13 +3408,13 @@ qla2xxx_check_risc_status(scsi_qla_host_t *vha) goto done; next_test: - if (RD_REG_DWORD(®->iobase_c8) & BIT_3) + if (rd_reg_dword(®->iobase_c8) & BIT_3) ql_log(ql_log_info, vha, 0x504c, "Additional code -- 0x55AA.\n"); done: - WRT_REG_DWORD(®->iobase_window, 0x0000); - RD_REG_DWORD(®->iobase_window); + wrt_reg_dword(®->iobase_window, 0x0000); + rd_reg_dword(®->iobase_window); } /** @@ -3458,14 +3458,14 @@ qla24xx_intr_handler(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); for (iter = 50; iter--; ) { - stat = RD_REG_DWORD(®->host_status); + stat = rd_reg_dword(®->host_status); if (qla2x00_check_reg32_for_disconnect(vha, stat)) break; if (stat & HSRX_RISC_PAUSED) { if (unlikely(pci_channel_offline(ha->pdev))) break; - hccr = RD_REG_DWORD(®->hccr); + hccr = rd_reg_dword(®->hccr); ql_log(ql_log_warn, vha, 0x504b, "RISC paused -- HCCR=%x, Dumping firmware.\n", @@ -3490,9 +3490,9 @@ qla24xx_intr_handler(int irq, void *dev_id) break; case INTR_ASYNC_EVENT: mb[0] = MSW(stat); - mb[1] = RD_REG_WORD(®->mailbox1); - mb[2] = RD_REG_WORD(®->mailbox2); - mb[3] = RD_REG_WORD(®->mailbox3); + mb[1] = rd_reg_word(®->mailbox1); + mb[2] = rd_reg_word(®->mailbox2); + mb[3] = rd_reg_word(®->mailbox3); qla2x00_async_event(vha, rsp, mb); break; case INTR_RSP_QUE_UPDATE: @@ -3512,8 +3512,8 @@ qla24xx_intr_handler(int irq, void *dev_id) "Unrecognized interrupt type (%d).\n", stat * 0xff); break; } - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); - RD_REG_DWORD_RELAXED(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); + rd_reg_dword_relaxed(®->hccr); if (unlikely(IS_QLA83XX(ha) && (ha->pdev->revision == 1))) ndelay(3500); } @@ -3552,8 +3552,8 @@ qla24xx_msix_rsp_q(int irq, void *dev_id) vha = pci_get_drvdata(ha->pdev); qla24xx_process_response_queue(vha, rsp); if (!ha->flags.disable_msix_handshake) { - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); - RD_REG_DWORD_RELAXED(®->hccr); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); + rd_reg_dword_relaxed(®->hccr); } spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -3587,14 +3587,14 @@ qla24xx_msix_default(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); do { - stat = RD_REG_DWORD(®->host_status); + stat = rd_reg_dword(®->host_status); if (qla2x00_check_reg32_for_disconnect(vha, stat)) break; if (stat & HSRX_RISC_PAUSED) { if (unlikely(pci_channel_offline(ha->pdev))) break; - hccr = RD_REG_DWORD(®->hccr); + hccr = rd_reg_dword(®->hccr); ql_log(ql_log_info, vha, 0x5050, "RISC paused -- HCCR=%x, Dumping firmware.\n", @@ -3619,9 +3619,9 @@ qla24xx_msix_default(int irq, void *dev_id) break; case INTR_ASYNC_EVENT: mb[0] = MSW(stat); - mb[1] = RD_REG_WORD(®->mailbox1); - mb[2] = RD_REG_WORD(®->mailbox2); - mb[3] = RD_REG_WORD(®->mailbox3); + mb[1] = rd_reg_word(®->mailbox1); + mb[2] = rd_reg_word(®->mailbox2); + mb[3] = rd_reg_word(®->mailbox3); qla2x00_async_event(vha, rsp, mb); break; case INTR_RSP_QUE_UPDATE: @@ -3641,7 +3641,7 @@ qla24xx_msix_default(int irq, void *dev_id) "Unrecognized interrupt type (%d).\n", stat & 0xff); break; } - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); } while (0); qla2x00_handle_mbx_completion(ha, status); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -3692,7 +3692,7 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id) reg = &ha->iobase->isp24; spin_lock_irqsave(&ha->hardware_lock, flags); - WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); spin_unlock_irqrestore(&ha->hardware_lock, flags); queue_work(ha->wq, &qpair->q_work); @@ -3953,7 +3953,7 @@ qla2x00_request_irqs(struct qla_hw_data *ha, struct rsp_que *rsp) goto fail; spin_lock_irq(&ha->hardware_lock); - WRT_REG_WORD(®->isp.semaphore, 0); + wrt_reg_word(®->isp.semaphore, 0); spin_unlock_irq(&ha->hardware_lock); fail: diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 357fc5aaecd8..985cae37a8f8 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -227,7 +227,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (mboxes & BIT_0) { ql_dbg(ql_dbg_mbx, vha, 0x1112, "mbox[%d]<-0x%04x\n", cnt, *iptr); - WRT_REG_WORD(optr, *iptr); + wrt_reg_word(optr, *iptr); } mboxes >>= 1; @@ -253,11 +253,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) set_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); if (IS_P3P_TYPE(ha)) - WRT_REG_DWORD(®->isp82.hint, HINT_MBX_INT_PENDING); + wrt_reg_dword(®->isp82.hint, HINT_MBX_INT_PENDING); else if (IS_FWI2_CAPABLE(ha)) - WRT_REG_DWORD(®->isp24.hccr, HCCRX_SET_HOST_INT); + wrt_reg_dword(®->isp24.hccr, HCCRX_SET_HOST_INT); else - WRT_REG_WORD(®->isp.hccr, HCCR_SET_HOST_INT); + wrt_reg_word(®->isp.hccr, HCCR_SET_HOST_INT); spin_unlock_irqrestore(&ha->hardware_lock, flags); wait_time = jiffies; @@ -300,7 +300,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) "Cmd=%x Polling Mode.\n", command); if (IS_P3P_TYPE(ha)) { - if (RD_REG_DWORD(®->isp82.hint) & + if (rd_reg_dword(®->isp82.hint) & HINT_MBX_INT_PENDING) { ha->flags.mbox_busy = 0; spin_unlock_irqrestore(&ha->hardware_lock, @@ -311,11 +311,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) rval = QLA_FUNCTION_TIMEOUT; goto premature_exit; } - WRT_REG_DWORD(®->isp82.hint, HINT_MBX_INT_PENDING); + wrt_reg_dword(®->isp82.hint, HINT_MBX_INT_PENDING); } else if (IS_FWI2_CAPABLE(ha)) - WRT_REG_DWORD(®->isp24.hccr, HCCRX_SET_HOST_INT); + wrt_reg_dword(®->isp24.hccr, HCCRX_SET_HOST_INT); else - WRT_REG_WORD(®->isp.hccr, HCCR_SET_HOST_INT); + wrt_reg_word(®->isp.hccr, HCCR_SET_HOST_INT); spin_unlock_irqrestore(&ha->hardware_lock, flags); wait_time = jiffies + mcp->tov * HZ; /* wait at most tov secs */ @@ -413,14 +413,14 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) uint16_t w; if (IS_FWI2_CAPABLE(ha)) { - mb[0] = RD_REG_WORD(®->isp24.mailbox0); - mb[1] = RD_REG_WORD(®->isp24.mailbox1); - mb[2] = RD_REG_WORD(®->isp24.mailbox2); - mb[3] = RD_REG_WORD(®->isp24.mailbox3); - mb[7] = RD_REG_WORD(®->isp24.mailbox7); - ictrl = RD_REG_DWORD(®->isp24.ictrl); - host_status = RD_REG_DWORD(®->isp24.host_status); - hccr = RD_REG_DWORD(®->isp24.hccr); + mb[0] = rd_reg_word(®->isp24.mailbox0); + mb[1] = rd_reg_word(®->isp24.mailbox1); + mb[2] = rd_reg_word(®->isp24.mailbox2); + mb[3] = rd_reg_word(®->isp24.mailbox3); + mb[7] = rd_reg_word(®->isp24.mailbox7); + ictrl = rd_reg_dword(®->isp24.ictrl); + host_status = rd_reg_dword(®->isp24.host_status); + hccr = rd_reg_dword(®->isp24.hccr); ql_log(ql_log_warn, vha, 0xd04c, "MBX Command timeout for cmd %x, iocontrol=%x jiffies=%lx " @@ -430,7 +430,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) } else { mb[0] = RD_MAILBOX_REG(ha, ®->isp, 0); - ictrl = RD_REG_WORD(®->isp.ictrl); + ictrl = rd_reg_word(®->isp.ictrl); ql_dbg(ql_dbg_mbx + ql_dbg_buffer, vha, 0x1119, "MBX Command timeout for cmd %x, iocontrol=%x jiffies=%lx " "mb[0]=0x%x\n", command, ictrl, jiffies, mb[0]); @@ -573,15 +573,15 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (IS_FWI2_CAPABLE(ha) && !(IS_P3P_TYPE(ha))) { ql_dbg(ql_dbg_mbx, vha, 0x1198, "host_status=%#x intr_ctrl=%#x intr_status=%#x\n", - RD_REG_DWORD(®->isp24.host_status), - RD_REG_DWORD(®->isp24.ictrl), - RD_REG_DWORD(®->isp24.istatus)); + rd_reg_dword(®->isp24.host_status), + rd_reg_dword(®->isp24.ictrl), + rd_reg_dword(®->isp24.istatus)); } else { ql_dbg(ql_dbg_mbx, vha, 0x1206, "ctrl_status=%#x ictrl=%#x istatus=%#x\n", - RD_REG_WORD(®->isp.ctrl_status), - RD_REG_WORD(®->isp.ictrl), - RD_REG_WORD(®->isp.istatus)); + rd_reg_word(®->isp.ctrl_status), + rd_reg_word(®->isp.ictrl), + rd_reg_word(®->isp.istatus)); } } else { ql_dbg(ql_dbg_mbx, base_vha, 0x1021, "Done %s.\n", __func__); @@ -4427,9 +4427,9 @@ qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req) spin_lock_irqsave(&ha->hardware_lock, flags); if (!(req->options & BIT_0)) { - WRT_REG_DWORD(req->req_q_in, 0); + wrt_reg_dword(req->req_q_in, 0); if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha) && !IS_QLA28XX(ha)) - WRT_REG_DWORD(req->req_q_out, 0); + wrt_reg_dword(req->req_q_out, 0); } spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -4498,9 +4498,9 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp) spin_lock_irqsave(&ha->hardware_lock, flags); if (!(rsp->options & BIT_0)) { - WRT_REG_DWORD(rsp->rsp_q_out, 0); + wrt_reg_dword(rsp->rsp_q_out, 0); if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha) && !IS_QLA28XX(ha)) - WRT_REG_DWORD(rsp->rsp_q_in, 0); + wrt_reg_dword(rsp->rsp_q_in, 0); } spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -5413,18 +5413,18 @@ qla81xx_write_mpi_register(scsi_qla_host_t *vha, uint16_t *mb) clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); /* Write the MBC data to the registers */ - WRT_REG_WORD(®->mailbox0, MBC_WRITE_MPI_REGISTER); - WRT_REG_WORD(®->mailbox1, mb[0]); - WRT_REG_WORD(®->mailbox2, mb[1]); - WRT_REG_WORD(®->mailbox3, mb[2]); - WRT_REG_WORD(®->mailbox4, mb[3]); + wrt_reg_word(®->mailbox0, MBC_WRITE_MPI_REGISTER); + wrt_reg_word(®->mailbox1, mb[0]); + wrt_reg_word(®->mailbox2, mb[1]); + wrt_reg_word(®->mailbox3, mb[2]); + wrt_reg_word(®->mailbox4, mb[3]); - WRT_REG_DWORD(®->hccr, HCCRX_SET_HOST_INT); + wrt_reg_dword(®->hccr, HCCRX_SET_HOST_INT); /* Poll for MBC interrupt */ for (timer = 6000000; timer; timer--) { /* Check for pending interrupts. */ - stat = RD_REG_DWORD(®->host_status); + stat = rd_reg_dword(®->host_status); if (stat & HSRX_RISC_INT) { stat &= 0xff; @@ -5432,10 +5432,10 @@ qla81xx_write_mpi_register(scsi_qla_host_t *vha, uint16_t *mb) stat == 0x10 || stat == 0x11) { set_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); - mb0 = RD_REG_WORD(®->mailbox0); - WRT_REG_DWORD(®->hccr, + mb0 = rd_reg_word(®->mailbox0); + wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); - RD_REG_DWORD(®->hccr); + rd_reg_dword(®->hccr); break; } } diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 0e15bce82fc1..c5be5163b663 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -117,7 +117,7 @@ qlafx00_mailbox_command(scsi_qla_host_t *vha, struct mbx_cmd_32 *mcp) for (cnt = 0; cnt < ha->mbx_count; cnt++) { if (mboxes & BIT_0) - WRT_REG_DWORD(optr, *iptr); + wrt_reg_dword(optr, *iptr); mboxes >>= 1; optr++; @@ -676,14 +676,14 @@ qlafx00_config_rings(struct scsi_qla_host *vha) struct qla_hw_data *ha = vha->hw; struct device_reg_fx00 __iomem *reg = &ha->iobase->ispfx00; - WRT_REG_DWORD(®->req_q_in, 0); - WRT_REG_DWORD(®->req_q_out, 0); + wrt_reg_dword(®->req_q_in, 0); + wrt_reg_dword(®->req_q_out, 0); - WRT_REG_DWORD(®->rsp_q_in, 0); - WRT_REG_DWORD(®->rsp_q_out, 0); + wrt_reg_dword(®->rsp_q_in, 0); + wrt_reg_dword(®->rsp_q_out, 0); /* PCI posting */ - RD_REG_DWORD(®->rsp_q_out); + rd_reg_dword(®->rsp_q_out); } char * @@ -912,9 +912,9 @@ qlafx00_init_fw_ready(scsi_qla_host_t *vha) /* 30 seconds wait - Adjust if required */ wait_time = 30; - pseudo_aen = RD_REG_DWORD(®->pseudoaen); + pseudo_aen = rd_reg_dword(®->pseudoaen); if (pseudo_aen == 1) { - aenmbx7 = RD_REG_DWORD(®->initval7); + aenmbx7 = rd_reg_dword(®->initval7); ha->mbx_intr_code = MSW(aenmbx7); ha->rqstq_intr_code = LSW(aenmbx7); rval = qlafx00_driver_shutdown(vha, 10); @@ -925,7 +925,7 @@ qlafx00_init_fw_ready(scsi_qla_host_t *vha) /* wait time before firmware ready */ wtime = jiffies + (wait_time * HZ); do { - aenmbx = RD_REG_DWORD(®->aenmailbox0); + aenmbx = rd_reg_dword(®->aenmailbox0); barrier(); ql_dbg(ql_dbg_mbx, vha, 0x0133, "aenmbx: 0x%x\n", aenmbx); @@ -944,15 +944,15 @@ qlafx00_init_fw_ready(scsi_qla_host_t *vha) case MBA_FW_RESTART_CMPLT: /* Set the mbx and rqstq intr code */ - aenmbx7 = RD_REG_DWORD(®->aenmailbox7); + aenmbx7 = rd_reg_dword(®->aenmailbox7); ha->mbx_intr_code = MSW(aenmbx7); ha->rqstq_intr_code = LSW(aenmbx7); - ha->req_que_off = RD_REG_DWORD(®->aenmailbox1); - ha->rsp_que_off = RD_REG_DWORD(®->aenmailbox3); - ha->req_que_len = RD_REG_DWORD(®->aenmailbox5); - ha->rsp_que_len = RD_REG_DWORD(®->aenmailbox6); - WRT_REG_DWORD(®->aenmailbox0, 0); - RD_REG_DWORD_RELAXED(®->aenmailbox0); + ha->req_que_off = rd_reg_dword(®->aenmailbox1); + ha->rsp_que_off = rd_reg_dword(®->aenmailbox3); + ha->req_que_len = rd_reg_dword(®->aenmailbox5); + ha->rsp_que_len = rd_reg_dword(®->aenmailbox6); + wrt_reg_dword(®->aenmailbox0, 0); + rd_reg_dword_relaxed(®->aenmailbox0); ql_dbg(ql_dbg_init, vha, 0x0134, "f/w returned mbx_intr_code: 0x%x, " "rqstq_intr_code: 0x%x\n", @@ -982,13 +982,13 @@ qlafx00_init_fw_ready(scsi_qla_host_t *vha) * 3. issue Get FW State Mbox cmd to determine fw state * Set the mbx and rqstq intr code from Shadow Regs */ - aenmbx7 = RD_REG_DWORD(®->initval7); + aenmbx7 = rd_reg_dword(®->initval7); ha->mbx_intr_code = MSW(aenmbx7); ha->rqstq_intr_code = LSW(aenmbx7); - ha->req_que_off = RD_REG_DWORD(®->initval1); - ha->rsp_que_off = RD_REG_DWORD(®->initval3); - ha->req_que_len = RD_REG_DWORD(®->initval5); - ha->rsp_que_len = RD_REG_DWORD(®->initval6); + ha->req_que_off = rd_reg_dword(®->initval1); + ha->rsp_que_off = rd_reg_dword(®->initval3); + ha->req_que_len = rd_reg_dword(®->initval5); + ha->rsp_que_len = rd_reg_dword(®->initval6); ql_dbg(ql_dbg_init, vha, 0x0135, "f/w returned mbx_intr_code: 0x%x, " "rqstq_intr_code: 0x%x\n", @@ -1034,7 +1034,7 @@ qlafx00_init_fw_ready(scsi_qla_host_t *vha) if (time_after_eq(jiffies, wtime)) { ql_dbg(ql_dbg_init, vha, 0x0137, "Init f/w failed: aen[7]: 0x%x\n", - RD_REG_DWORD(®->aenmailbox7)); + rd_reg_dword(®->aenmailbox7)); rval = QLA_FUNCTION_FAILED; done = true; break; @@ -1428,7 +1428,7 @@ qlafx00_init_response_q_entries(struct rsp_que *rsp) pkt = rsp->ring_ptr; for (cnt = 0; cnt < rsp->length; cnt++) { pkt->signature = RESPONSE_PROCESSED; - WRT_REG_DWORD((void __force __iomem *)&pkt->signature, + wrt_reg_dword((void __force __iomem *)&pkt->signature, RESPONSE_PROCESSED); pkt++; } @@ -1444,13 +1444,13 @@ qlafx00_rescan_isp(scsi_qla_host_t *vha) qla2x00_request_irqs(ha, ha->rsp_q_map[0]); - aenmbx7 = RD_REG_DWORD(®->aenmailbox7); + aenmbx7 = rd_reg_dword(®->aenmailbox7); ha->mbx_intr_code = MSW(aenmbx7); ha->rqstq_intr_code = LSW(aenmbx7); - ha->req_que_off = RD_REG_DWORD(®->aenmailbox1); - ha->rsp_que_off = RD_REG_DWORD(®->aenmailbox3); - ha->req_que_len = RD_REG_DWORD(®->aenmailbox5); - ha->rsp_que_len = RD_REG_DWORD(®->aenmailbox6); + ha->req_que_off = rd_reg_dword(®->aenmailbox1); + ha->rsp_que_off = rd_reg_dword(®->aenmailbox3); + ha->req_que_len = rd_reg_dword(®->aenmailbox5); + ha->rsp_que_len = rd_reg_dword(®->aenmailbox6); ql_dbg(ql_dbg_disc, vha, 0x2094, "fw returned mbx_intr_code: 0x%x, rqstq_intr_code: 0x%x " @@ -1495,7 +1495,7 @@ qlafx00_timer_routine(scsi_qla_host_t *vha) (!test_bit(UNLOADING, &vha->dpc_flags)) && (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) && (ha->mr.fw_hbt_en)) { - fw_heart_beat = RD_REG_DWORD(®->fwheartbeat); + fw_heart_beat = rd_reg_dword(®->fwheartbeat); if (fw_heart_beat != ha->mr.old_fw_hbt_cnt) { ha->mr.old_fw_hbt_cnt = fw_heart_beat; ha->mr.fw_hbt_miss_cnt = 0; @@ -1515,7 +1515,7 @@ qlafx00_timer_routine(scsi_qla_host_t *vha) if (test_bit(FX00_RESET_RECOVERY, &vha->dpc_flags)) { /* Reset recovery to be performed in timer routine */ - aenmbx0 = RD_REG_DWORD(®->aenmailbox0); + aenmbx0 = rd_reg_dword(®->aenmailbox0); if (ha->mr.fw_reset_timer_exp) { set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); @@ -2718,7 +2718,7 @@ qlafx00_process_response_queue(struct scsi_qla_host *vha, uint16_t lreq_q_in = 0; uint16_t lreq_q_out = 0; - lreq_q_in = RD_REG_DWORD(rsp->rsp_q_in); + lreq_q_in = rd_reg_dword(rsp->rsp_q_in); lreq_q_out = rsp->ring_index; while (lreq_q_in != lreq_q_out) { @@ -2780,7 +2780,7 @@ qlafx00_process_response_queue(struct scsi_qla_host *vha, } /* Adjust ring index */ - WRT_REG_DWORD(rsp->rsp_q_out, rsp->ring_index); + wrt_reg_dword(rsp->rsp_q_out, rsp->ring_index); } /** @@ -2811,9 +2811,9 @@ qlafx00_async_event(scsi_qla_host_t *vha) break; case QLAFX00_MBA_PORT_UPDATE: /* Port database update */ - ha->aenmb[1] = RD_REG_DWORD(®->aenmailbox1); - ha->aenmb[2] = RD_REG_DWORD(®->aenmailbox2); - ha->aenmb[3] = RD_REG_DWORD(®->aenmailbox3); + ha->aenmb[1] = rd_reg_dword(®->aenmailbox1); + ha->aenmb[2] = rd_reg_dword(®->aenmailbox2); + ha->aenmb[3] = rd_reg_dword(®->aenmailbox3); ql_dbg(ql_dbg_async, vha, 0x5077, "Asynchronous port Update received " "aenmb[0]: %x, aenmb[1]: %x, aenmb[2]: %x, aenmb[3]: %x\n", @@ -2843,13 +2843,13 @@ qlafx00_async_event(scsi_qla_host_t *vha) break; default: - ha->aenmb[1] = RD_REG_DWORD(®->aenmailbox1); - ha->aenmb[2] = RD_REG_DWORD(®->aenmailbox2); - ha->aenmb[3] = RD_REG_DWORD(®->aenmailbox3); - ha->aenmb[4] = RD_REG_DWORD(®->aenmailbox4); - ha->aenmb[5] = RD_REG_DWORD(®->aenmailbox5); - ha->aenmb[6] = RD_REG_DWORD(®->aenmailbox6); - ha->aenmb[7] = RD_REG_DWORD(®->aenmailbox7); + ha->aenmb[1] = rd_reg_dword(®->aenmailbox1); + ha->aenmb[2] = rd_reg_dword(®->aenmailbox2); + ha->aenmb[3] = rd_reg_dword(®->aenmailbox3); + ha->aenmb[4] = rd_reg_dword(®->aenmailbox4); + ha->aenmb[5] = rd_reg_dword(®->aenmailbox5); + ha->aenmb[6] = rd_reg_dword(®->aenmailbox6); + ha->aenmb[7] = rd_reg_dword(®->aenmailbox7); ql_dbg(ql_dbg_async, vha, 0x5078, "AEN:%04x %04x %04x %04x :%04x %04x %04x %04x\n", ha->aenmb[0], ha->aenmb[1], ha->aenmb[2], ha->aenmb[3], @@ -2882,7 +2882,7 @@ qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0) wptr = ®->mailbox17; for (cnt = 1; cnt < ha->mbx_count; cnt++) { - ha->mailbox_out32[cnt] = RD_REG_DWORD(wptr); + ha->mailbox_out32[cnt] = rd_reg_dword(wptr); wptr++; } } @@ -2936,13 +2936,13 @@ qlafx00_intr_handler(int irq, void *dev_id) break; if (stat & QLAFX00_INTR_MB_CMPLT) { - mb[0] = RD_REG_DWORD(®->mailbox16); + mb[0] = rd_reg_dword(®->mailbox16); qlafx00_mbx_completion(vha, mb[0]); status |= MBX_INTERRUPT; clr_intr |= QLAFX00_INTR_MB_CMPLT; } if (intr_stat & QLAFX00_INTR_ASYNC_CMPLT) { - ha->aenmb[0] = RD_REG_DWORD(®->aenmailbox0); + ha->aenmb[0] = rd_reg_dword(®->aenmailbox0); qlafx00_async_event(vha); clr_intr |= QLAFX00_INTR_ASYNC_CMPLT; } @@ -3110,7 +3110,7 @@ qlafx00_start_scsi(srb_t *sp) tot_dsds = nseg; req_cnt = qla24xx_calc_iocbs(vha, tot_dsds); if (req->cnt < (req_cnt + 2)) { - cnt = RD_REG_DWORD_RELAXED(req->req_q_out); + cnt = rd_reg_dword_relaxed(req->req_q_out); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; @@ -3175,7 +3175,7 @@ qlafx00_start_scsi(srb_t *sp) sp->flags |= SRB_DMA_VALID; /* Set chip new ring index. */ - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); QLAFX00_SET_HST_INTR(ha, ha->rqstq_intr_code); spin_unlock_irqrestore(&ha->hardware_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_mr.h b/drivers/scsi/qla2xxx/qla_mr.h index 4567f0c42486..3aa9bfd1c840 100644 --- a/drivers/scsi/qla2xxx/qla_mr.h +++ b/drivers/scsi/qla2xxx/qla_mr.h @@ -359,47 +359,47 @@ struct config_info_data { #define CONTINUE_A64_TYPE_FX00 0x03 /* Continuation entry. */ #define QLAFX00_SET_HST_INTR(ha, value) \ - WRT_REG_DWORD((ha)->cregbase + QLAFX00_HST_TO_HBA_REG, \ + wrt_reg_dword((ha)->cregbase + QLAFX00_HST_TO_HBA_REG, \ value) #define QLAFX00_CLR_HST_INTR(ha, value) \ - WRT_REG_DWORD((ha)->cregbase + QLAFX00_HBA_TO_HOST_REG, \ + wrt_reg_dword((ha)->cregbase + QLAFX00_HBA_TO_HOST_REG, \ ~value) #define QLAFX00_RD_INTR_REG(ha) \ - RD_REG_DWORD((ha)->cregbase + QLAFX00_HBA_TO_HOST_REG) + rd_reg_dword((ha)->cregbase + QLAFX00_HBA_TO_HOST_REG) #define QLAFX00_CLR_INTR_REG(ha, value) \ - WRT_REG_DWORD((ha)->cregbase + QLAFX00_HBA_TO_HOST_REG, \ + wrt_reg_dword((ha)->cregbase + QLAFX00_HBA_TO_HOST_REG, \ ~value) #define QLAFX00_SET_HBA_SOC_REG(ha, off, val)\ - WRT_REG_DWORD((ha)->cregbase + off, val) + wrt_reg_dword((ha)->cregbase + off, val) #define QLAFX00_GET_HBA_SOC_REG(ha, off)\ - RD_REG_DWORD((ha)->cregbase + off) + rd_reg_dword((ha)->cregbase + off) #define QLAFX00_HBA_RST_REG(ha, val)\ - WRT_REG_DWORD((ha)->cregbase + QLAFX00_HST_RST_REG, val) + wrt_reg_dword((ha)->cregbase + QLAFX00_HST_RST_REG, val) #define QLAFX00_RD_ICNTRL_REG(ha) \ - RD_REG_DWORD((ha)->cregbase + QLAFX00_HBA_ICNTRL_REG) + rd_reg_dword((ha)->cregbase + QLAFX00_HBA_ICNTRL_REG) #define QLAFX00_ENABLE_ICNTRL_REG(ha) \ - WRT_REG_DWORD((ha)->cregbase + QLAFX00_HBA_ICNTRL_REG, \ + wrt_reg_dword((ha)->cregbase + QLAFX00_HBA_ICNTRL_REG, \ (QLAFX00_GET_HBA_SOC_REG(ha, QLAFX00_HBA_ICNTRL_REG) | \ QLAFX00_ICR_ENB_MASK)) #define QLAFX00_DISABLE_ICNTRL_REG(ha) \ - WRT_REG_DWORD((ha)->cregbase + QLAFX00_HBA_ICNTRL_REG, \ + wrt_reg_dword((ha)->cregbase + QLAFX00_HBA_ICNTRL_REG, \ (QLAFX00_GET_HBA_SOC_REG(ha, QLAFX00_HBA_ICNTRL_REG) & \ QLAFX00_ICR_DIS_MASK)) #define QLAFX00_RD_REG(ha, off) \ - RD_REG_DWORD((ha)->cregbase + off) + rd_reg_dword((ha)->cregbase + off) #define QLAFX00_WR_REG(ha, off, val) \ - WRT_REG_DWORD((ha)->cregbase + off, val) + wrt_reg_dword((ha)->cregbase + off, val) struct qla_mt_iocb_rqst_fx00 { __le32 reserved_0; diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 4886d247df6f..ad3aa1947e7d 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -384,7 +384,7 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp) req_cnt = qla24xx_calc_iocbs(vha, tot_dsds); if (req->cnt < (req_cnt + 2)) { cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : - RD_REG_DWORD_RELAXED(req->req_q_out); + rd_reg_dword_relaxed(req->req_q_out); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; @@ -514,7 +514,7 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp) } /* Set chip new ring index. */ - WRT_REG_DWORD(req->req_q_in, req->ring_index); + wrt_reg_dword(req->req_q_in, req->ring_index); queuing_error: spin_unlock_irqrestore(&qpair->qp_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 9cc6ad62265c..293dbde1d6e4 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -370,7 +370,7 @@ qla82xx_pci_set_crbwindow_2M(struct qla_hw_data *ha, ulong off_in, /* Read back value to make sure write has gone through before trying * to use it. */ - win_read = RD_REG_DWORD(CRB_WINDOW_2M + ha->nx_pcibase); + win_read = rd_reg_dword(CRB_WINDOW_2M + ha->nx_pcibase); if (win_read != ha->crb_win) { ql_dbg(ql_dbg_p3p, vha, 0xb000, "%s: Written crbwin (0x%x) " @@ -520,7 +520,7 @@ qla82xx_rd_32(struct qla_hw_data *ha, ulong off_in) qla82xx_crb_win_lock(ha); qla82xx_pci_set_crbwindow_2M(ha, off_in, &off); } - data = RD_REG_DWORD(off); + data = rd_reg_dword(off); if (rv == 1) { qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM7_UNLOCK)); @@ -937,17 +937,17 @@ qla82xx_md_rw_32(struct qla_hw_data *ha, uint32_t off, u32 data, uint8_t flag) { uint32_t off_value, rval = 0; - WRT_REG_DWORD(CRB_WINDOW_2M + ha->nx_pcibase, off & 0xFFFF0000); + wrt_reg_dword(CRB_WINDOW_2M + ha->nx_pcibase, off & 0xFFFF0000); /* Read back value to make sure write has gone through */ - RD_REG_DWORD(CRB_WINDOW_2M + ha->nx_pcibase); + rd_reg_dword(CRB_WINDOW_2M + ha->nx_pcibase); off_value = (off & 0x0000FFFF); if (flag) - WRT_REG_DWORD(off_value + CRB_INDIRECT_2M + ha->nx_pcibase, + wrt_reg_dword(off_value + CRB_INDIRECT_2M + ha->nx_pcibase, data); else - rval = RD_REG_DWORD(off_value + CRB_INDIRECT_2M + + rval = rd_reg_dword(off_value + CRB_INDIRECT_2M + ha->nx_pcibase); return rval; @@ -1790,9 +1790,9 @@ void qla82xx_config_rings(struct scsi_qla_host *vha) put_unaligned_le64(req->dma, &icb->request_q_address); put_unaligned_le64(rsp->dma, &icb->response_q_address); - WRT_REG_DWORD(®->req_q_out[0], 0); - WRT_REG_DWORD(®->rsp_q_in[0], 0); - WRT_REG_DWORD(®->rsp_q_out[0], 0); + wrt_reg_dword(®->req_q_out[0], 0); + wrt_reg_dword(®->rsp_q_in[0], 0); + wrt_reg_dword(®->rsp_q_out[0], 0); } static int @@ -2007,7 +2007,7 @@ qla82xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) ha->mailbox_out[0] = mb0; for (cnt = 1; cnt < ha->mbx_count; cnt++) { - ha->mailbox_out[cnt] = RD_REG_WORD(wptr); + ha->mailbox_out[cnt] = rd_reg_word(wptr); wptr++; } @@ -2069,8 +2069,8 @@ qla82xx_intr_handler(int irq, void *dev_id) vha = pci_get_drvdata(ha->pdev); for (iter = 1; iter--; ) { - if (RD_REG_DWORD(®->host_int)) { - stat = RD_REG_DWORD(®->host_status); + if (rd_reg_dword(®->host_int)) { + stat = rd_reg_dword(®->host_status); switch (stat & 0xff) { case 0x1: @@ -2082,9 +2082,9 @@ qla82xx_intr_handler(int irq, void *dev_id) break; case 0x12: mb[0] = MSW(stat); - mb[1] = RD_REG_WORD(®->mailbox_out[1]); - mb[2] = RD_REG_WORD(®->mailbox_out[2]); - mb[3] = RD_REG_WORD(®->mailbox_out[3]); + mb[1] = rd_reg_word(®->mailbox_out[1]); + mb[2] = rd_reg_word(®->mailbox_out[2]); + mb[3] = rd_reg_word(®->mailbox_out[3]); qla2x00_async_event(vha, rsp, mb); break; case 0x13: @@ -2097,7 +2097,7 @@ qla82xx_intr_handler(int irq, void *dev_id) break; } } - WRT_REG_DWORD(®->host_int, 0); + wrt_reg_dword(®->host_int, 0); } qla2x00_handle_mbx_completion(ha, status); @@ -2135,11 +2135,11 @@ qla82xx_msix_default(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); do { - host_int = RD_REG_DWORD(®->host_int); + host_int = rd_reg_dword(®->host_int); if (qla2x00_check_reg32_for_disconnect(vha, host_int)) break; if (host_int) { - stat = RD_REG_DWORD(®->host_status); + stat = rd_reg_dword(®->host_status); switch (stat & 0xff) { case 0x1: @@ -2151,9 +2151,9 @@ qla82xx_msix_default(int irq, void *dev_id) break; case 0x12: mb[0] = MSW(stat); - mb[1] = RD_REG_WORD(®->mailbox_out[1]); - mb[2] = RD_REG_WORD(®->mailbox_out[2]); - mb[3] = RD_REG_WORD(®->mailbox_out[3]); + mb[1] = rd_reg_word(®->mailbox_out[1]); + mb[2] = rd_reg_word(®->mailbox_out[2]); + mb[3] = rd_reg_word(®->mailbox_out[3]); qla2x00_async_event(vha, rsp, mb); break; case 0x13: @@ -2166,7 +2166,7 @@ qla82xx_msix_default(int irq, void *dev_id) break; } } - WRT_REG_DWORD(®->host_int, 0); + wrt_reg_dword(®->host_int, 0); } while (0); qla2x00_handle_mbx_completion(ha, status); @@ -2196,11 +2196,11 @@ qla82xx_msix_rsp_q(int irq, void *dev_id) reg = &ha->iobase->isp82; spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); - host_int = RD_REG_DWORD(®->host_int); + host_int = rd_reg_dword(®->host_int); if (qla2x00_check_reg32_for_disconnect(vha, host_int)) goto out; qla24xx_process_response_queue(vha, rsp); - WRT_REG_DWORD(®->host_int, 0); + wrt_reg_dword(®->host_int, 0); out: spin_unlock_irqrestore(&ha->hardware_lock, flags); return IRQ_HANDLED; @@ -2231,11 +2231,11 @@ qla82xx_poll(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); - host_int = RD_REG_DWORD(®->host_int); + host_int = rd_reg_dword(®->host_int); if (qla2x00_check_reg32_for_disconnect(vha, host_int)) goto out; if (host_int) { - stat = RD_REG_DWORD(®->host_status); + stat = rd_reg_dword(®->host_status); switch (stat & 0xff) { case 0x1: case 0x2: @@ -2246,9 +2246,9 @@ qla82xx_poll(int irq, void *dev_id) break; case 0x12: mb[0] = MSW(stat); - mb[1] = RD_REG_WORD(®->mailbox_out[1]); - mb[2] = RD_REG_WORD(®->mailbox_out[2]); - mb[3] = RD_REG_WORD(®->mailbox_out[3]); + mb[1] = rd_reg_word(®->mailbox_out[1]); + mb[2] = rd_reg_word(®->mailbox_out[2]); + mb[3] = rd_reg_word(®->mailbox_out[3]); qla2x00_async_event(vha, rsp, mb); break; case 0x13: @@ -2260,7 +2260,7 @@ qla82xx_poll(int irq, void *dev_id) stat * 0xff); break; } - WRT_REG_DWORD(®->host_int, 0); + wrt_reg_dword(®->host_int, 0); } out: spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -2818,10 +2818,10 @@ qla82xx_start_iocbs(scsi_qla_host_t *vha) if (ql2xdbwr) qla82xx_wr_32(ha, (unsigned long)ha->nxdb_wr_ptr, dbval); else { - WRT_REG_DWORD(ha->nxdb_wr_ptr, dbval); + wrt_reg_dword(ha->nxdb_wr_ptr, dbval); wmb(); - while (RD_REG_DWORD(ha->nxdb_rd_ptr) != dbval) { - WRT_REG_DWORD(ha->nxdb_wr_ptr, dbval); + while (rd_reg_dword(ha->nxdb_rd_ptr) != dbval) { + wrt_reg_dword(ha->nxdb_wr_ptr, dbval); wmb(); } } @@ -3854,7 +3854,7 @@ qla82xx_minidump_process_rdocm(scsi_qla_host_t *vha, loop_cnt = ocm_hdr->op_count; for (i = 0; i < loop_cnt; i++) { - r_value = RD_REG_DWORD(r_addr + ha->nx_pcibase); + r_value = rd_reg_dword(r_addr + ha->nx_pcibase); *data_ptr++ = cpu_to_le32(r_value); r_addr += r_stride; } diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c index df9429428316..a46830a99968 100644 --- a/drivers/scsi/qla2xxx/qla_nx2.c +++ b/drivers/scsi/qla2xxx/qla_nx2.c @@ -3946,8 +3946,8 @@ qla8044_intr_handler(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); for (iter = 1; iter--; ) { - if (RD_REG_DWORD(®->host_int)) { - stat = RD_REG_DWORD(®->host_status); + if (rd_reg_dword(®->host_int)) { + stat = rd_reg_dword(®->host_status); if ((stat & HSRX_RISC_INT) == 0) break; @@ -3961,9 +3961,9 @@ qla8044_intr_handler(int irq, void *dev_id) break; case 0x12: mb[0] = MSW(stat); - mb[1] = RD_REG_WORD(®->mailbox_out[1]); - mb[2] = RD_REG_WORD(®->mailbox_out[2]); - mb[3] = RD_REG_WORD(®->mailbox_out[3]); + mb[1] = rd_reg_word(®->mailbox_out[1]); + mb[2] = rd_reg_word(®->mailbox_out[2]); + mb[3] = rd_reg_word(®->mailbox_out[3]); qla2x00_async_event(vha, rsp, mb); break; case 0x13: @@ -3976,7 +3976,7 @@ qla8044_intr_handler(int irq, void *dev_id) break; } } - WRT_REG_DWORD(®->host_int, 0); + wrt_reg_dword(®->host_int, 0); } qla2x00_handle_mbx_completion(ha, status); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 017f4e0f1b58..2aa8ea6e1ceb 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1221,9 +1221,9 @@ uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha) struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82; if (IS_P3P_TYPE(ha)) - return ((RD_REG_DWORD(®82->host_int)) == ISP_REG_DISCONNECT); + return ((rd_reg_dword(®82->host_int)) == ISP_REG_DISCONNECT); else - return ((RD_REG_DWORD(®->host_status)) == + return ((rd_reg_dword(®->host_status)) == ISP_REG_DISCONNECT); } @@ -1907,8 +1907,8 @@ qla2x00_enable_intrs(struct qla_hw_data *ha) spin_lock_irqsave(&ha->hardware_lock, flags); ha->interrupts_on = 1; /* enable risc and host interrupts */ - WRT_REG_WORD(®->ictrl, ICR_EN_INT | ICR_EN_RISC); - RD_REG_WORD(®->ictrl); + wrt_reg_word(®->ictrl, ICR_EN_INT | ICR_EN_RISC); + rd_reg_word(®->ictrl); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -1922,8 +1922,8 @@ qla2x00_disable_intrs(struct qla_hw_data *ha) spin_lock_irqsave(&ha->hardware_lock, flags); ha->interrupts_on = 0; /* disable risc and host interrupts */ - WRT_REG_WORD(®->ictrl, 0); - RD_REG_WORD(®->ictrl); + wrt_reg_word(®->ictrl, 0); + rd_reg_word(®->ictrl); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -1935,8 +1935,8 @@ qla24xx_enable_intrs(struct qla_hw_data *ha) spin_lock_irqsave(&ha->hardware_lock, flags); ha->interrupts_on = 1; - WRT_REG_DWORD(®->ictrl, ICRX_EN_RISC_INT); - RD_REG_DWORD(®->ictrl); + wrt_reg_dword(®->ictrl, ICRX_EN_RISC_INT); + rd_reg_dword(®->ictrl); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -1950,8 +1950,8 @@ qla24xx_disable_intrs(struct qla_hw_data *ha) return; spin_lock_irqsave(&ha->hardware_lock, flags); ha->interrupts_on = 0; - WRT_REG_DWORD(®->ictrl, 0); - RD_REG_DWORD(®->ictrl); + wrt_reg_dword(®->ictrl, 0); + rd_reg_dword(®->ictrl); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -7558,15 +7558,15 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) spin_lock_irqsave(&ha->hardware_lock, flags); if (IS_QLA2100(ha) || IS_QLA2200(ha)){ - stat = RD_REG_WORD(®->hccr); + stat = rd_reg_word(®->hccr); if (stat & HCCR_RISC_PAUSE) risc_paused = 1; } else if (IS_QLA23XX(ha)) { - stat = RD_REG_DWORD(®->u.isp2300.host_status); + stat = rd_reg_dword(®->u.isp2300.host_status); if (stat & HSR_RISC_PAUSED) risc_paused = 1; } else if (IS_FWI2_CAPABLE(ha)) { - stat = RD_REG_DWORD(®24->host_status); + stat = rd_reg_dword(®24->host_status); if (stat & HSRX_RISC_PAUSED) risc_paused = 1; } diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 57ffbf9d7dbf..da984d7552d5 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -26,24 +26,24 @@ qla2x00_lock_nvram_access(struct qla_hw_data *ha) struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; if (!IS_QLA2100(ha) && !IS_QLA2200(ha) && !IS_QLA2300(ha)) { - data = RD_REG_WORD(®->nvram); + data = rd_reg_word(®->nvram); while (data & NVR_BUSY) { udelay(100); - data = RD_REG_WORD(®->nvram); + data = rd_reg_word(®->nvram); } /* Lock resource */ - WRT_REG_WORD(®->u.isp2300.host_semaphore, 0x1); - RD_REG_WORD(®->u.isp2300.host_semaphore); + wrt_reg_word(®->u.isp2300.host_semaphore, 0x1); + rd_reg_word(®->u.isp2300.host_semaphore); udelay(5); - data = RD_REG_WORD(®->u.isp2300.host_semaphore); + data = rd_reg_word(®->u.isp2300.host_semaphore); while ((data & BIT_0) == 0) { /* Lock failed */ udelay(100); - WRT_REG_WORD(®->u.isp2300.host_semaphore, 0x1); - RD_REG_WORD(®->u.isp2300.host_semaphore); + wrt_reg_word(®->u.isp2300.host_semaphore, 0x1); + rd_reg_word(®->u.isp2300.host_semaphore); udelay(5); - data = RD_REG_WORD(®->u.isp2300.host_semaphore); + data = rd_reg_word(®->u.isp2300.host_semaphore); } } } @@ -58,8 +58,8 @@ qla2x00_unlock_nvram_access(struct qla_hw_data *ha) struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; if (!IS_QLA2100(ha) && !IS_QLA2200(ha) && !IS_QLA2300(ha)) { - WRT_REG_WORD(®->u.isp2300.host_semaphore, 0); - RD_REG_WORD(®->u.isp2300.host_semaphore); + wrt_reg_word(®->u.isp2300.host_semaphore, 0); + rd_reg_word(®->u.isp2300.host_semaphore); } } @@ -73,15 +73,15 @@ qla2x00_nv_write(struct qla_hw_data *ha, uint16_t data) { struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - WRT_REG_WORD(®->nvram, data | NVR_SELECT | NVR_WRT_ENABLE); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, data | NVR_SELECT | NVR_WRT_ENABLE); + rd_reg_word(®->nvram); /* PCI Posting. */ NVRAM_DELAY(); - WRT_REG_WORD(®->nvram, data | NVR_SELECT | NVR_CLOCK | + wrt_reg_word(®->nvram, data | NVR_SELECT | NVR_CLOCK | NVR_WRT_ENABLE); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + rd_reg_word(®->nvram); /* PCI Posting. */ NVRAM_DELAY(); - WRT_REG_WORD(®->nvram, data | NVR_SELECT | NVR_WRT_ENABLE); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, data | NVR_SELECT | NVR_WRT_ENABLE); + rd_reg_word(®->nvram); /* PCI Posting. */ NVRAM_DELAY(); } @@ -120,21 +120,21 @@ qla2x00_nvram_request(struct qla_hw_data *ha, uint32_t nv_cmd) /* Read data from NVRAM. */ for (cnt = 0; cnt < 16; cnt++) { - WRT_REG_WORD(®->nvram, NVR_SELECT | NVR_CLOCK); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_SELECT | NVR_CLOCK); + rd_reg_word(®->nvram); /* PCI Posting. */ NVRAM_DELAY(); data <<= 1; - reg_data = RD_REG_WORD(®->nvram); + reg_data = rd_reg_word(®->nvram); if (reg_data & NVR_DATA_IN) data |= BIT_0; - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ NVRAM_DELAY(); } /* Deselect chip. */ - WRT_REG_WORD(®->nvram, NVR_DESELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_DESELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ NVRAM_DELAY(); return data; @@ -171,8 +171,8 @@ qla2x00_nv_deselect(struct qla_hw_data *ha) { struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - WRT_REG_WORD(®->nvram, NVR_DESELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_DESELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ NVRAM_DELAY(); } @@ -216,8 +216,8 @@ qla2x00_write_nvram_word(struct qla_hw_data *ha, uint32_t addr, uint16_t data) qla2x00_nv_deselect(ha); /* Wait for NVRAM to become ready */ - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ wait_cnt = NVR_WAIT_CNT; do { if (!--wait_cnt) { @@ -226,7 +226,7 @@ qla2x00_write_nvram_word(struct qla_hw_data *ha, uint32_t addr, uint16_t data) break; } NVRAM_DELAY(); - word = RD_REG_WORD(®->nvram); + word = rd_reg_word(®->nvram); } while ((word & NVR_DATA_IN) == 0); qla2x00_nv_deselect(ha); @@ -275,11 +275,11 @@ qla2x00_write_nvram_word_tmo(struct qla_hw_data *ha, uint32_t addr, qla2x00_nv_deselect(ha); /* Wait for NVRAM to become ready */ - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ do { NVRAM_DELAY(); - word = RD_REG_WORD(®->nvram); + word = rd_reg_word(®->nvram); if (!--tmo) { ret = QLA_FUNCTION_FAILED; break; @@ -347,8 +347,8 @@ qla2x00_clear_nvram_protection(struct qla_hw_data *ha) qla2x00_nv_deselect(ha); /* Wait for NVRAM to become ready. */ - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ wait_cnt = NVR_WAIT_CNT; do { if (!--wait_cnt) { @@ -357,7 +357,7 @@ qla2x00_clear_nvram_protection(struct qla_hw_data *ha) break; } NVRAM_DELAY(); - word = RD_REG_WORD(®->nvram); + word = rd_reg_word(®->nvram); } while ((word & NVR_DATA_IN) == 0); if (wait_cnt) @@ -407,8 +407,8 @@ qla2x00_set_nvram_protection(struct qla_hw_data *ha, int stat) qla2x00_nv_deselect(ha); /* Wait for NVRAM to become ready. */ - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ wait_cnt = NVR_WAIT_CNT; do { if (!--wait_cnt) { @@ -417,7 +417,7 @@ qla2x00_set_nvram_protection(struct qla_hw_data *ha, int stat) break; } NVRAM_DELAY(); - word = RD_REG_WORD(®->nvram); + word = rd_reg_word(®->nvram); } while ((word & NVR_DATA_IN) == 0); } @@ -456,11 +456,11 @@ qla24xx_read_flash_dword(struct qla_hw_data *ha, uint32_t addr, uint32_t *data) struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; ulong cnt = 30000; - WRT_REG_DWORD(®->flash_addr, addr & ~FARX_DATA_FLAG); + wrt_reg_dword(®->flash_addr, addr & ~FARX_DATA_FLAG); while (cnt--) { - if (RD_REG_DWORD(®->flash_addr) & FARX_DATA_FLAG) { - *data = RD_REG_DWORD(®->flash_data); + if (rd_reg_dword(®->flash_addr) & FARX_DATA_FLAG) { + *data = rd_reg_dword(®->flash_data); return QLA_SUCCESS; } udelay(10); @@ -499,11 +499,11 @@ qla24xx_write_flash_dword(struct qla_hw_data *ha, uint32_t addr, uint32_t data) struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; ulong cnt = 500000; - WRT_REG_DWORD(®->flash_data, data); - WRT_REG_DWORD(®->flash_addr, addr | FARX_DATA_FLAG); + wrt_reg_dword(®->flash_data, data); + wrt_reg_dword(®->flash_addr, addr | FARX_DATA_FLAG); while (cnt--) { - if (!(RD_REG_DWORD(®->flash_addr) & FARX_DATA_FLAG)) + if (!(rd_reg_dword(®->flash_addr) & FARX_DATA_FLAG)) return QLA_SUCCESS; udelay(10); cond_resched(); @@ -1197,9 +1197,9 @@ qla24xx_unprotect_flash(scsi_qla_host_t *vha) return qla81xx_fac_do_write_enable(vha, 1); /* Enable flash write. */ - WRT_REG_DWORD(®->ctrl_status, - RD_REG_DWORD(®->ctrl_status) | CSRX_FLASH_ENABLE); - RD_REG_DWORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_dword(®->ctrl_status, + rd_reg_dword(®->ctrl_status) | CSRX_FLASH_ENABLE); + rd_reg_dword(®->ctrl_status); /* PCI Posting. */ if (!ha->fdt_wrt_disable) goto done; @@ -1240,8 +1240,8 @@ qla24xx_protect_flash(scsi_qla_host_t *vha) skip_wrt_protect: /* Disable flash write. */ - WRT_REG_DWORD(®->ctrl_status, - RD_REG_DWORD(®->ctrl_status) & ~CSRX_FLASH_ENABLE); + wrt_reg_dword(®->ctrl_status, + rd_reg_dword(®->ctrl_status) & ~CSRX_FLASH_ENABLE); return QLA_SUCCESS; } @@ -1466,9 +1466,9 @@ qla24xx_write_nvram_data(scsi_qla_host_t *vha, void *buf, uint32_t naddr, return ret; /* Enable flash write. */ - WRT_REG_DWORD(®->ctrl_status, - RD_REG_DWORD(®->ctrl_status) | CSRX_FLASH_ENABLE); - RD_REG_DWORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_dword(®->ctrl_status, + rd_reg_dword(®->ctrl_status) | CSRX_FLASH_ENABLE); + rd_reg_dword(®->ctrl_status); /* PCI Posting. */ /* Disable NVRAM write-protection. */ qla24xx_write_flash_dword(ha, nvram_conf_addr(ha, 0x101), 0); @@ -1490,9 +1490,9 @@ qla24xx_write_nvram_data(scsi_qla_host_t *vha, void *buf, uint32_t naddr, qla24xx_write_flash_dword(ha, nvram_conf_addr(ha, 0x101), 0x8c); /* Disable flash write. */ - WRT_REG_DWORD(®->ctrl_status, - RD_REG_DWORD(®->ctrl_status) & ~CSRX_FLASH_ENABLE); - RD_REG_DWORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_dword(®->ctrl_status, + rd_reg_dword(®->ctrl_status) & ~CSRX_FLASH_ENABLE); + rd_reg_dword(®->ctrl_status); /* PCI Posting. */ return ret; } @@ -1588,8 +1588,8 @@ qla2x00_beacon_blink(struct scsi_qla_host *vha) gpio_enable = RD_REG_WORD_PIO(PIO_REG(ha, gpioe)); gpio_data = RD_REG_WORD_PIO(PIO_REG(ha, gpiod)); } else { - gpio_enable = RD_REG_WORD(®->gpioe); - gpio_data = RD_REG_WORD(®->gpiod); + gpio_enable = rd_reg_word(®->gpioe); + gpio_data = rd_reg_word(®->gpiod); } /* Set the modified gpio_enable values */ @@ -1598,8 +1598,8 @@ qla2x00_beacon_blink(struct scsi_qla_host *vha) if (ha->pio_address) { WRT_REG_WORD_PIO(PIO_REG(ha, gpioe), gpio_enable); } else { - WRT_REG_WORD(®->gpioe, gpio_enable); - RD_REG_WORD(®->gpioe); + wrt_reg_word(®->gpioe, gpio_enable); + rd_reg_word(®->gpioe); } qla2x00_flip_colors(ha, &led_color); @@ -1614,8 +1614,8 @@ qla2x00_beacon_blink(struct scsi_qla_host *vha) if (ha->pio_address) { WRT_REG_WORD_PIO(PIO_REG(ha, gpiod), gpio_data); } else { - WRT_REG_WORD(®->gpiod, gpio_data); - RD_REG_WORD(®->gpiod); + wrt_reg_word(®->gpiod, gpio_data); + rd_reg_word(®->gpiod); } spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -1645,8 +1645,8 @@ qla2x00_beacon_on(struct scsi_qla_host *vha) gpio_enable = RD_REG_WORD_PIO(PIO_REG(ha, gpioe)); gpio_data = RD_REG_WORD_PIO(PIO_REG(ha, gpiod)); } else { - gpio_enable = RD_REG_WORD(®->gpioe); - gpio_data = RD_REG_WORD(®->gpiod); + gpio_enable = rd_reg_word(®->gpioe); + gpio_data = rd_reg_word(®->gpiod); } gpio_enable |= GPIO_LED_MASK; @@ -1654,8 +1654,8 @@ qla2x00_beacon_on(struct scsi_qla_host *vha) if (ha->pio_address) { WRT_REG_WORD_PIO(PIO_REG(ha, gpioe), gpio_enable); } else { - WRT_REG_WORD(®->gpioe, gpio_enable); - RD_REG_WORD(®->gpioe); + wrt_reg_word(®->gpioe, gpio_enable); + rd_reg_word(®->gpioe); } /* Clear out previously set LED colour. */ @@ -1663,8 +1663,8 @@ qla2x00_beacon_on(struct scsi_qla_host *vha) if (ha->pio_address) { WRT_REG_WORD_PIO(PIO_REG(ha, gpiod), gpio_data); } else { - WRT_REG_WORD(®->gpiod, gpio_data); - RD_REG_WORD(®->gpiod); + wrt_reg_word(®->gpiod, gpio_data); + rd_reg_word(®->gpiod); } spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -1731,13 +1731,13 @@ qla24xx_beacon_blink(struct scsi_qla_host *vha) /* Save the Original GPIOD. */ spin_lock_irqsave(&ha->hardware_lock, flags); - gpio_data = RD_REG_DWORD(®->gpiod); + gpio_data = rd_reg_dword(®->gpiod); /* Enable the gpio_data reg for update. */ gpio_data |= GPDX_LED_UPDATE_MASK; - WRT_REG_DWORD(®->gpiod, gpio_data); - gpio_data = RD_REG_DWORD(®->gpiod); + wrt_reg_dword(®->gpiod, gpio_data); + gpio_data = rd_reg_dword(®->gpiod); /* Set the color bits. */ qla24xx_flip_colors(ha, &led_color); @@ -1749,8 +1749,8 @@ qla24xx_beacon_blink(struct scsi_qla_host *vha) gpio_data |= led_color; /* Set the modified gpio_data values. */ - WRT_REG_DWORD(®->gpiod, gpio_data); - gpio_data = RD_REG_DWORD(®->gpiod); + wrt_reg_dword(®->gpiod, gpio_data); + gpio_data = rd_reg_dword(®->gpiod); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -1881,12 +1881,12 @@ qla24xx_beacon_on(struct scsi_qla_host *vha) goto skip_gpio; spin_lock_irqsave(&ha->hardware_lock, flags); - gpio_data = RD_REG_DWORD(®->gpiod); + gpio_data = rd_reg_dword(®->gpiod); /* Enable the gpio_data reg for update. */ gpio_data |= GPDX_LED_UPDATE_MASK; - WRT_REG_DWORD(®->gpiod, gpio_data); - RD_REG_DWORD(®->gpiod); + wrt_reg_dword(®->gpiod, gpio_data); + rd_reg_dword(®->gpiod); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -1929,12 +1929,12 @@ qla24xx_beacon_off(struct scsi_qla_host *vha) /* Give control back to firmware. */ spin_lock_irqsave(&ha->hardware_lock, flags); - gpio_data = RD_REG_DWORD(®->gpiod); + gpio_data = rd_reg_dword(®->gpiod); /* Disable the gpio_data reg for update. */ gpio_data &= ~GPDX_LED_UPDATE_MASK; - WRT_REG_DWORD(®->gpiod, gpio_data); - RD_REG_DWORD(®->gpiod); + wrt_reg_dword(®->gpiod, gpio_data); + rd_reg_dword(®->gpiod); spin_unlock_irqrestore(&ha->hardware_lock, flags); set_fw_options: @@ -1970,10 +1970,10 @@ qla2x00_flash_enable(struct qla_hw_data *ha) uint16_t data; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - data = RD_REG_WORD(®->ctrl_status); + data = rd_reg_word(®->ctrl_status); data |= CSR_FLASH_ENABLE; - WRT_REG_WORD(®->ctrl_status, data); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, data); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ } /** @@ -1986,10 +1986,10 @@ qla2x00_flash_disable(struct qla_hw_data *ha) uint16_t data; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - data = RD_REG_WORD(®->ctrl_status); + data = rd_reg_word(®->ctrl_status); data &= ~(CSR_FLASH_ENABLE); - WRT_REG_WORD(®->ctrl_status, data); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, data); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ } /** @@ -2008,7 +2008,7 @@ qla2x00_read_flash_byte(struct qla_hw_data *ha, uint32_t addr) uint16_t bank_select; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - bank_select = RD_REG_WORD(®->ctrl_status); + bank_select = rd_reg_word(®->ctrl_status); if (IS_QLA2322(ha) || IS_QLA6322(ha)) { /* Specify 64K address range: */ @@ -2016,11 +2016,11 @@ qla2x00_read_flash_byte(struct qla_hw_data *ha, uint32_t addr) bank_select &= ~0xf8; bank_select |= addr >> 12 & 0xf0; bank_select |= CSR_FLASH_64K_BANK; - WRT_REG_WORD(®->ctrl_status, bank_select); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, bank_select); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ - WRT_REG_WORD(®->flash_address, (uint16_t)addr); - data = RD_REG_WORD(®->flash_data); + wrt_reg_word(®->flash_address, (uint16_t)addr); + data = rd_reg_word(®->flash_data); return (uint8_t)data; } @@ -2028,13 +2028,13 @@ qla2x00_read_flash_byte(struct qla_hw_data *ha, uint32_t addr) /* Setup bit 16 of flash address. */ if ((addr & BIT_16) && ((bank_select & CSR_FLASH_64K_BANK) == 0)) { bank_select |= CSR_FLASH_64K_BANK; - WRT_REG_WORD(®->ctrl_status, bank_select); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, bank_select); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ } else if (((addr & BIT_16) == 0) && (bank_select & CSR_FLASH_64K_BANK)) { bank_select &= ~(CSR_FLASH_64K_BANK); - WRT_REG_WORD(®->ctrl_status, bank_select); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, bank_select); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ } /* Always perform IO mapped accesses to the FLASH registers. */ @@ -2049,7 +2049,7 @@ qla2x00_read_flash_byte(struct qla_hw_data *ha, uint32_t addr) data2 = RD_REG_WORD_PIO(PIO_REG(ha, flash_data)); } while (data != data2); } else { - WRT_REG_WORD(®->flash_address, (uint16_t)addr); + wrt_reg_word(®->flash_address, (uint16_t)addr); data = qla2x00_debounce_register(®->flash_data); } @@ -2068,20 +2068,20 @@ qla2x00_write_flash_byte(struct qla_hw_data *ha, uint32_t addr, uint8_t data) uint16_t bank_select; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - bank_select = RD_REG_WORD(®->ctrl_status); + bank_select = rd_reg_word(®->ctrl_status); if (IS_QLA2322(ha) || IS_QLA6322(ha)) { /* Specify 64K address range: */ /* clear out Module Select and Flash Address bits [19:16]. */ bank_select &= ~0xf8; bank_select |= addr >> 12 & 0xf0; bank_select |= CSR_FLASH_64K_BANK; - WRT_REG_WORD(®->ctrl_status, bank_select); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, bank_select); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ - WRT_REG_WORD(®->flash_address, (uint16_t)addr); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ - WRT_REG_WORD(®->flash_data, (uint16_t)data); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->flash_address, (uint16_t)addr); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->flash_data, (uint16_t)data); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ return; } @@ -2089,13 +2089,13 @@ qla2x00_write_flash_byte(struct qla_hw_data *ha, uint32_t addr, uint8_t data) /* Setup bit 16 of flash address. */ if ((addr & BIT_16) && ((bank_select & CSR_FLASH_64K_BANK) == 0)) { bank_select |= CSR_FLASH_64K_BANK; - WRT_REG_WORD(®->ctrl_status, bank_select); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, bank_select); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ } else if (((addr & BIT_16) == 0) && (bank_select & CSR_FLASH_64K_BANK)) { bank_select &= ~(CSR_FLASH_64K_BANK); - WRT_REG_WORD(®->ctrl_status, bank_select); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->ctrl_status, bank_select); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ } /* Always perform IO mapped accesses to the FLASH registers. */ @@ -2103,10 +2103,10 @@ qla2x00_write_flash_byte(struct qla_hw_data *ha, uint32_t addr, uint8_t data) WRT_REG_WORD_PIO(PIO_REG(ha, flash_address), (uint16_t)addr); WRT_REG_WORD_PIO(PIO_REG(ha, flash_data), (uint16_t)data); } else { - WRT_REG_WORD(®->flash_address, (uint16_t)addr); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ - WRT_REG_WORD(®->flash_data, (uint16_t)data); - RD_REG_WORD(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->flash_address, (uint16_t)addr); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ + wrt_reg_word(®->flash_data, (uint16_t)data); + rd_reg_word(®->ctrl_status); /* PCI Posting. */ } } @@ -2289,12 +2289,12 @@ qla2x00_read_flash_data(struct qla_hw_data *ha, uint8_t *tmp_buf, midpoint = length / 2; - WRT_REG_WORD(®->nvram, 0); - RD_REG_WORD(®->nvram); + wrt_reg_word(®->nvram, 0); + rd_reg_word(®->nvram); for (ilength = 0; ilength < length; saddr++, ilength++, tmp_buf++) { if (ilength == midpoint) { - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); } data = qla2x00_read_flash_byte(ha, saddr); if (saddr % 100) @@ -2319,11 +2319,11 @@ qla2x00_suspend_hba(struct scsi_qla_host *vha) /* Pause RISC. */ spin_lock_irqsave(&ha->hardware_lock, flags); - WRT_REG_WORD(®->hccr, HCCR_PAUSE_RISC); - RD_REG_WORD(®->hccr); + wrt_reg_word(®->hccr, HCCR_PAUSE_RISC); + rd_reg_word(®->hccr); if (IS_QLA2100(ha) || IS_QLA2200(ha) || IS_QLA2300(ha)) { for (cnt = 0; cnt < 30000; cnt++) { - if ((RD_REG_WORD(®->hccr) & HCCR_RISC_PAUSE) != 0) + if ((rd_reg_word(®->hccr) & HCCR_RISC_PAUSE) != 0) break; udelay(100); } @@ -2362,12 +2362,12 @@ qla2x00_read_optrom_data(struct scsi_qla_host *vha, void *buf, midpoint = ha->optrom_size / 2; qla2x00_flash_enable(ha); - WRT_REG_WORD(®->nvram, 0); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, 0); + rd_reg_word(®->nvram); /* PCI Posting. */ for (addr = offset, data = buf; addr < length; addr++, data++) { if (addr == midpoint) { - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); /* PCI Posting. */ + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); /* PCI Posting. */ } *data = qla2x00_read_flash_byte(ha, addr); @@ -2399,7 +2399,7 @@ qla2x00_write_optrom_data(struct scsi_qla_host *vha, void *buf, sec_number = 0; /* Reset ISP chip. */ - WRT_REG_WORD(®->ctrl_status, CSR_ISP_SOFT_RESET); + wrt_reg_word(®->ctrl_status, CSR_ISP_SOFT_RESET); pci_read_config_word(ha->pdev, PCI_COMMAND, &wd); /* Go with write. */ @@ -2548,8 +2548,8 @@ qla2x00_write_optrom_data(struct scsi_qla_host *vha, void *buf, } } } else if (addr == ha->optrom_size / 2) { - WRT_REG_WORD(®->nvram, NVR_SELECT); - RD_REG_WORD(®->nvram); + wrt_reg_word(®->nvram, NVR_SELECT); + rd_reg_word(®->nvram); } if (flash_id == 0xda && man_id == 0xc1) { diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 3af8a8a7f997..f7425875f4f9 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2484,7 +2484,7 @@ static int qlt_check_reserve_free_req(struct qla_qpair *qpair, if (req->cnt < (req_cnt + 2)) { cnt = (uint16_t)(qpair->use_shadow_reg ? *req->out_ptr : - RD_REG_DWORD_RELAXED(req->req_q_out)); + rd_reg_dword_relaxed(req->req_q_out)); if (req->ring_index < cnt) req->cnt = cnt - req->ring_index; @@ -6794,7 +6794,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked) } /* Adjust ring index */ - WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), ha->tgt.atio_ring_index); + wrt_reg_dword(ISP_ATIO_Q_OUT(vha), ha->tgt.atio_ring_index); } void @@ -6807,9 +6807,9 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha) if (!QLA_TGT_MODE_ENABLED()) return; - WRT_REG_DWORD(ISP_ATIO_Q_IN(vha), 0); - WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), 0); - RD_REG_DWORD(ISP_ATIO_Q_OUT(vha)); + wrt_reg_dword(ISP_ATIO_Q_IN(vha), 0); + wrt_reg_dword(ISP_ATIO_Q_OUT(vha), 0); + rd_reg_dword(ISP_ATIO_Q_OUT(vha)); if (ha->flags.msix_enabled) { if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 645496091186..f05a4fa2b9d7 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -22,9 +22,9 @@ qla27xx_write_remote_reg(struct scsi_qla_host *vha, ql_dbg(ql_dbg_misc, vha, 0xd300, "%s: addr/data = %xh/%xh\n", __func__, addr, data); - WRT_REG_DWORD(®->iobase_addr, 0x40); - WRT_REG_DWORD(®->iobase_c4, data); - WRT_REG_DWORD(®->iobase_window, addr); + wrt_reg_dword(®->iobase_addr, 0x40); + wrt_reg_dword(®->iobase_c4, data); + wrt_reg_dword(®->iobase_window, addr); } void @@ -75,7 +75,7 @@ qla27xx_read8(void __iomem *window, void *buf, ulong *len) uint8_t value = ~0; if (buf) { - value = RD_REG_BYTE(window); + value = rd_reg_byte(window); } qla27xx_insert32(value, buf, len); } @@ -86,7 +86,7 @@ qla27xx_read16(void __iomem *window, void *buf, ulong *len) uint16_t value = ~0; if (buf) { - value = RD_REG_WORD(window); + value = rd_reg_word(window); } qla27xx_insert32(value, buf, len); } @@ -97,7 +97,7 @@ qla27xx_read32(void __iomem *window, void *buf, ulong *len) uint32_t value = ~0; if (buf) { - value = RD_REG_DWORD(window); + value = rd_reg_dword(window); } qla27xx_insert32(value, buf, len); } @@ -126,7 +126,7 @@ qla27xx_write_reg(__iomem struct device_reg_24xx *reg, if (buf) { void __iomem *window = (void __iomem *)reg + offset; - WRT_REG_DWORD(window, data); + wrt_reg_dword(window, data); } } From ab053c09ee2066a2fe62a755f1e64dbc8eddc17c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:09 -0700 Subject: [PATCH 419/562] scsi: qla2xxx: Cast explicitly to uint16_t / uint32_t Casting a pointer to void * and relying on an implicit cast from void * to uint16_t or uint32_t suppresses sparse warnings about endianness. Hence cast explicitly to uint16_t and uint32_t. Additionally, remove superfluous void * casts. Link: https://lore.kernel.org/r/20200518211712.11395-13-bvanassche@acm.org Cc: Arun Easi Cc: Nilesh Javali Cc: Daniel Wagner Cc: Himanshu Madhani Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 4 ++-- drivers/scsi/qla2xxx/qla_init.c | 26 +++++++++++++------------- drivers/scsi/qla2xxx/qla_mbx.c | 6 +++--- drivers/scsi/qla2xxx/qla_mid.c | 4 ++-- drivers/scsi/qla2xxx/qla_mr.c | 4 ++-- drivers/scsi/qla2xxx/qla_nvme.c | 4 ++-- drivers/scsi/qla2xxx/qla_nx2.c | 4 ++-- drivers/scsi/qla2xxx/qla_os.c | 10 +++++----- drivers/scsi/qla2xxx/qla_sup.c | 12 ++++++------ drivers/scsi/qla2xxx/qla_target.c | 4 ++-- 10 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index fbd8cb5647b6..d020c23a5106 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -115,7 +115,7 @@ qla27xx_dump_mpi_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, { struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; dma_addr_t dump_dma = ha->gid_list_dma; - uint32_t *chunk = (void *)ha->gid_list; + uint32_t *chunk = (uint32_t *)ha->gid_list; uint32_t dwords = qla2x00_gid_list_size(ha) / 4; uint32_t stat; ulong i, j, timer = 6000000; @@ -195,7 +195,7 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, int rval = QLA_FUNCTION_FAILED; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; dma_addr_t dump_dma = ha->gid_list_dma; - uint32_t *chunk = (void *)ha->gid_list; + uint32_t *chunk = (uint32_t *)ha->gid_list; uint32_t dwords = qla2x00_gid_list_size(ha) / 4; uint32_t stat; ulong i, j, timer = 6000000; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 02614e28451b..135440f4a922 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -992,7 +992,7 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) ql_dbg(ql_dbg_disc, vha, 0x20e8, "%s %8phC %02x:%02x:%02x CLS %x/%x lid %x \n", - __func__, (void *)&wwn, e->port_id[2], e->port_id[1], + __func__, &wwn, e->port_id[2], e->port_id[1], e->port_id[0], e->current_login_state, e->last_login_state, (loop_id & 0x7fff)); } @@ -1343,7 +1343,7 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) mb[9] = vha->vp_idx; mb[10] = opt; - mbx->u.mbx.in = (void *)pd; + mbx->u.mbx.in = pd; mbx->u.mbx.in_dma = pd_dma; sp->done = qla24xx_async_gpdb_sp_done; @@ -4128,7 +4128,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) req = ha->req_q_map[que]; if (!req || !test_bit(que, ha->req_qid_map)) continue; - req->out_ptr = (void *)(req->ring + req->length); + req->out_ptr = (uint16_t *)(req->ring + req->length); *req->out_ptr = 0; for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) req->outstanding_cmds[cnt] = NULL; @@ -4145,7 +4145,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) rsp = ha->rsp_q_map[que]; if (!rsp || !test_bit(que, ha->rsp_qid_map)) continue; - rsp->in_ptr = (void *)(rsp->ring + rsp->length); + rsp->in_ptr = (uint16_t *)(rsp->ring + rsp->length); *rsp->in_ptr = 0; /* Initialize response queue entries */ if (IS_QLAFX00(ha)) @@ -7446,7 +7446,7 @@ qla27xx_check_image_status_signature(struct qla27xx_image_status *image_status) static ulong qla27xx_image_status_checksum(struct qla27xx_image_status *image_status) { - uint32_t *p = (void *)image_status; + uint32_t *p = (uint32_t *)image_status; uint n = sizeof(*image_status) / sizeof(*p); uint32_t sum = 0; @@ -7509,7 +7509,7 @@ qla28xx_get_aux_images( goto check_sec_image; } - qla24xx_read_flash_data(vha, (void *)&pri_aux_image_status, + qla24xx_read_flash_data(vha, (uint32_t *)&pri_aux_image_status, ha->flt_region_aux_img_status_pri, sizeof(pri_aux_image_status) >> 2); qla27xx_print_image(vha, "Primary aux image", &pri_aux_image_status); @@ -7542,7 +7542,7 @@ qla28xx_get_aux_images( goto check_valid_image; } - qla24xx_read_flash_data(vha, (void *)&sec_aux_image_status, + qla24xx_read_flash_data(vha, (uint32_t *)&sec_aux_image_status, ha->flt_region_aux_img_status_sec, sizeof(sec_aux_image_status) >> 2); qla27xx_print_image(vha, "Secondary aux image", &sec_aux_image_status); @@ -7607,7 +7607,7 @@ qla27xx_get_active_image(struct scsi_qla_host *vha, goto check_sec_image; } - if (qla24xx_read_flash_data(vha, (void *)(&pri_image_status), + if (qla24xx_read_flash_data(vha, (uint32_t *)&pri_image_status, ha->flt_region_img_status_pri, sizeof(pri_image_status) >> 2) != QLA_SUCCESS) { WARN_ON_ONCE(true); @@ -7714,7 +7714,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr, ql_dbg(ql_dbg_init, vha, 0x008b, "FW: Loading firmware from flash (%x).\n", faddr); - dcode = (void *)req->ring; + dcode = (uint32_t *)req->ring; qla24xx_read_flash_data(vha, dcode, faddr, 8); if (qla24xx_risc_firmware_invalid(dcode)) { ql_log(ql_log_fatal, vha, 0x008c, @@ -7727,7 +7727,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr, return QLA_FUNCTION_FAILED; } - dcode = (void *)req->ring; + dcode = (uint32_t *)req->ring; *srisc_addr = 0; segments = FA_RISC_CODE_SEGMENTS; for (j = 0; j < segments; j++) { @@ -7778,7 +7778,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr, fwdt->template = NULL; fwdt->length = 0; - dcode = (void *)req->ring; + dcode = (uint32_t *)req->ring; qla24xx_read_flash_data(vha, dcode, faddr, 7); risc_size = be32_to_cpu(dcode[2]); ql_dbg(ql_dbg_init, vha, 0x0161, @@ -7970,7 +7970,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr) return QLA_FUNCTION_FAILED; } - fwcode = (void *)blob->fw->data; + fwcode = (uint32_t *)blob->fw->data; dcode = fwcode; if (qla24xx_risc_firmware_invalid(dcode)) { ql_log(ql_log_fatal, vha, 0x0093, @@ -7982,7 +7982,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr) return QLA_FUNCTION_FAILED; } - dcode = (void *)req->ring; + dcode = (uint32_t *)req->ring; *srisc_addr = 0; segments = FA_RISC_CODE_SEGMENTS; for (j = 0; j < segments; j++) { diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 985cae37a8f8..e6ab5f07406d 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3038,7 +3038,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id, int rval; mbx_cmd_t mc; mbx_cmd_t *mcp = &mc; - uint32_t *iter = (void *)stats; + uint32_t *iter = (uint32_t *)stats; ushort dwords = offsetof(typeof(*stats), link_up_cnt)/sizeof(*iter); struct qla_hw_data *ha = vha->hw; @@ -3097,7 +3097,7 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, int rval; mbx_cmd_t mc; mbx_cmd_t *mcp = &mc; - uint32_t *iter = (void *)stats; + uint32_t *iter = (uint32_t *)stats; ushort dwords = sizeof(*stats)/sizeof(*iter); ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088, @@ -4736,7 +4736,7 @@ qla82xx_set_driver_version(scsi_qla_host_t *vha, char *version) ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x117b, "Entered %s.\n", __func__); - str = (void *)version; + str = (uint16_t *)version; len = strlen(version); mcp->mb[0] = MBC_SET_RNID_PARAMS; diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index d82e92da529a..15efe2f04b86 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -770,7 +770,7 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options, req->req_q_in = ®->isp25mq.req_q_in; req->req_q_out = ®->isp25mq.req_q_out; req->max_q_depth = ha->req_q_map[0]->max_q_depth; - req->out_ptr = (void *)(req->ring + req->length); + req->out_ptr = (uint16_t *)(req->ring + req->length); mutex_unlock(&ha->mq_lock); ql_dbg(ql_dbg_multiq, base_vha, 0xc004, "ring_ptr=%p ring_index=%d, " @@ -884,7 +884,7 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options, reg = ISP_QUE_REG(ha, que_id); rsp->rsp_q_in = ®->isp25mq.rsp_q_in; rsp->rsp_q_out = ®->isp25mq.rsp_q_out; - rsp->in_ptr = (void *)(rsp->ring + rsp->length); + rsp->in_ptr = (uint16_t *)(rsp->ring + rsp->length); mutex_unlock(&ha->mq_lock); ql_dbg(ql_dbg_multiq, base_vha, 0xc00b, "options=%x id=%d rsp_q_in=%p rsp_q_out=%p\n", diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index c5be5163b663..908594c1541e 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -3212,7 +3212,7 @@ qlafx00_tm_iocb(srb_t *sp, struct tsk_mgmt_entry_fx00 *ptm_iocb) sizeof(struct scsi_lun)); } - memcpy((void *)ptm_iocb, &tm_iocb, + memcpy(ptm_iocb, &tm_iocb, sizeof(struct tsk_mgmt_entry_fx00)); wmb(); } @@ -3234,7 +3234,7 @@ qlafx00_abort_iocb(srb_t *sp, struct abort_iocb_entry_fx00 *pabt_iocb) abt_iocb.tgt_id_sts = cpu_to_le16(sp->fcport->tgt_id); abt_iocb.req_que_no = cpu_to_le16(req->id); - memcpy((void *)pabt_iocb, &abt_iocb, + memcpy(pabt_iocb, &abt_iocb, sizeof(struct abort_iocb_entry_fx00)); wmb(); } diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index ad3aa1947e7d..6f20e20559bb 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -295,7 +295,7 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, sp->name = "nvme_ls"; sp->done = qla_nvme_sp_ls_done; sp->put_fn = qla_nvme_release_ls_cmd_kref; - sp->priv = (void *)priv; + sp->priv = priv; priv->sp = sp; kref_init(&sp->cmd_kref); spin_lock_init(&priv->cmd_lock); @@ -560,7 +560,7 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, init_waitqueue_head(&sp->nvme_ls_waitq); kref_init(&sp->cmd_kref); spin_lock_init(&priv->cmd_lock); - sp->priv = (void *)priv; + sp->priv = priv; priv->sp = sp; sp->type = SRB_NVME_CMD; sp->name = "nvme_cmd"; diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c index a46830a99968..50e57603ce3d 100644 --- a/drivers/scsi/qla2xxx/qla_nx2.c +++ b/drivers/scsi/qla2xxx/qla_nx2.c @@ -2965,7 +2965,7 @@ qla8044_minidump_pex_dma_read(struct scsi_qla_host *vha, /* Prepare: Write pex-dma descriptor to MS memory. */ rval = qla8044_ms_mem_write_128b(vha, - m_hdr->desc_card_addr, (void *)&dma_desc, + m_hdr->desc_card_addr, (uint32_t *)&dma_desc, (sizeof(struct qla8044_pex_dma_descriptor)/16)); if (rval) { ql_log(ql_log_warn, vha, 0xb14a, @@ -2987,7 +2987,7 @@ qla8044_minidump_pex_dma_read(struct scsi_qla_host *vha, read_size += chunk_size; } - *d_ptr = (void *)data_ptr; + *d_ptr = (uint32_t *)data_ptr; error_exit: if (rdmem_buffer) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 2aa8ea6e1ceb..85c369fed9c5 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5915,7 +5915,7 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x0181, "-------- ELS REQ -------\n"); ql_dump_buffer(ql_dbg_init + ql_dbg_verbose, vha, 0x0182, - (void *)purex, sizeof(*purex)); + purex, sizeof(*purex)); if (qla25xx_rdp_rsp_reduce_size(vha, purex)) { rsp_payload_length = @@ -6031,7 +6031,7 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) memset(sfp, 0, SFP_RTDI_LEN); rval = qla2x00_read_sfp(vha, sfp_dma, sfp, 0xa2, 0x60, 10, 0); if (!rval) { - uint16_t *trx = (void *)sfp; /* already be16 */ + uint16_t *trx = (uint16_t *)sfp; /* already be16 */ rsp_payload->sfp_diag_desc.temperature = trx[0]; rsp_payload->sfp_diag_desc.vcc = trx[1]; rsp_payload->sfp_diag_desc.tx_bias = trx[2]; @@ -6140,7 +6140,7 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) memset(sfp, 0, SFP_RTDI_LEN); rval = qla2x00_read_sfp(vha, sfp_dma, sfp, 0xa2, 0, 64, 0); if (!rval) { - uint16_t *trx = (void *)sfp; /* already be16 */ + uint16_t *trx = (uint16_t *)sfp; /* already be16 */ /* Optical Element Descriptor, Temperature */ rsp_payload->optical_elmt_desc[0].high_alarm = trx[0]; @@ -6266,11 +6266,11 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x0184, "-------- ELS RSP -------\n"); ql_dump_buffer(ql_dbg_init + ql_dbg_verbose, vha, 0x0185, - (void *)rsp_els, sizeof(*rsp_els)); + rsp_els, sizeof(*rsp_els)); ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x0186, "-------- ELS RSP PAYLOAD -------\n"); ql_dump_buffer(ql_dbg_init + ql_dbg_verbose, vha, 0x0187, - (void *)rsp_payload, rsp_payload_length); + rsp_payload, rsp_payload_length); rval = qla2x00_issue_iocb(vha, rsp_els, rsp_els_dma, 0); diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index da984d7552d5..749b0c197d31 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -553,7 +553,7 @@ qla2xxx_find_flt_start(scsi_qla_host_t *vha, uint32_t *start) struct qla_hw_data *ha = vha->hw; struct req_que *req = ha->req_q_map[0]; struct qla_flt_location *fltl = (void *)req->ring; - uint32_t *dcode = (void *)req->ring; + uint32_t *dcode = (uint32_t *)req->ring; uint8_t *buf = (void *)req->ring, *bcode, last_image; /* @@ -610,7 +610,7 @@ qla2xxx_find_flt_start(scsi_qla_host_t *vha, uint32_t *start) if (memcmp(fltl->sig, "QFLT", 4)) goto end; - wptr = (void *)req->ring; + wptr = (uint16_t *)req->ring; cnt = sizeof(*fltl) / sizeof(*wptr); for (chksum = 0; cnt--; wptr++) chksum += le16_to_cpu(*wptr); @@ -682,7 +682,7 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) ha->flt_region_flt = flt_addr; wptr = (uint16_t *)ha->flt; - ha->isp_ops->read_optrom(vha, (void *)flt, flt_addr << 2, + ha->isp_ops->read_optrom(vha, flt, flt_addr << 2, (sizeof(struct qla_flt_header) + FLT_REGIONS_SIZE)); if (le16_to_cpu(*wptr) == 0xffff) @@ -949,7 +949,7 @@ qla2xxx_get_fdt_info(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct req_que *req = ha->req_q_map[0]; uint16_t cnt, chksum; - uint16_t *wptr = (void *)req->ring; + uint16_t *wptr = (uint16_t *)req->ring; struct qla_fdt_layout *fdt = (struct qla_fdt_layout *)req->ring; uint8_t man_id, flash_id; uint16_t mid = 0, fid = 0; @@ -2610,7 +2610,7 @@ qla24xx_read_optrom_data(struct scsi_qla_host *vha, void *buf, set_bit(MBX_UPDATE_FLASH_ACTIVE, &ha->mbx_cmd_flags); /* Go with read. */ - qla24xx_read_flash_data(vha, (void *)buf, offset >> 2, length >> 2); + qla24xx_read_flash_data(vha, buf, offset >> 2, length >> 2); /* Resume HBA. */ clear_bit(MBX_UPDATE_FLASH_ACTIVE, &ha->mbx_cmd_flags); @@ -3528,7 +3528,7 @@ qla24xx_get_flash_version(scsi_qla_host_t *vha, void *mbuf) memset(ha->gold_fw_version, 0, sizeof(ha->gold_fw_version)); faddr = ha->flt_region_gold_fw; - qla24xx_read_flash_data(vha, (void *)dcode, ha->flt_region_gold_fw, 8); + qla24xx_read_flash_data(vha, dcode, ha->flt_region_gold_fw, 8); if (qla24xx_risc_firmware_invalid(dcode)) { ql_log(ql_log_warn, vha, 0x0056, "Unrecognized golden fw at %#x.\n", faddr); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f7425875f4f9..77f976555159 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3885,7 +3885,7 @@ static void *qlt_ctio_to_cmd(struct scsi_qla_host *vha, return NULL; } - cmd = (void *) req->outstanding_cmds[h]; + cmd = req->outstanding_cmds[h]; if (unlikely(cmd == NULL)) { ql_dbg(ql_dbg_async, vha, 0xe053, "qla_target(%d): Suspicious: unable to find the command with handle %x req->id %d rsp->id %d\n", @@ -5932,7 +5932,7 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); if (tgt->link_reinit_iocb_pending) { qlt_send_notify_ack(ha->base_qpair, - (void *)&tgt->link_reinit_iocb, + &tgt->link_reinit_iocb, 0, 0, 0, 0, 0, 0); tgt->link_reinit_iocb_pending = 0; } From 2a4b684ab0aa2a8f8efede77ca1525042c2c0e01 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:10 -0700 Subject: [PATCH 420/562] scsi: qla2xxx: Use make_handle() instead of open-coding it Link: https://lore.kernel.org/r/20200518211712.11395-14-bvanassche@acm.org Cc: Arun Easi Cc: Nilesh Javali Cc: Martin Wilck Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Roman Bolshakov Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 87d0f5e4d81a..0a9a838c7f20 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -819,7 +819,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) goto skip_rio; switch (mb[0]) { case MBA_SCSI_COMPLETION: - handles[0] = le32_to_cpu((uint32_t)((mb[2] << 16) | mb[1])); + handles[0] = le32_to_cpu(make_handle(mb[2], mb[1])); handle_cnt = 1; break; case MBA_CMPLT_1_16BIT: @@ -858,10 +858,10 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) mb[0] = MBA_SCSI_COMPLETION; break; case MBA_CMPLT_2_32BIT: - handles[0] = le32_to_cpu((uint32_t)((mb[2] << 16) | mb[1])); - handles[1] = le32_to_cpu( - ((uint32_t)(RD_MAILBOX_REG(ha, reg, 7) << 16)) | - RD_MAILBOX_REG(ha, reg, 6)); + handles[0] = le32_to_cpu(make_handle(mb[2], mb[1])); + handles[1] = + le32_to_cpu(make_handle(RD_MAILBOX_REG(ha, reg, 7), + RD_MAILBOX_REG(ha, reg, 6))); handle_cnt = 2; mb[0] = MBA_SCSI_COMPLETION; break; From 21038b0900d1b8728ec77d9286d7b0b57ca7b585 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:11 -0700 Subject: [PATCH 421/562] scsi: qla2xxx: Fix endianness annotations in header files Annotate members of FC protocol and firmware dump data structures as big endian. Annotate members of RISC control structures as little endian. Annotate mailbox registers as little endian. Annotate the mb[] arrays as CPU-endian because communication of the mb[] values with the hardware happens through the readw() and writew() functions. readw() converts from __le16 to u16 and writew() converts from u16 to __le16. Annotate 'handles' as CPU-endian because for the firmware these are opaque values. Link: https://lore.kernel.org/r/20200518211712.11395-15-bvanassche@acm.org CC: Hannes Reinecke Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.h | 444 +++++++++--------- drivers/scsi/qla2xxx/qla_def.h | 652 +++++++++++++------------- drivers/scsi/qla2xxx/qla_fw.h | 754 +++++++++++++++--------------- drivers/scsi/qla2xxx/qla_inline.h | 2 +- drivers/scsi/qla2xxx/qla_mr.h | 8 +- drivers/scsi/qla2xxx/qla_nvme.h | 64 +-- drivers/scsi/qla2xxx/qla_nx.h | 36 +- drivers/scsi/qla2xxx/qla_target.h | 232 ++++----- drivers/scsi/qla2xxx/qla_tmpl.h | 2 +- 9 files changed, 1097 insertions(+), 1097 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index b106b6808d34..54ed020e6f75 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -12,205 +12,205 @@ */ struct qla2300_fw_dump { - uint16_t hccr; - uint16_t pbiu_reg[8]; - uint16_t risc_host_reg[8]; - uint16_t mailbox_reg[32]; - uint16_t resp_dma_reg[32]; - uint16_t dma_reg[48]; - uint16_t risc_hdw_reg[16]; - uint16_t risc_gp0_reg[16]; - uint16_t risc_gp1_reg[16]; - uint16_t risc_gp2_reg[16]; - uint16_t risc_gp3_reg[16]; - uint16_t risc_gp4_reg[16]; - uint16_t risc_gp5_reg[16]; - uint16_t risc_gp6_reg[16]; - uint16_t risc_gp7_reg[16]; - uint16_t frame_buf_hdw_reg[64]; - uint16_t fpm_b0_reg[64]; - uint16_t fpm_b1_reg[64]; - uint16_t risc_ram[0xf800]; - uint16_t stack_ram[0x1000]; - uint16_t data_ram[1]; + __be16 hccr; + __be16 pbiu_reg[8]; + __be16 risc_host_reg[8]; + __be16 mailbox_reg[32]; + __be16 resp_dma_reg[32]; + __be16 dma_reg[48]; + __be16 risc_hdw_reg[16]; + __be16 risc_gp0_reg[16]; + __be16 risc_gp1_reg[16]; + __be16 risc_gp2_reg[16]; + __be16 risc_gp3_reg[16]; + __be16 risc_gp4_reg[16]; + __be16 risc_gp5_reg[16]; + __be16 risc_gp6_reg[16]; + __be16 risc_gp7_reg[16]; + __be16 frame_buf_hdw_reg[64]; + __be16 fpm_b0_reg[64]; + __be16 fpm_b1_reg[64]; + __be16 risc_ram[0xf800]; + __be16 stack_ram[0x1000]; + __be16 data_ram[1]; }; struct qla2100_fw_dump { - uint16_t hccr; - uint16_t pbiu_reg[8]; - uint16_t mailbox_reg[32]; - uint16_t dma_reg[48]; - uint16_t risc_hdw_reg[16]; - uint16_t risc_gp0_reg[16]; - uint16_t risc_gp1_reg[16]; - uint16_t risc_gp2_reg[16]; - uint16_t risc_gp3_reg[16]; - uint16_t risc_gp4_reg[16]; - uint16_t risc_gp5_reg[16]; - uint16_t risc_gp6_reg[16]; - uint16_t risc_gp7_reg[16]; - uint16_t frame_buf_hdw_reg[16]; - uint16_t fpm_b0_reg[64]; - uint16_t fpm_b1_reg[64]; - uint16_t risc_ram[0xf000]; + __be16 hccr; + __be16 pbiu_reg[8]; + __be16 mailbox_reg[32]; + __be16 dma_reg[48]; + __be16 risc_hdw_reg[16]; + __be16 risc_gp0_reg[16]; + __be16 risc_gp1_reg[16]; + __be16 risc_gp2_reg[16]; + __be16 risc_gp3_reg[16]; + __be16 risc_gp4_reg[16]; + __be16 risc_gp5_reg[16]; + __be16 risc_gp6_reg[16]; + __be16 risc_gp7_reg[16]; + __be16 frame_buf_hdw_reg[16]; + __be16 fpm_b0_reg[64]; + __be16 fpm_b1_reg[64]; + __be16 risc_ram[0xf000]; }; struct qla24xx_fw_dump { - uint32_t host_status; - uint32_t host_reg[32]; - uint32_t shadow_reg[7]; - uint16_t mailbox_reg[32]; - uint32_t xseq_gp_reg[128]; - uint32_t xseq_0_reg[16]; - uint32_t xseq_1_reg[16]; - uint32_t rseq_gp_reg[128]; - uint32_t rseq_0_reg[16]; - uint32_t rseq_1_reg[16]; - uint32_t rseq_2_reg[16]; - uint32_t cmd_dma_reg[16]; - uint32_t req0_dma_reg[15]; - uint32_t resp0_dma_reg[15]; - uint32_t req1_dma_reg[15]; - uint32_t xmt0_dma_reg[32]; - uint32_t xmt1_dma_reg[32]; - uint32_t xmt2_dma_reg[32]; - uint32_t xmt3_dma_reg[32]; - uint32_t xmt4_dma_reg[32]; - uint32_t xmt_data_dma_reg[16]; - uint32_t rcvt0_data_dma_reg[32]; - uint32_t rcvt1_data_dma_reg[32]; - uint32_t risc_gp_reg[128]; - uint32_t lmc_reg[112]; - uint32_t fpm_hdw_reg[192]; - uint32_t fb_hdw_reg[176]; - uint32_t code_ram[0x2000]; - uint32_t ext_mem[1]; + __be32 host_status; + __be32 host_reg[32]; + __be32 shadow_reg[7]; + __be16 mailbox_reg[32]; + __be32 xseq_gp_reg[128]; + __be32 xseq_0_reg[16]; + __be32 xseq_1_reg[16]; + __be32 rseq_gp_reg[128]; + __be32 rseq_0_reg[16]; + __be32 rseq_1_reg[16]; + __be32 rseq_2_reg[16]; + __be32 cmd_dma_reg[16]; + __be32 req0_dma_reg[15]; + __be32 resp0_dma_reg[15]; + __be32 req1_dma_reg[15]; + __be32 xmt0_dma_reg[32]; + __be32 xmt1_dma_reg[32]; + __be32 xmt2_dma_reg[32]; + __be32 xmt3_dma_reg[32]; + __be32 xmt4_dma_reg[32]; + __be32 xmt_data_dma_reg[16]; + __be32 rcvt0_data_dma_reg[32]; + __be32 rcvt1_data_dma_reg[32]; + __be32 risc_gp_reg[128]; + __be32 lmc_reg[112]; + __be32 fpm_hdw_reg[192]; + __be32 fb_hdw_reg[176]; + __be32 code_ram[0x2000]; + __be32 ext_mem[1]; }; struct qla25xx_fw_dump { - uint32_t host_status; - uint32_t host_risc_reg[32]; - uint32_t pcie_regs[4]; - uint32_t host_reg[32]; - uint32_t shadow_reg[11]; - uint32_t risc_io_reg; - uint16_t mailbox_reg[32]; - uint32_t xseq_gp_reg[128]; - uint32_t xseq_0_reg[48]; - uint32_t xseq_1_reg[16]; - uint32_t rseq_gp_reg[128]; - uint32_t rseq_0_reg[32]; - uint32_t rseq_1_reg[16]; - uint32_t rseq_2_reg[16]; - uint32_t aseq_gp_reg[128]; - uint32_t aseq_0_reg[32]; - uint32_t aseq_1_reg[16]; - uint32_t aseq_2_reg[16]; - uint32_t cmd_dma_reg[16]; - uint32_t req0_dma_reg[15]; - uint32_t resp0_dma_reg[15]; - uint32_t req1_dma_reg[15]; - uint32_t xmt0_dma_reg[32]; - uint32_t xmt1_dma_reg[32]; - uint32_t xmt2_dma_reg[32]; - uint32_t xmt3_dma_reg[32]; - uint32_t xmt4_dma_reg[32]; - uint32_t xmt_data_dma_reg[16]; - uint32_t rcvt0_data_dma_reg[32]; - uint32_t rcvt1_data_dma_reg[32]; - uint32_t risc_gp_reg[128]; - uint32_t lmc_reg[128]; - uint32_t fpm_hdw_reg[192]; - uint32_t fb_hdw_reg[192]; - uint32_t code_ram[0x2000]; - uint32_t ext_mem[1]; + __be32 host_status; + __be32 host_risc_reg[32]; + __be32 pcie_regs[4]; + __be32 host_reg[32]; + __be32 shadow_reg[11]; + __be32 risc_io_reg; + __be16 mailbox_reg[32]; + __be32 xseq_gp_reg[128]; + __be32 xseq_0_reg[48]; + __be32 xseq_1_reg[16]; + __be32 rseq_gp_reg[128]; + __be32 rseq_0_reg[32]; + __be32 rseq_1_reg[16]; + __be32 rseq_2_reg[16]; + __be32 aseq_gp_reg[128]; + __be32 aseq_0_reg[32]; + __be32 aseq_1_reg[16]; + __be32 aseq_2_reg[16]; + __be32 cmd_dma_reg[16]; + __be32 req0_dma_reg[15]; + __be32 resp0_dma_reg[15]; + __be32 req1_dma_reg[15]; + __be32 xmt0_dma_reg[32]; + __be32 xmt1_dma_reg[32]; + __be32 xmt2_dma_reg[32]; + __be32 xmt3_dma_reg[32]; + __be32 xmt4_dma_reg[32]; + __be32 xmt_data_dma_reg[16]; + __be32 rcvt0_data_dma_reg[32]; + __be32 rcvt1_data_dma_reg[32]; + __be32 risc_gp_reg[128]; + __be32 lmc_reg[128]; + __be32 fpm_hdw_reg[192]; + __be32 fb_hdw_reg[192]; + __be32 code_ram[0x2000]; + __be32 ext_mem[1]; }; struct qla81xx_fw_dump { - uint32_t host_status; - uint32_t host_risc_reg[32]; - uint32_t pcie_regs[4]; - uint32_t host_reg[32]; - uint32_t shadow_reg[11]; - uint32_t risc_io_reg; - uint16_t mailbox_reg[32]; - uint32_t xseq_gp_reg[128]; - uint32_t xseq_0_reg[48]; - uint32_t xseq_1_reg[16]; - uint32_t rseq_gp_reg[128]; - uint32_t rseq_0_reg[32]; - uint32_t rseq_1_reg[16]; - uint32_t rseq_2_reg[16]; - uint32_t aseq_gp_reg[128]; - uint32_t aseq_0_reg[32]; - uint32_t aseq_1_reg[16]; - uint32_t aseq_2_reg[16]; - uint32_t cmd_dma_reg[16]; - uint32_t req0_dma_reg[15]; - uint32_t resp0_dma_reg[15]; - uint32_t req1_dma_reg[15]; - uint32_t xmt0_dma_reg[32]; - uint32_t xmt1_dma_reg[32]; - uint32_t xmt2_dma_reg[32]; - uint32_t xmt3_dma_reg[32]; - uint32_t xmt4_dma_reg[32]; - uint32_t xmt_data_dma_reg[16]; - uint32_t rcvt0_data_dma_reg[32]; - uint32_t rcvt1_data_dma_reg[32]; - uint32_t risc_gp_reg[128]; - uint32_t lmc_reg[128]; - uint32_t fpm_hdw_reg[224]; - uint32_t fb_hdw_reg[208]; - uint32_t code_ram[0x2000]; - uint32_t ext_mem[1]; + __be32 host_status; + __be32 host_risc_reg[32]; + __be32 pcie_regs[4]; + __be32 host_reg[32]; + __be32 shadow_reg[11]; + __be32 risc_io_reg; + __be16 mailbox_reg[32]; + __be32 xseq_gp_reg[128]; + __be32 xseq_0_reg[48]; + __be32 xseq_1_reg[16]; + __be32 rseq_gp_reg[128]; + __be32 rseq_0_reg[32]; + __be32 rseq_1_reg[16]; + __be32 rseq_2_reg[16]; + __be32 aseq_gp_reg[128]; + __be32 aseq_0_reg[32]; + __be32 aseq_1_reg[16]; + __be32 aseq_2_reg[16]; + __be32 cmd_dma_reg[16]; + __be32 req0_dma_reg[15]; + __be32 resp0_dma_reg[15]; + __be32 req1_dma_reg[15]; + __be32 xmt0_dma_reg[32]; + __be32 xmt1_dma_reg[32]; + __be32 xmt2_dma_reg[32]; + __be32 xmt3_dma_reg[32]; + __be32 xmt4_dma_reg[32]; + __be32 xmt_data_dma_reg[16]; + __be32 rcvt0_data_dma_reg[32]; + __be32 rcvt1_data_dma_reg[32]; + __be32 risc_gp_reg[128]; + __be32 lmc_reg[128]; + __be32 fpm_hdw_reg[224]; + __be32 fb_hdw_reg[208]; + __be32 code_ram[0x2000]; + __be32 ext_mem[1]; }; struct qla83xx_fw_dump { - uint32_t host_status; - uint32_t host_risc_reg[48]; - uint32_t pcie_regs[4]; - uint32_t host_reg[32]; - uint32_t shadow_reg[11]; - uint32_t risc_io_reg; - uint16_t mailbox_reg[32]; - uint32_t xseq_gp_reg[256]; - uint32_t xseq_0_reg[48]; - uint32_t xseq_1_reg[16]; - uint32_t xseq_2_reg[16]; - uint32_t rseq_gp_reg[256]; - uint32_t rseq_0_reg[32]; - uint32_t rseq_1_reg[16]; - uint32_t rseq_2_reg[16]; - uint32_t rseq_3_reg[16]; - uint32_t aseq_gp_reg[256]; - uint32_t aseq_0_reg[32]; - uint32_t aseq_1_reg[16]; - uint32_t aseq_2_reg[16]; - uint32_t aseq_3_reg[16]; - uint32_t cmd_dma_reg[64]; - uint32_t req0_dma_reg[15]; - uint32_t resp0_dma_reg[15]; - uint32_t req1_dma_reg[15]; - uint32_t xmt0_dma_reg[32]; - uint32_t xmt1_dma_reg[32]; - uint32_t xmt2_dma_reg[32]; - uint32_t xmt3_dma_reg[32]; - uint32_t xmt4_dma_reg[32]; - uint32_t xmt_data_dma_reg[16]; - uint32_t rcvt0_data_dma_reg[32]; - uint32_t rcvt1_data_dma_reg[32]; - uint32_t risc_gp_reg[128]; - uint32_t lmc_reg[128]; - uint32_t fpm_hdw_reg[256]; - uint32_t rq0_array_reg[256]; - uint32_t rq1_array_reg[256]; - uint32_t rp0_array_reg[256]; - uint32_t rp1_array_reg[256]; - uint32_t queue_control_reg[16]; - uint32_t fb_hdw_reg[432]; - uint32_t at0_array_reg[128]; - uint32_t code_ram[0x2400]; - uint32_t ext_mem[1]; + __be32 host_status; + __be32 host_risc_reg[48]; + __be32 pcie_regs[4]; + __be32 host_reg[32]; + __be32 shadow_reg[11]; + __be32 risc_io_reg; + __be16 mailbox_reg[32]; + __be32 xseq_gp_reg[256]; + __be32 xseq_0_reg[48]; + __be32 xseq_1_reg[16]; + __be32 xseq_2_reg[16]; + __be32 rseq_gp_reg[256]; + __be32 rseq_0_reg[32]; + __be32 rseq_1_reg[16]; + __be32 rseq_2_reg[16]; + __be32 rseq_3_reg[16]; + __be32 aseq_gp_reg[256]; + __be32 aseq_0_reg[32]; + __be32 aseq_1_reg[16]; + __be32 aseq_2_reg[16]; + __be32 aseq_3_reg[16]; + __be32 cmd_dma_reg[64]; + __be32 req0_dma_reg[15]; + __be32 resp0_dma_reg[15]; + __be32 req1_dma_reg[15]; + __be32 xmt0_dma_reg[32]; + __be32 xmt1_dma_reg[32]; + __be32 xmt2_dma_reg[32]; + __be32 xmt3_dma_reg[32]; + __be32 xmt4_dma_reg[32]; + __be32 xmt_data_dma_reg[16]; + __be32 rcvt0_data_dma_reg[32]; + __be32 rcvt1_data_dma_reg[32]; + __be32 risc_gp_reg[128]; + __be32 lmc_reg[128]; + __be32 fpm_hdw_reg[256]; + __be32 rq0_array_reg[256]; + __be32 rq1_array_reg[256]; + __be32 rp0_array_reg[256]; + __be32 rp1_array_reg[256]; + __be32 queue_control_reg[16]; + __be32 fb_hdw_reg[432]; + __be32 at0_array_reg[128]; + __be32 code_ram[0x2400]; + __be32 ext_mem[1]; }; #define EFT_NUM_BUFFERS 4 @@ -223,45 +223,45 @@ struct qla83xx_fw_dump { #define fce_calc_size(b) ((FCE_BYTES_PER_BUFFER) * (b)) struct qla2xxx_fce_chain { - uint32_t type; - uint32_t chain_size; + __be32 type; + __be32 chain_size; - uint32_t size; - uint32_t addr_l; - uint32_t addr_h; - uint32_t eregs[8]; + __be32 size; + __be32 addr_l; + __be32 addr_h; + __be32 eregs[8]; }; /* used by exchange off load and extended login offload */ struct qla2xxx_offld_chain { - uint32_t type; - uint32_t chain_size; + __be32 type; + __be32 chain_size; - uint32_t size; - uint32_t reserved; - u64 addr; + __be32 size; + __be32 reserved; + __be64 addr; }; struct qla2xxx_mq_chain { - uint32_t type; - uint32_t chain_size; + __be32 type; + __be32 chain_size; - uint32_t count; - uint32_t qregs[4 * QLA_MQ_SIZE]; + __be32 count; + __be32 qregs[4 * QLA_MQ_SIZE]; }; struct qla2xxx_mqueue_header { - uint32_t queue; + __be32 queue; #define TYPE_REQUEST_QUEUE 0x1 #define TYPE_RESPONSE_QUEUE 0x2 #define TYPE_ATIO_QUEUE 0x3 - uint32_t number; - uint32_t size; + __be32 number; + __be32 size; }; struct qla2xxx_mqueue_chain { - uint32_t type; - uint32_t chain_size; + __be32 type; + __be32 chain_size; }; #define DUMP_CHAIN_VARIANT 0x80000000 @@ -274,28 +274,28 @@ struct qla2xxx_mqueue_chain { struct qla2xxx_fw_dump { uint8_t signature[4]; - uint32_t version; + __be32 version; - uint32_t fw_major_version; - uint32_t fw_minor_version; - uint32_t fw_subminor_version; - uint32_t fw_attributes; + __be32 fw_major_version; + __be32 fw_minor_version; + __be32 fw_subminor_version; + __be32 fw_attributes; - uint32_t vendor; - uint32_t device; - uint32_t subsystem_vendor; - uint32_t subsystem_device; + __be32 vendor; + __be32 device; + __be32 subsystem_vendor; + __be32 subsystem_device; - uint32_t fixed_size; - uint32_t mem_size; - uint32_t req_q_size; - uint32_t rsp_q_size; + __be32 fixed_size; + __be32 mem_size; + __be32 req_q_size; + __be32 rsp_q_size; - uint32_t eft_size; - uint32_t eft_addr_l; - uint32_t eft_addr_h; + __be32 eft_size; + __be32 eft_addr_l; + __be32 eft_addr_h; - uint32_t header_size; + __be32 header_size; union { struct qla2100_fw_dump isp21; @@ -370,7 +370,7 @@ ql_log_qp(uint32_t, struct qla_qpair *, int32_t, const char *fmt, ...); extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *, uint32_t, void **); -extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *, +extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, __be32 *, uint32_t, void **); extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *, struct qla_hw_data *); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 3368fdf8b2dd..42dbf90d4651 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -504,7 +504,7 @@ struct srb_iocb { u32 rx_size; dma_addr_t els_plogi_pyld_dma; dma_addr_t els_resp_pyld_dma; - uint32_t fw_status[3]; + __le32 fw_status[3]; __le16 comp_status; __le16 len; } els_plogi; @@ -555,8 +555,8 @@ struct srb_iocb { #define MAX_IOCB_MB_REG 28 #define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t)) struct { - __le16 in_mb[MAX_IOCB_MB_REG]; /* from FW */ - __le16 out_mb[MAX_IOCB_MB_REG]; /* to FW */ + u16 in_mb[MAX_IOCB_MB_REG]; /* from FW */ + u16 out_mb[MAX_IOCB_MB_REG]; /* to FW */ void *out, *in; dma_addr_t out_dma, in_dma; struct completion comp; @@ -567,7 +567,7 @@ struct srb_iocb { } nack; struct { __le16 comp_status; - uint16_t rsp_pyld_len; + __le16 rsp_pyld_len; uint8_t aen_op; void *desc; @@ -698,23 +698,23 @@ struct msg_echo_lb { * ISP I/O Register Set structure definitions. */ struct device_reg_2xxx { - uint16_t flash_address; /* Flash BIOS address */ - uint16_t flash_data; /* Flash BIOS data */ - uint16_t unused_1[1]; /* Gap */ - uint16_t ctrl_status; /* Control/Status */ + __le16 flash_address; /* Flash BIOS address */ + __le16 flash_data; /* Flash BIOS data */ + __le16 unused_1[1]; /* Gap */ + __le16 ctrl_status; /* Control/Status */ #define CSR_FLASH_64K_BANK BIT_3 /* Flash upper 64K bank select */ #define CSR_FLASH_ENABLE BIT_1 /* Flash BIOS Read/Write enable */ #define CSR_ISP_SOFT_RESET BIT_0 /* ISP soft reset */ - uint16_t ictrl; /* Interrupt control */ + __le16 ictrl; /* Interrupt control */ #define ICR_EN_INT BIT_15 /* ISP enable interrupts. */ #define ICR_EN_RISC BIT_3 /* ISP enable RISC interrupts. */ - uint16_t istatus; /* Interrupt status */ + __le16 istatus; /* Interrupt status */ #define ISR_RISC_INT BIT_3 /* RISC interrupt */ - uint16_t semaphore; /* Semaphore */ - uint16_t nvram; /* NVRAM register. */ + __le16 semaphore; /* Semaphore */ + __le16 nvram; /* NVRAM register. */ #define NVR_DESELECT 0 #define NVR_BUSY BIT_15 #define NVR_WRT_ENABLE BIT_14 /* Write enable */ @@ -728,80 +728,80 @@ struct device_reg_2xxx { union { struct { - uint16_t mailbox0; - uint16_t mailbox1; - uint16_t mailbox2; - uint16_t mailbox3; - uint16_t mailbox4; - uint16_t mailbox5; - uint16_t mailbox6; - uint16_t mailbox7; - uint16_t unused_2[59]; /* Gap */ + __le16 mailbox0; + __le16 mailbox1; + __le16 mailbox2; + __le16 mailbox3; + __le16 mailbox4; + __le16 mailbox5; + __le16 mailbox6; + __le16 mailbox7; + __le16 unused_2[59]; /* Gap */ } __attribute__((packed)) isp2100; struct { /* Request Queue */ - uint16_t req_q_in; /* In-Pointer */ - uint16_t req_q_out; /* Out-Pointer */ + __le16 req_q_in; /* In-Pointer */ + __le16 req_q_out; /* Out-Pointer */ /* Response Queue */ - uint16_t rsp_q_in; /* In-Pointer */ - uint16_t rsp_q_out; /* Out-Pointer */ + __le16 rsp_q_in; /* In-Pointer */ + __le16 rsp_q_out; /* Out-Pointer */ /* RISC to Host Status */ - uint32_t host_status; + __le32 host_status; #define HSR_RISC_INT BIT_15 /* RISC interrupt */ #define HSR_RISC_PAUSED BIT_8 /* RISC Paused */ /* Host to Host Semaphore */ - uint16_t host_semaphore; - uint16_t unused_3[17]; /* Gap */ - uint16_t mailbox0; - uint16_t mailbox1; - uint16_t mailbox2; - uint16_t mailbox3; - uint16_t mailbox4; - uint16_t mailbox5; - uint16_t mailbox6; - uint16_t mailbox7; - uint16_t mailbox8; - uint16_t mailbox9; - uint16_t mailbox10; - uint16_t mailbox11; - uint16_t mailbox12; - uint16_t mailbox13; - uint16_t mailbox14; - uint16_t mailbox15; - uint16_t mailbox16; - uint16_t mailbox17; - uint16_t mailbox18; - uint16_t mailbox19; - uint16_t mailbox20; - uint16_t mailbox21; - uint16_t mailbox22; - uint16_t mailbox23; - uint16_t mailbox24; - uint16_t mailbox25; - uint16_t mailbox26; - uint16_t mailbox27; - uint16_t mailbox28; - uint16_t mailbox29; - uint16_t mailbox30; - uint16_t mailbox31; - uint16_t fb_cmd; - uint16_t unused_4[10]; /* Gap */ + __le16 host_semaphore; + __le16 unused_3[17]; /* Gap */ + __le16 mailbox0; + __le16 mailbox1; + __le16 mailbox2; + __le16 mailbox3; + __le16 mailbox4; + __le16 mailbox5; + __le16 mailbox6; + __le16 mailbox7; + __le16 mailbox8; + __le16 mailbox9; + __le16 mailbox10; + __le16 mailbox11; + __le16 mailbox12; + __le16 mailbox13; + __le16 mailbox14; + __le16 mailbox15; + __le16 mailbox16; + __le16 mailbox17; + __le16 mailbox18; + __le16 mailbox19; + __le16 mailbox20; + __le16 mailbox21; + __le16 mailbox22; + __le16 mailbox23; + __le16 mailbox24; + __le16 mailbox25; + __le16 mailbox26; + __le16 mailbox27; + __le16 mailbox28; + __le16 mailbox29; + __le16 mailbox30; + __le16 mailbox31; + __le16 fb_cmd; + __le16 unused_4[10]; /* Gap */ } __attribute__((packed)) isp2300; } u; - uint16_t fpm_diag_config; - uint16_t unused_5[0x4]; /* Gap */ - uint16_t risc_hw; - uint16_t unused_5_1; /* Gap */ - uint16_t pcr; /* Processor Control Register. */ - uint16_t unused_6[0x5]; /* Gap */ - uint16_t mctr; /* Memory Configuration and Timing. */ - uint16_t unused_7[0x3]; /* Gap */ - uint16_t fb_cmd_2100; /* Unused on 23XX */ - uint16_t unused_8[0x3]; /* Gap */ - uint16_t hccr; /* Host command & control register. */ + __le16 fpm_diag_config; + __le16 unused_5[0x4]; /* Gap */ + __le16 risc_hw; + __le16 unused_5_1; /* Gap */ + __le16 pcr; /* Processor Control Register. */ + __le16 unused_6[0x5]; /* Gap */ + __le16 mctr; /* Memory Configuration and Timing. */ + __le16 unused_7[0x3]; /* Gap */ + __le16 fb_cmd_2100; /* Unused on 23XX */ + __le16 unused_8[0x3]; /* Gap */ + __le16 hccr; /* Host command & control register. */ #define HCCR_HOST_INT BIT_7 /* Host interrupt bit */ #define HCCR_RISC_PAUSE BIT_5 /* Pause mode bit */ /* HCCR commands */ @@ -814,9 +814,9 @@ struct device_reg_2xxx { #define HCCR_DISABLE_PARITY_PAUSE 0x4001 /* Disable parity error RISC pause. */ #define HCCR_ENABLE_PARITY 0xA000 /* Enable PARITY interrupt */ - uint16_t unused_9[5]; /* Gap */ - uint16_t gpiod; /* GPIO Data register. */ - uint16_t gpioe; /* GPIO Enable register. */ + __le16 unused_9[5]; /* Gap */ + __le16 gpiod; /* GPIO Data register. */ + __le16 gpioe; /* GPIO Enable register. */ #define GPIO_LED_MASK 0x00C0 #define GPIO_LED_GREEN_OFF_AMBER_OFF 0x0000 #define GPIO_LED_GREEN_ON_AMBER_OFF 0x0040 @@ -828,95 +828,95 @@ struct device_reg_2xxx { union { struct { - uint16_t unused_10[8]; /* Gap */ - uint16_t mailbox8; - uint16_t mailbox9; - uint16_t mailbox10; - uint16_t mailbox11; - uint16_t mailbox12; - uint16_t mailbox13; - uint16_t mailbox14; - uint16_t mailbox15; - uint16_t mailbox16; - uint16_t mailbox17; - uint16_t mailbox18; - uint16_t mailbox19; - uint16_t mailbox20; - uint16_t mailbox21; - uint16_t mailbox22; - uint16_t mailbox23; /* Also probe reg. */ + __le16 unused_10[8]; /* Gap */ + __le16 mailbox8; + __le16 mailbox9; + __le16 mailbox10; + __le16 mailbox11; + __le16 mailbox12; + __le16 mailbox13; + __le16 mailbox14; + __le16 mailbox15; + __le16 mailbox16; + __le16 mailbox17; + __le16 mailbox18; + __le16 mailbox19; + __le16 mailbox20; + __le16 mailbox21; + __le16 mailbox22; + __le16 mailbox23; /* Also probe reg. */ } __attribute__((packed)) isp2200; } u_end; }; struct device_reg_25xxmq { - uint32_t req_q_in; - uint32_t req_q_out; - uint32_t rsp_q_in; - uint32_t rsp_q_out; - uint32_t atio_q_in; - uint32_t atio_q_out; + __le32 req_q_in; + __le32 req_q_out; + __le32 rsp_q_in; + __le32 rsp_q_out; + __le32 atio_q_in; + __le32 atio_q_out; }; struct device_reg_fx00 { - uint32_t mailbox0; /* 00 */ - uint32_t mailbox1; /* 04 */ - uint32_t mailbox2; /* 08 */ - uint32_t mailbox3; /* 0C */ - uint32_t mailbox4; /* 10 */ - uint32_t mailbox5; /* 14 */ - uint32_t mailbox6; /* 18 */ - uint32_t mailbox7; /* 1C */ - uint32_t mailbox8; /* 20 */ - uint32_t mailbox9; /* 24 */ - uint32_t mailbox10; /* 28 */ - uint32_t mailbox11; - uint32_t mailbox12; - uint32_t mailbox13; - uint32_t mailbox14; - uint32_t mailbox15; - uint32_t mailbox16; - uint32_t mailbox17; - uint32_t mailbox18; - uint32_t mailbox19; - uint32_t mailbox20; - uint32_t mailbox21; - uint32_t mailbox22; - uint32_t mailbox23; - uint32_t mailbox24; - uint32_t mailbox25; - uint32_t mailbox26; - uint32_t mailbox27; - uint32_t mailbox28; - uint32_t mailbox29; - uint32_t mailbox30; - uint32_t mailbox31; - uint32_t aenmailbox0; - uint32_t aenmailbox1; - uint32_t aenmailbox2; - uint32_t aenmailbox3; - uint32_t aenmailbox4; - uint32_t aenmailbox5; - uint32_t aenmailbox6; - uint32_t aenmailbox7; + __le32 mailbox0; /* 00 */ + __le32 mailbox1; /* 04 */ + __le32 mailbox2; /* 08 */ + __le32 mailbox3; /* 0C */ + __le32 mailbox4; /* 10 */ + __le32 mailbox5; /* 14 */ + __le32 mailbox6; /* 18 */ + __le32 mailbox7; /* 1C */ + __le32 mailbox8; /* 20 */ + __le32 mailbox9; /* 24 */ + __le32 mailbox10; /* 28 */ + __le32 mailbox11; + __le32 mailbox12; + __le32 mailbox13; + __le32 mailbox14; + __le32 mailbox15; + __le32 mailbox16; + __le32 mailbox17; + __le32 mailbox18; + __le32 mailbox19; + __le32 mailbox20; + __le32 mailbox21; + __le32 mailbox22; + __le32 mailbox23; + __le32 mailbox24; + __le32 mailbox25; + __le32 mailbox26; + __le32 mailbox27; + __le32 mailbox28; + __le32 mailbox29; + __le32 mailbox30; + __le32 mailbox31; + __le32 aenmailbox0; + __le32 aenmailbox1; + __le32 aenmailbox2; + __le32 aenmailbox3; + __le32 aenmailbox4; + __le32 aenmailbox5; + __le32 aenmailbox6; + __le32 aenmailbox7; /* Request Queue. */ - uint32_t req_q_in; /* A0 - Request Queue In-Pointer */ - uint32_t req_q_out; /* A4 - Request Queue Out-Pointer */ + __le32 req_q_in; /* A0 - Request Queue In-Pointer */ + __le32 req_q_out; /* A4 - Request Queue Out-Pointer */ /* Response Queue. */ - uint32_t rsp_q_in; /* A8 - Response Queue In-Pointer */ - uint32_t rsp_q_out; /* AC - Response Queue Out-Pointer */ + __le32 rsp_q_in; /* A8 - Response Queue In-Pointer */ + __le32 rsp_q_out; /* AC - Response Queue Out-Pointer */ /* Init values shadowed on FW Up Event */ - uint32_t initval0; /* B0 */ - uint32_t initval1; /* B4 */ - uint32_t initval2; /* B8 */ - uint32_t initval3; /* BC */ - uint32_t initval4; /* C0 */ - uint32_t initval5; /* C4 */ - uint32_t initval6; /* C8 */ - uint32_t initval7; /* CC */ - uint32_t fwheartbeat; /* D0 */ - uint32_t pseudoaen; /* D4 */ + __le32 initval0; /* B0 */ + __le32 initval1; /* B4 */ + __le32 initval2; /* B8 */ + __le32 initval3; /* BC */ + __le32 initval4; /* C0 */ + __le32 initval5; /* C4 */ + __le32 initval6; /* C8 */ + __le32 initval7; /* CC */ + __le32 fwheartbeat; /* D0 */ + __le32 pseudoaen; /* D4 */ }; @@ -1351,7 +1351,7 @@ typedef struct { uint8_t port_id[4]; uint8_t node_name[WWN_SIZE]; uint8_t port_name[WWN_SIZE]; - uint16_t execution_throttle; + __le16 execution_throttle; uint16_t execution_count; uint8_t reset_count; uint8_t reserved_2; @@ -1437,9 +1437,9 @@ typedef struct { */ uint8_t firmware_options[2]; - uint16_t frame_payload_size; - uint16_t max_iocb_allocation; - uint16_t execution_throttle; + __le16 frame_payload_size; + __le16 max_iocb_allocation; + __le16 execution_throttle; uint8_t retry_count; uint8_t retry_delay; /* unused */ uint8_t port_name[WWN_SIZE]; /* Big endian. */ @@ -1448,17 +1448,17 @@ typedef struct { uint8_t login_timeout; uint8_t node_name[WWN_SIZE]; /* Big endian. */ - uint16_t request_q_outpointer; - uint16_t response_q_inpointer; - uint16_t request_q_length; - uint16_t response_q_length; - __le64 request_q_address __packed; - __le64 response_q_address __packed; + __le16 request_q_outpointer; + __le16 response_q_inpointer; + __le16 request_q_length; + __le16 response_q_length; + __le64 request_q_address __packed; + __le64 response_q_address __packed; - uint16_t lun_enables; + __le16 lun_enables; uint8_t command_resource_count; uint8_t immediate_notify_resource_count; - uint16_t timeout; + __le16 timeout; uint8_t reserved_2[2]; /* @@ -1606,8 +1606,8 @@ typedef struct { uint8_t firmware_options[2]; uint16_t frame_payload_size; - uint16_t max_iocb_allocation; - uint16_t execution_throttle; + __le16 max_iocb_allocation; + __le16 execution_throttle; uint8_t retry_count; uint8_t retry_delay; /* unused */ uint8_t port_name[WWN_SIZE]; /* Big endian. */ @@ -1731,7 +1731,7 @@ typedef struct { uint8_t reset_delay; uint8_t port_down_retry_count; uint8_t boot_id_number; - uint16_t max_luns_per_target; + __le16 max_luns_per_target; uint8_t fcode_boot_port_name[WWN_SIZE]; uint8_t alternate_port_name[WWN_SIZE]; uint8_t alternate_node_name[WWN_SIZE]; @@ -1837,7 +1837,7 @@ struct atio { }; typedef union { - uint16_t extended; + __le16 extended; struct { uint8_t reserved; uint8_t standard; @@ -1863,18 +1863,18 @@ typedef struct { uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System handle. */ target_id_t target; /* SCSI ID */ - uint16_t lun; /* SCSI LUN */ - uint16_t control_flags; /* Control flags. */ + __le16 lun; /* SCSI LUN */ + __le16 control_flags; /* Control flags. */ #define CF_WRITE BIT_6 #define CF_READ BIT_5 #define CF_SIMPLE_TAG BIT_3 #define CF_ORDERED_TAG BIT_2 #define CF_HEAD_TAG BIT_1 uint16_t reserved_1; - uint16_t timeout; /* Command timeout. */ - uint16_t dseg_count; /* Data segment count. */ + __le16 timeout; /* Command timeout. */ + __le16 dseg_count; /* Data segment count. */ uint8_t scsi_cdb[MAX_CMDSZ]; /* SCSI command words. */ - uint32_t byte_count; /* Total byte count. */ + __le32 byte_count; /* Total byte count. */ union { struct dsd32 dsd32[3]; struct dsd64 dsd64[2]; @@ -1892,11 +1892,11 @@ typedef struct { uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System handle. */ target_id_t target; /* SCSI ID */ - uint16_t lun; /* SCSI LUN */ - uint16_t control_flags; /* Control flags. */ + __le16 lun; /* SCSI LUN */ + __le16 control_flags; /* Control flags. */ uint16_t reserved_1; - uint16_t timeout; /* Command timeout. */ - uint16_t dseg_count; /* Data segment count. */ + __le16 timeout; /* Command timeout. */ + __le16 dseg_count; /* Data segment count. */ uint8_t scsi_cdb[MAX_CMDSZ]; /* SCSI command words. */ uint32_t byte_count; /* Total byte count. */ struct dsd64 dsd[2]; @@ -1958,7 +1958,7 @@ struct crc_context { __le16 guard_seed; /* Initial Guard Seed */ __le16 prot_opts; /* Requested Data Protection Mode */ __le16 blk_size; /* Data size in bytes */ - uint16_t runt_blk_guard; /* Guard value for runt block (tape + __le16 runt_blk_guard; /* Guard value for runt block (tape * only) */ __le32 byte_count; /* Total byte count/ total data * transfer count */ @@ -2011,13 +2011,13 @@ typedef struct { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System handle. */ - uint16_t scsi_status; /* SCSI status. */ - uint16_t comp_status; /* Completion status. */ - uint16_t state_flags; /* State flags. */ - uint16_t status_flags; /* Status flags. */ - uint16_t rsp_info_len; /* Response Info Length. */ - uint16_t req_sense_length; /* Request sense data length. */ - uint32_t residual_length; /* Residual transfer length. */ + __le16 scsi_status; /* SCSI status. */ + __le16 comp_status; /* Completion status. */ + __le16 state_flags; /* State flags. */ + __le16 status_flags; /* Status flags. */ + __le16 rsp_info_len; /* Response Info Length. */ + __le16 req_sense_length; /* Request sense data length. */ + __le32 residual_length; /* Residual transfer length. */ uint8_t rsp_info[8]; /* FCP response information. */ uint8_t req_sense_data[32]; /* Request sense data. */ } sts_entry_t; @@ -2149,8 +2149,8 @@ typedef struct { /* clear port changed, */ /* use sequence number. */ uint8_t reserved_1; - uint16_t sequence_number; /* Sequence number of event */ - uint16_t lun; /* SCSI LUN */ + __le16 sequence_number; /* Sequence number of event */ + __le16 lun; /* SCSI LUN */ uint8_t reserved_2[48]; } mrk_entry_t; @@ -2165,19 +2165,19 @@ typedef struct { uint8_t entry_status; /* Entry Status. */ uint32_t handle1; /* System handle. */ target_id_t loop_id; - uint16_t status; - uint16_t control_flags; /* Control flags. */ + __le16 status; + __le16 control_flags; /* Control flags. */ uint16_t reserved2; - uint16_t timeout; - uint16_t cmd_dsd_count; - uint16_t total_dsd_count; + __le16 timeout; + __le16 cmd_dsd_count; + __le16 total_dsd_count; uint8_t type; uint8_t r_ctl; - uint16_t rx_id; + __le16 rx_id; uint16_t reserved3; uint32_t handle2; - uint32_t rsp_bytecount; - uint32_t req_bytecount; + __le32 rsp_bytecount; + __le32 req_bytecount; struct dsd64 req_dsd; struct dsd64 rsp_dsd; } ms_iocb_entry_t; @@ -2205,20 +2205,20 @@ struct mbx_entry { uint32_t handle; target_id_t loop_id; - uint16_t status; - uint16_t state_flags; - uint16_t status_flags; + __le16 status; + __le16 state_flags; + __le16 status_flags; uint32_t sys_define2[2]; - uint16_t mb0; - uint16_t mb1; - uint16_t mb2; - uint16_t mb3; - uint16_t mb6; - uint16_t mb7; - uint16_t mb9; - uint16_t mb10; + __le16 mb0; + __le16 mb1; + __le16 mb2; + __le16 mb3; + __le16 mb6; + __le16 mb7; + __le16 mb9; + __le16 mb10; uint32_t reserved_2[2]; uint8_t node_name[WWN_SIZE]; uint8_t port_name[WWN_SIZE]; @@ -2240,52 +2240,52 @@ struct imm_ntfy_from_isp { uint8_t entry_status; /* Entry Status. */ union { struct { - uint32_t sys_define_2; /* System defined. */ + __le32 sys_define_2; /* System defined. */ target_id_t target; - uint16_t lun; + __le16 lun; uint8_t target_id; uint8_t reserved_1; - uint16_t status_modifier; - uint16_t status; - uint16_t task_flags; - uint16_t seq_id; - uint16_t srr_rx_id; - uint32_t srr_rel_offs; - uint16_t srr_ui; + __le16 status_modifier; + __le16 status; + __le16 task_flags; + __le16 seq_id; + __le16 srr_rx_id; + __le32 srr_rel_offs; + __le16 srr_ui; #define SRR_IU_DATA_IN 0x1 #define SRR_IU_DATA_OUT 0x5 #define SRR_IU_STATUS 0x7 - uint16_t srr_ox_id; + __le16 srr_ox_id; uint8_t reserved_2[28]; } isp2x; struct { uint32_t reserved; - uint16_t nport_handle; + __le16 nport_handle; uint16_t reserved_2; - uint16_t flags; + __le16 flags; #define NOTIFY24XX_FLAGS_GLOBAL_TPRLO BIT_1 #define NOTIFY24XX_FLAGS_PUREX_IOCB BIT_0 - uint16_t srr_rx_id; - uint16_t status; + __le16 srr_rx_id; + __le16 status; uint8_t status_subcode; uint8_t fw_handle; - uint32_t exchange_address; - uint32_t srr_rel_offs; - uint16_t srr_ui; - uint16_t srr_ox_id; + __le32 exchange_address; + __le32 srr_rel_offs; + __le16 srr_ui; + __le16 srr_ox_id; union { struct { uint8_t node_name[8]; } plogi; /* PLOGI/ADISC/PDISC */ struct { /* PRLI word 3 bit 0-15 */ - uint16_t wd3_lo; + __le16 wd3_lo; uint8_t resv0[6]; } prli; struct { uint8_t port_id[3]; uint8_t resv1; - uint16_t nport_handle; + __le16 nport_handle; uint16_t resv2; } req_els; } u; @@ -2298,7 +2298,7 @@ struct imm_ntfy_from_isp { } isp24; } u; uint16_t reserved_7; - uint16_t ox_id; + __le16 ox_id; } __packed; #endif @@ -2688,8 +2688,8 @@ static const char * const port_dstate_str[] = { #define FDMI_HBA_VENDOR_IDENTIFIER 0xe0 struct ct_fdmi_hba_attr { - uint16_t type; - uint16_t len; + __be16 type; + __be16 len; union { uint8_t node_name[WWN_SIZE]; uint8_t manufacturer[64]; @@ -2701,11 +2701,11 @@ struct ct_fdmi_hba_attr { uint8_t orom_version[16]; uint8_t fw_version[32]; uint8_t os_version[128]; - uint32_t max_ct_len; + __be32 max_ct_len; uint8_t sym_name[256]; - uint32_t vendor_specific_info; - uint32_t num_ports; + __be32 vendor_specific_info; + __be32 num_ports; uint8_t fabric_name[WWN_SIZE]; uint8_t bios_name[32]; uint8_t vendor_identifier[8]; @@ -2713,12 +2713,12 @@ struct ct_fdmi_hba_attr { }; struct ct_fdmi1_hba_attributes { - uint32_t count; + __be32 count; struct ct_fdmi_hba_attr entry[FDMI1_HBA_ATTR_COUNT]; }; struct ct_fdmi2_hba_attributes { - uint32_t count; + __be32 count; struct ct_fdmi_hba_attr entry[FDMI2_HBA_ATTR_COUNT]; }; @@ -2770,44 +2770,44 @@ struct ct_fdmi2_hba_attributes { #define FC_CLASS_2_3 0x0C struct ct_fdmi_port_attr { - uint16_t type; - uint16_t len; + __be16 type; + __be16 len; union { uint8_t fc4_types[32]; - uint32_t sup_speed; - uint32_t cur_speed; - uint32_t max_frame_size; + __be32 sup_speed; + __be32 cur_speed; + __be32 max_frame_size; uint8_t os_dev_name[32]; uint8_t host_name[256]; uint8_t node_name[WWN_SIZE]; uint8_t port_name[WWN_SIZE]; uint8_t port_sym_name[128]; - uint32_t port_type; - uint32_t port_supported_cos; + __be32 port_type; + __be32 port_supported_cos; uint8_t fabric_name[WWN_SIZE]; uint8_t port_fc4_type[32]; - uint32_t port_state; - uint32_t num_ports; - uint32_t port_id; + __be32 port_state; + __be32 num_ports; + __be32 port_id; uint8_t smartsan_service[24]; uint8_t smartsan_guid[16]; uint8_t smartsan_version[24]; uint8_t smartsan_prod_name[16]; - uint32_t smartsan_port_info; - uint32_t smartsan_qos_support; - uint32_t smartsan_security_support; + __be32 smartsan_port_info; + __be32 smartsan_qos_support; + __be32 smartsan_security_support; } a; }; struct ct_fdmi1_port_attributes { - uint32_t count; + __be32 count; struct ct_fdmi_port_attr entry[FDMI1_PORT_ATTR_COUNT]; }; struct ct_fdmi2_port_attributes { - uint32_t count; + __be32 count; struct ct_fdmi_port_attr entry[FDMI2_PORT_ATTR_COUNT]; }; @@ -2861,8 +2861,8 @@ struct ct_cmd_hdr { /* CT command request */ struct ct_sns_req { struct ct_cmd_hdr header; - uint16_t command; - uint16_t max_rsp_size; + __be16 command; + __be16 max_rsp_size; uint8_t fragment_id; uint8_t reserved[3]; @@ -2919,7 +2919,7 @@ struct ct_sns_req { struct { uint8_t hba_identifier[8]; - uint32_t entry_count; + __be32 entry_count; uint8_t port_name[8]; struct ct_fdmi2_hba_attributes attrs; } rhba; @@ -2974,7 +2974,7 @@ struct ct_sns_req { /* CT command response header */ struct ct_rsp_hdr { struct ct_cmd_hdr header; - uint16_t response; + __be16 response; uint16_t residual; uint8_t fragment_id; uint8_t reason_code; @@ -3060,8 +3060,8 @@ struct ct_sns_rsp { } gfpn_id; struct { - uint16_t speeds; - uint16_t speed; + __be16 speeds; + __be16 speed; } gpsc; #define GFF_FCP_SCSI_OFFSET 7 @@ -3151,13 +3151,13 @@ struct fab_scan { struct sns_cmd_pkt { union { struct { - uint16_t buffer_length; - uint16_t reserved_1; - __le64 buffer_address __packed; - uint16_t subcommand_length; - uint16_t reserved_2; - uint16_t subcommand; - uint16_t size; + __le16 buffer_length; + __le16 reserved_1; + __le64 buffer_address __packed; + __le16 subcommand_length; + __le16 reserved_2; + __le16 subcommand; + __le16 size; uint32_t reserved_3; uint8_t param[36]; } cmd; @@ -3183,7 +3183,7 @@ struct gid_list_info { uint8_t area; uint8_t domain; uint8_t loop_id_2100; /* ISP2100/ISP2200 -- 4 bytes. */ - uint16_t loop_id; /* ISP23XX -- 6 bytes. */ + __le16 loop_id; /* ISP23XX -- 6 bytes. */ uint16_t reserved_1; /* ISP24XX -- 8 bytes. */ }; @@ -3492,8 +3492,8 @@ struct rsp_que { dma_addr_t dma; response_t *ring; response_t *ring_ptr; - uint32_t __iomem *rsp_q_in; /* FWI2-capable only. */ - uint32_t __iomem *rsp_q_out; + __le32 __iomem *rsp_q_in; /* FWI2-capable only. */ + __le32 __iomem *rsp_q_out; uint16_t ring_index; uint16_t out_ptr; uint16_t *in_ptr; /* queue shadow in index */ @@ -3519,8 +3519,8 @@ struct req_que { dma_addr_t dma; request_t *ring; request_t *ring_ptr; - uint32_t __iomem *req_q_in; /* FWI2-capable only. */ - uint32_t __iomem *req_q_out; + __le32 __iomem *req_q_in; /* FWI2-capable only. */ + __le32 __iomem *req_q_out; uint16_t ring_index; uint16_t in_ptr; uint16_t *out_ptr; /* queue shadow out index */ @@ -3588,7 +3588,7 @@ struct qla_qpair { struct list_head hints_list; uint16_t cpuid; uint16_t retry_term_cnt; - uint32_t retry_term_exchg_addr; + __le32 retry_term_exchg_addr; uint64_t retry_term_jiff; struct qla_tgt_counters tgt_counters; }; @@ -3615,98 +3615,98 @@ struct rdp_req_payload { struct rdp_rsp_payload { struct { - uint32_t cmd; - uint32_t len; + __be32 cmd; + __be32 len; } hdr; /* LS Request Info descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; - uint32_t req_payload_word_0; + __be32 desc_tag; + __be32 desc_len; + __be32 req_payload_word_0; } ls_req_info_desc; /* LS Request Info descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; - uint32_t req_payload_word_0; + __be32 desc_tag; + __be32 desc_len; + __be32 req_payload_word_0; } ls_req_info_desc2; /* SFP diagnostic param descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; - uint16_t temperature; - uint16_t vcc; - uint16_t tx_bias; - uint16_t tx_power; - uint16_t rx_power; - uint16_t sfp_flags; + __be32 desc_tag; + __be32 desc_len; + __be16 temperature; + __be16 vcc; + __be16 tx_bias; + __be16 tx_power; + __be16 rx_power; + __be16 sfp_flags; } sfp_diag_desc; /* Port Speed Descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; - uint16_t speed_capab; - uint16_t operating_speed; + __be32 desc_tag; + __be32 desc_len; + __be16 speed_capab; + __be16 operating_speed; } port_speed_desc; /* Link Error Status Descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; - uint32_t link_fail_cnt; - uint32_t loss_sync_cnt; - uint32_t loss_sig_cnt; - uint32_t prim_seq_err_cnt; - uint32_t inval_xmit_word_cnt; - uint32_t inval_crc_cnt; + __be32 desc_tag; + __be32 desc_len; + __be32 link_fail_cnt; + __be32 loss_sync_cnt; + __be32 loss_sig_cnt; + __be32 prim_seq_err_cnt; + __be32 inval_xmit_word_cnt; + __be32 inval_crc_cnt; uint8_t pn_port_phy_type; uint8_t reserved[3]; } ls_err_desc; /* Port name description with diag param */ struct { - uint32_t desc_tag; - uint32_t desc_len; + __be32 desc_tag; + __be32 desc_len; uint8_t WWNN[WWN_SIZE]; uint8_t WWPN[WWN_SIZE]; } port_name_diag_desc; /* Port Name desc for Direct attached Fx_Port or Nx_Port */ struct { - uint32_t desc_tag; - uint32_t desc_len; + __be32 desc_tag; + __be32 desc_len; uint8_t WWNN[WWN_SIZE]; uint8_t WWPN[WWN_SIZE]; } port_name_direct_desc; /* Buffer Credit descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; - uint32_t fcport_b2b; - uint32_t attached_fcport_b2b; - uint32_t fcport_rtt; + __be32 desc_tag; + __be32 desc_len; + __be32 fcport_b2b; + __be32 attached_fcport_b2b; + __be32 fcport_rtt; } buffer_credit_desc; /* Optical Element Data Descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; - uint16_t high_alarm; - uint16_t low_alarm; - uint16_t high_warn; - uint16_t low_warn; - uint32_t element_flags; + __be32 desc_tag; + __be32 desc_len; + __be16 high_alarm; + __be16 low_alarm; + __be16 high_warn; + __be16 low_warn; + __be32 element_flags; } optical_elmt_desc[5]; /* Optical Product Data Descriptor */ struct { - uint32_t desc_tag; - uint32_t desc_len; + __be32 desc_tag; + __be32 desc_len; uint8_t vendor_name[16]; uint8_t part_number[16]; uint8_t serial_number[16]; @@ -3744,17 +3744,17 @@ struct qlt_hw_data { struct atio *atio_ring_ptr; /* Current address. */ uint16_t atio_ring_index; /* Current index. */ uint16_t atio_q_length; - uint32_t __iomem *atio_q_in; - uint32_t __iomem *atio_q_out; + __le32 __iomem *atio_q_in; + __le32 __iomem *atio_q_out; struct qla_tgt_func_tmpl *tgt_ops; struct qla_tgt_vp_map *tgt_vp_map; int saved_set; - uint16_t saved_exchange_count; - uint32_t saved_firmware_options_1; - uint32_t saved_firmware_options_2; - uint32_t saved_firmware_options_3; + __le16 saved_exchange_count; + __le32 saved_firmware_options_1; + __le32 saved_firmware_options_2; + __le32 saved_firmware_options_3; uint8_t saved_firmware_options[2]; uint8_t saved_add_firmware_options[2]; @@ -4253,7 +4253,7 @@ struct qla_hw_data { uint16_t fw_options[16]; /* slots: 1,2,3,10,11 */ uint8_t fw_seriallink_options[4]; - uint16_t fw_seriallink_options24[4]; + __le16 fw_seriallink_options24[4]; uint8_t serdes_version[3]; uint8_t mpi_version[3]; @@ -4436,7 +4436,7 @@ struct qla_hw_data { #define NUM_DSD_CHAIN 4096 uint8_t fw_type; - __le32 file_prd_off; /* File firmware product offset */ + uint32_t file_prd_off; /* File firmware product offset */ uint32_t md_template_size; void *md_tmplt_hdr; @@ -4744,13 +4744,13 @@ typedef struct scsi_qla_host { struct qla27xx_image_status { uint8_t image_status_mask; - uint16_t generation; + __le16 generation; uint8_t ver_major; uint8_t ver_minor; uint8_t bitmap; /* 28xx only */ uint8_t reserved[2]; - uint32_t checksum; - uint32_t signature; + __le32 checksum; + __le32 signature; } __packed; /* 28xx aux image status bimap values */ diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index f18d2d00d28c..d1e12a29c3f7 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -134,28 +134,28 @@ struct vp_database_24xx { struct nvram_24xx { /* NVRAM header. */ uint8_t id[4]; - uint16_t nvram_version; + __le16 nvram_version; uint16_t reserved_0; /* Firmware Initialization Control Block. */ - uint16_t version; + __le16 version; uint16_t reserved_1; - __le16 frame_payload_size; - uint16_t execution_throttle; - uint16_t exchange_count; - uint16_t hard_address; + __le16 frame_payload_size; + __le16 execution_throttle; + __le16 exchange_count; + __le16 hard_address; uint8_t port_name[WWN_SIZE]; uint8_t node_name[WWN_SIZE]; - uint16_t login_retry_count; - uint16_t link_down_on_nos; - uint16_t interrupt_delay_timer; - uint16_t login_timeout; + __le16 login_retry_count; + __le16 link_down_on_nos; + __le16 interrupt_delay_timer; + __le16 login_timeout; - uint32_t firmware_options_1; - uint32_t firmware_options_2; - uint32_t firmware_options_3; + __le32 firmware_options_1; + __le32 firmware_options_2; + __le32 firmware_options_3; /* Offset 56. */ @@ -178,7 +178,7 @@ struct nvram_24xx { * BIT 11-13 = Output Emphasis 4G * BIT 14-15 = Reserved */ - uint16_t seriallink_options[4]; + __le16 seriallink_options[4]; uint16_t reserved_2[16]; @@ -218,25 +218,25 @@ struct nvram_24xx { * * BIT 16-31 = */ - uint32_t host_p; + __le32 host_p; uint8_t alternate_port_name[WWN_SIZE]; uint8_t alternate_node_name[WWN_SIZE]; uint8_t boot_port_name[WWN_SIZE]; - uint16_t boot_lun_number; + __le16 boot_lun_number; uint16_t reserved_8; uint8_t alt1_boot_port_name[WWN_SIZE]; - uint16_t alt1_boot_lun_number; + __le16 alt1_boot_lun_number; uint16_t reserved_9; uint8_t alt2_boot_port_name[WWN_SIZE]; - uint16_t alt2_boot_lun_number; + __le16 alt2_boot_lun_number; uint16_t reserved_10; uint8_t alt3_boot_port_name[WWN_SIZE]; - uint16_t alt3_boot_lun_number; + __le16 alt3_boot_lun_number; uint16_t reserved_11; /* @@ -249,23 +249,23 @@ struct nvram_24xx { * BIT 6 = Reserved * BIT 7-31 = */ - uint32_t efi_parameters; + __le32 efi_parameters; uint8_t reset_delay; uint8_t reserved_12; uint16_t reserved_13; - uint16_t boot_id_number; + __le16 boot_id_number; uint16_t reserved_14; - uint16_t max_luns_per_target; + __le16 max_luns_per_target; uint16_t reserved_15; - uint16_t port_down_retry_count; - uint16_t link_down_timeout; + __le16 port_down_retry_count; + __le16 link_down_timeout; /* FCode parameters. */ - uint16_t fcode_parameter; + __le16 fcode_parameter; uint16_t reserved_16[3]; @@ -275,13 +275,13 @@ struct nvram_24xx { uint8_t prev_drv_ver_minor; uint8_t prev_drv_ver_subminor; - uint16_t prev_bios_ver_major; - uint16_t prev_bios_ver_minor; + __le16 prev_bios_ver_major; + __le16 prev_bios_ver_minor; - uint16_t prev_efi_ver_major; - uint16_t prev_efi_ver_minor; + __le16 prev_efi_ver_major; + __le16 prev_efi_ver_minor; - uint16_t prev_fw_ver_major; + __le16 prev_fw_ver_major; uint8_t prev_fw_ver_minor; uint8_t prev_fw_ver_subminor; @@ -309,7 +309,7 @@ struct nvram_24xx { uint16_t subsystem_vendor_id; uint16_t subsystem_device_id; - uint32_t checksum; + __le32 checksum; }; /* @@ -318,46 +318,46 @@ struct nvram_24xx { */ #define ICB_VERSION 1 struct init_cb_24xx { - uint16_t version; + __le16 version; uint16_t reserved_1; - uint16_t frame_payload_size; - uint16_t execution_throttle; - uint16_t exchange_count; + __le16 frame_payload_size; + __le16 execution_throttle; + __le16 exchange_count; - uint16_t hard_address; + __le16 hard_address; uint8_t port_name[WWN_SIZE]; /* Big endian. */ uint8_t node_name[WWN_SIZE]; /* Big endian. */ - uint16_t response_q_inpointer; - uint16_t request_q_outpointer; + __le16 response_q_inpointer; + __le16 request_q_outpointer; - uint16_t login_retry_count; + __le16 login_retry_count; - uint16_t prio_request_q_outpointer; + __le16 prio_request_q_outpointer; - uint16_t response_q_length; - uint16_t request_q_length; + __le16 response_q_length; + __le16 request_q_length; - uint16_t link_down_on_nos; /* Milliseconds. */ + __le16 link_down_on_nos; /* Milliseconds. */ - uint16_t prio_request_q_length; + __le16 prio_request_q_length; __le64 request_q_address __packed; __le64 response_q_address __packed; __le64 prio_request_q_address __packed; - uint16_t msix; - uint16_t msix_atio; + __le16 msix; + __le16 msix_atio; uint8_t reserved_2[4]; - uint16_t atio_q_inpointer; - uint16_t atio_q_length; - __le64 atio_q_address __packed; + __le16 atio_q_inpointer; + __le16 atio_q_length; + __le64 atio_q_address __packed; - uint16_t interrupt_delay_timer; /* 100us increments. */ - uint16_t login_timeout; + __le16 interrupt_delay_timer; /* 100us increments. */ + __le16 login_timeout; /* * BIT 0 = Enable Hard Loop Id @@ -378,7 +378,7 @@ struct init_cb_24xx { * BIT 14 = Node Name Option * BIT 15-31 = Reserved */ - uint32_t firmware_options_1; + __le32 firmware_options_1; /* * BIT 0 = Operation Mode bit 0 @@ -399,7 +399,7 @@ struct init_cb_24xx { * BIT 14 = Enable Target PRLI Control * BIT 15-31 = Reserved */ - uint32_t firmware_options_2; + __le32 firmware_options_2; /* * BIT 0 = Reserved @@ -425,9 +425,9 @@ struct init_cb_24xx { * BIT 30 = Enable request queue 0 out index shadowing * BIT 31 = Reserved */ - uint32_t firmware_options_3; - uint16_t qos; - uint16_t rid; + __le32 firmware_options_3; + __le16 qos; + __le16 rid; uint8_t reserved_3[20]; }; @@ -443,27 +443,27 @@ struct cmd_bidir { uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT hanlde. */ + __le16 nport_handle; /* N_PORT handle. */ - uint16_t timeout; /* Commnad timeout. */ + __le16 timeout; /* Command timeout. */ - uint16_t wr_dseg_count; /* Write Data segment count. */ - uint16_t rd_dseg_count; /* Read Data segment count. */ + __le16 wr_dseg_count; /* Write Data segment count. */ + __le16 rd_dseg_count; /* Read Data segment count. */ struct scsi_lun lun; /* FCP LUN (BE). */ - uint16_t control_flags; /* Control flags. */ + __le16 control_flags; /* Control flags. */ #define BD_WRAP_BACK BIT_3 #define BD_READ_DATA BIT_1 #define BD_WRITE_DATA BIT_0 - uint16_t fcp_cmnd_dseg_len; /* Data segment length. */ + __le16 fcp_cmnd_dseg_len; /* Data segment length. */ __le64 fcp_cmnd_dseg_address __packed;/* Data segment address. */ uint16_t reserved[2]; /* Reserved */ - uint32_t rd_byte_count; /* Total Byte count Read. */ - uint32_t wr_byte_count; /* Total Byte count write. */ + __le32 rd_byte_count; /* Total Byte count Read. */ + __le32 wr_byte_count; /* Total Byte count write. */ uint8_t port_id[3]; /* PortID of destination port.*/ uint8_t vp_index; @@ -480,28 +480,28 @@ struct cmd_type_6 { uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ - uint16_t timeout; /* Command timeout. */ + __le16 nport_handle; /* N_PORT handle. */ + __le16 timeout; /* Command timeout. */ - uint16_t dseg_count; /* Data segment count. */ + __le16 dseg_count; /* Data segment count. */ - uint16_t fcp_rsp_dsd_len; /* FCP_RSP DSD length. */ + __le16 fcp_rsp_dsd_len; /* FCP_RSP DSD length. */ struct scsi_lun lun; /* FCP LUN (BE). */ - uint16_t control_flags; /* Control flags. */ + __le16 control_flags; /* Control flags. */ #define CF_DIF_SEG_DESCR_ENABLE BIT_3 #define CF_DATA_SEG_DESCR_ENABLE BIT_2 #define CF_READ_DATA BIT_1 #define CF_WRITE_DATA BIT_0 - uint16_t fcp_cmnd_dseg_len; /* Data segment length. */ + __le16 fcp_cmnd_dseg_len; /* Data segment length. */ /* Data segment address. */ __le64 fcp_cmnd_dseg_address __packed; /* Data segment address. */ __le64 fcp_rsp_dseg_address __packed; - uint32_t byte_count; /* Total byte count. */ + __le32 byte_count; /* Total byte count. */ uint8_t port_id[3]; /* PortID of destination port. */ uint8_t vp_index; @@ -518,16 +518,16 @@ struct cmd_type_7 { uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ - uint16_t timeout; /* Command timeout. */ + __le16 nport_handle; /* N_PORT handle. */ + __le16 timeout; /* Command timeout. */ #define FW_MAX_TIMEOUT 0x1999 - uint16_t dseg_count; /* Data segment count. */ + __le16 dseg_count; /* Data segment count. */ uint16_t reserved_1; struct scsi_lun lun; /* FCP LUN (BE). */ - uint16_t task_mgmt_flags; /* Task management flags. */ + __le16 task_mgmt_flags; /* Task management flags. */ #define TMF_CLEAR_ACA BIT_14 #define TMF_TARGET_RESET BIT_13 #define TMF_LUN_RESET BIT_12 @@ -547,7 +547,7 @@ struct cmd_type_7 { uint8_t crn; uint8_t fcp_cdb[MAX_CMDSZ]; /* SCSI command words. */ - uint32_t byte_count; /* Total byte count. */ + __le32 byte_count; /* Total byte count. */ uint8_t port_id[3]; /* PortID of destination port. */ uint8_t vp_index; @@ -565,29 +565,29 @@ struct cmd_type_crc_2 { uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ - uint16_t timeout; /* Command timeout. */ + __le16 nport_handle; /* N_PORT handle. */ + __le16 timeout; /* Command timeout. */ - uint16_t dseg_count; /* Data segment count. */ + __le16 dseg_count; /* Data segment count. */ - uint16_t fcp_rsp_dseg_len; /* FCP_RSP DSD length. */ + __le16 fcp_rsp_dseg_len; /* FCP_RSP DSD length. */ struct scsi_lun lun; /* FCP LUN (BE). */ - uint16_t control_flags; /* Control flags. */ + __le16 control_flags; /* Control flags. */ - uint16_t fcp_cmnd_dseg_len; /* Data segment length. */ + __le16 fcp_cmnd_dseg_len; /* Data segment length. */ __le64 fcp_cmnd_dseg_address __packed; /* Data segment address. */ __le64 fcp_rsp_dseg_address __packed; - uint32_t byte_count; /* Total byte count. */ + __le32 byte_count; /* Total byte count. */ uint8_t port_id[3]; /* PortID of destination port. */ uint8_t vp_index; __le64 crc_context_address __packed; /* Data segment address. */ - uint16_t crc_context_len; /* Data segment length. */ + __le16 crc_context_len; /* Data segment length. */ uint16_t reserved_1; /* MUST be set to 0. */ }; @@ -604,32 +604,32 @@ struct sts_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t comp_status; /* Completion status. */ - uint16_t ox_id; /* OX_ID used by the firmware. */ + __le16 comp_status; /* Completion status. */ + __le16 ox_id; /* OX_ID used by the firmware. */ - uint32_t residual_len; /* FW calc residual transfer length. */ + __le32 residual_len; /* FW calc residual transfer length. */ union { uint16_t reserved_1; - uint16_t nvme_rsp_pyld_len; + __le16 nvme_rsp_pyld_len; }; - uint16_t state_flags; /* State flags. */ + __le16 state_flags; /* State flags. */ #define SF_TRANSFERRED_DATA BIT_11 #define SF_NVME_ERSP BIT_6 #define SF_FCP_RSP_DMA BIT_0 - uint16_t retry_delay; - uint16_t scsi_status; /* SCSI status. */ + __le16 retry_delay; + __le16 scsi_status; /* SCSI status. */ #define SS_CONFIRMATION_REQ BIT_12 - uint32_t rsp_residual_count; /* FCP RSP residual count. */ + __le32 rsp_residual_count; /* FCP RSP residual count. */ - uint32_t sense_len; /* FCP SENSE length. */ + __le32 sense_len; /* FCP SENSE length. */ union { struct { - uint32_t rsp_data_len; /* FCP response data length */ + __le32 rsp_data_len; /* FCP response data length */ uint8_t data[28]; /* FCP rsp/sense information */ }; struct nvme_fc_ersp_iu nvme_ersp; @@ -672,7 +672,7 @@ struct mrk_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ + __le16 nport_handle; /* N_PORT handle. */ uint8_t modifier; /* Modifier (7-0). */ #define MK_SYNC_ID_LUN 0 /* Synchronize ID/LUN */ @@ -701,24 +701,24 @@ struct ct_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t comp_status; /* Completion status. */ + __le16 comp_status; /* Completion status. */ - uint16_t nport_handle; /* N_PORT handle. */ + __le16 nport_handle; /* N_PORT handle. */ - uint16_t cmd_dsd_count; + __le16 cmd_dsd_count; uint8_t vp_index; uint8_t reserved_1; - uint16_t timeout; /* Command timeout. */ + __le16 timeout; /* Command timeout. */ uint16_t reserved_2; - uint16_t rsp_dsd_count; + __le16 rsp_dsd_count; uint8_t reserved_3[10]; - uint32_t rsp_byte_count; - uint32_t cmd_byte_count; + __le32 rsp_byte_count; + __le32 cmd_byte_count; struct dsd64 dsd[2]; }; @@ -733,17 +733,17 @@ struct purex_entry_24xx { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ - uint16_t reserved1; + __le16 reserved1; uint8_t vp_idx; uint8_t reserved2; - uint16_t status_flags; - uint16_t nport_handle; + __le16 status_flags; + __le16 nport_handle; - uint16_t frame_size; - uint16_t trunc_frame_size; + __le16 frame_size; + __le16 trunc_frame_size; - uint32_t rx_xchg_addr; + __le32 rx_xchg_addr; uint8_t d_id[3]; uint8_t r_ctl; @@ -754,13 +754,13 @@ struct purex_entry_24xx { uint8_t f_ctl[3]; uint8_t type; - uint16_t seq_cnt; + __le16 seq_cnt; uint8_t df_ctl; uint8_t seq_id; - uint16_t rx_id; - uint16_t ox_id; - uint32_t param; + __le16 rx_id; + __le16 ox_id; + __le32 param; uint8_t els_frame_payload[20]; }; @@ -777,18 +777,18 @@ struct els_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t comp_status; /* response only */ - uint16_t nport_handle; + __le16 comp_status; /* response only */ + __le16 nport_handle; - uint16_t tx_dsd_count; + __le16 tx_dsd_count; uint8_t vp_index; uint8_t sof_type; #define EST_SOFI3 (1 << 4) #define EST_SOFI2 (3 << 4) - uint32_t rx_xchg_address; /* Receive exchange address. */ - uint16_t rx_dsd_count; + __le32 rx_xchg_address; /* Receive exchange address. */ + __le16 rx_dsd_count; uint8_t opcode; uint8_t reserved_2; @@ -796,7 +796,7 @@ struct els_entry_24xx { uint8_t d_id[3]; uint8_t s_id[3]; - uint16_t control_flags; /* Control flags. */ + __le16 control_flags; /* Control flags. */ #define ECF_PAYLOAD_DESCR_MASK (BIT_15|BIT_14|BIT_13) #define EPD_ELS_COMMAND (0 << 13) #define EPD_ELS_ACC (1 << 13) @@ -817,10 +817,10 @@ struct els_entry_24xx { __le32 rx_len; /* DSD 1 length. */ }; struct { - uint32_t total_byte_count; - uint32_t error_subcode_1; - uint32_t error_subcode_2; - uint32_t error_subcode_3; + __le32 total_byte_count; + __le32 error_subcode_1; + __le32 error_subcode_2; + __le32 error_subcode_3; }; }; }; @@ -831,19 +831,19 @@ struct els_sts_entry_24xx { uint8_t sys_define; /* System Defined. */ uint8_t entry_status; /* Entry Status. */ - uint32_t handle; /* System handle. */ + __le32 handle; /* System handle. */ - uint16_t comp_status; + __le16 comp_status; - uint16_t nport_handle; /* N_PORT handle. */ + __le16 nport_handle; /* N_PORT handle. */ - uint16_t reserved_1; + __le16 reserved_1; uint8_t vp_index; uint8_t sof_type; - uint32_t rx_xchg_address; /* Receive exchange address. */ - uint16_t reserved_2; + __le32 rx_xchg_address; /* Receive exchange address. */ + __le16 reserved_2; uint8_t opcode; uint8_t reserved_3; @@ -851,13 +851,13 @@ struct els_sts_entry_24xx { uint8_t d_id[3]; uint8_t s_id[3]; - uint16_t control_flags; /* Control flags. */ - uint32_t total_byte_count; - uint32_t error_subcode_1; - uint32_t error_subcode_2; - uint32_t error_subcode_3; + __le16 control_flags; /* Control flags. */ + __le32 total_byte_count; + __le32 error_subcode_1; + __le32 error_subcode_2; + __le32 error_subcode_3; - uint32_t reserved_4[4]; + __le32 reserved_4[4]; }; /* * ISP queue - Mailbox Command entry structure definition. @@ -884,12 +884,12 @@ struct logio_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t comp_status; /* Completion status. */ + __le16 comp_status; /* Completion status. */ #define CS_LOGIO_ERROR 0x31 /* Login/Logout IOCB error. */ - uint16_t nport_handle; /* N_PORT handle. */ + __le16 nport_handle; /* N_PORT handle. */ - uint16_t control_flags; /* Control flags. */ + __le16 control_flags; /* Control flags. */ /* Modifiers. */ #define LCF_INCLUDE_SNS BIT_10 /* Include SNS (FFFFFC) during LOGO. */ #define LCF_FCP2_OVERRIDE BIT_9 /* Set/Reset word 3 of PRLI. */ @@ -918,7 +918,7 @@ struct logio_entry_24xx { uint8_t rsp_size; /* Response size in 32bit words. */ - uint32_t io_parameter[11]; /* General I/O parameters. */ + __le32 io_parameter[11]; /* General I/O parameters. */ #define LSC_SCODE_NOLINK 0x01 #define LSC_SCODE_NOIOCB 0x02 #define LSC_SCODE_NOXCB 0x03 @@ -946,17 +946,17 @@ struct tsk_mgmt_entry { uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ + __le16 nport_handle; /* N_PORT handle. */ uint16_t reserved_1; - uint16_t delay; /* Activity delay in seconds. */ + __le16 delay; /* Activity delay in seconds. */ - uint16_t timeout; /* Command timeout. */ + __le16 timeout; /* Command timeout. */ struct scsi_lun lun; /* FCP LUN (BE). */ - uint32_t control_flags; /* Control Flags. */ + __le32 control_flags; /* Control Flags. */ #define TCF_NOTMCMD_TO_TARGET BIT_31 #define TCF_LUN_RESET BIT_4 #define TCF_ABORT_TASK_SET BIT_3 @@ -981,15 +981,15 @@ struct abort_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ + __le16 nport_handle; /* N_PORT handle. */ /* or Completion status. */ - uint16_t options; /* Options. */ + __le16 options; /* Options. */ #define AOF_NO_ABTS BIT_0 /* Do not send any ABTS. */ uint32_t handle_to_abort; /* System handle to abort. */ - uint16_t req_que_no; + __le16 req_que_no; uint8_t reserved_1[30]; uint8_t port_id[3]; /* PortID of destination port. */ @@ -1006,16 +1006,16 @@ struct abts_entry_24xx { uint8_t handle_count; uint8_t entry_status; - uint32_t handle; /* type 0x55 only */ + __le32 handle; /* type 0x55 only */ - uint16_t comp_status; /* type 0x55 only */ - uint16_t nport_handle; /* type 0x54 only */ + __le16 comp_status; /* type 0x55 only */ + __le16 nport_handle; /* type 0x54 only */ - uint16_t control_flags; /* type 0x55 only */ + __le16 control_flags; /* type 0x55 only */ uint8_t vp_idx; uint8_t sof_type; /* sof_type is upper nibble */ - uint32_t rx_xch_addr; + __le32 rx_xch_addr; uint8_t d_id[3]; uint8_t r_ctl; @@ -1026,30 +1026,30 @@ struct abts_entry_24xx { uint8_t f_ctl[3]; uint8_t type; - uint16_t seq_cnt; + __le16 seq_cnt; uint8_t df_ctl; uint8_t seq_id; - uint16_t rx_id; - uint16_t ox_id; + __le16 rx_id; + __le16 ox_id; - uint32_t param; + __le32 param; union { struct { - uint32_t subcode3; - uint32_t rsvd; - uint32_t subcode1; - uint32_t subcode2; + __le32 subcode3; + __le32 rsvd; + __le32 subcode1; + __le32 subcode2; } error; struct { - uint16_t rsrvd1; + __le16 rsrvd1; uint8_t last_seq_id; uint8_t seq_id_valid; - uint16_t aborted_rx_id; - uint16_t aborted_ox_id; - uint16_t high_seq_cnt; - uint16_t low_seq_cnt; + __le16 aborted_rx_id; + __le16 aborted_ox_id; + __le16 high_seq_cnt; + __le16 low_seq_cnt; } ba_acc; struct { uint8_t vendor_unique; @@ -1058,7 +1058,7 @@ struct abts_entry_24xx { } ba_rjt; } payload; - uint32_t rx_xch_addr_to_abort; + __le32 rx_xch_addr_to_abort; } __packed; /* ABTS payload explanation values */ @@ -1087,7 +1087,7 @@ struct abts_entry_24xx { * ISP I/O Register Set structure definitions. */ struct device_reg_24xx { - uint32_t flash_addr; /* Flash/NVRAM BIOS address. */ + __le32 flash_addr; /* Flash/NVRAM BIOS address. */ #define FARX_DATA_FLAG BIT_31 #define FARX_ACCESS_FLASH_CONF 0x7FFD0000 #define FARX_ACCESS_FLASH_DATA 0x7FF00000 @@ -1138,9 +1138,9 @@ struct device_reg_24xx { #define HW_EVENT_NVRAM_CHKSUM_ERR 0xF023 #define HW_EVENT_FLASH_FW_ERR 0xF024 - uint32_t flash_data; /* Flash/NVRAM BIOS data. */ + __le32 flash_data; /* Flash/NVRAM BIOS data. */ - uint32_t ctrl_status; /* Control/Status. */ + __le32 ctrl_status; /* Control/Status. */ #define CSRX_FLASH_ACCESS_ERROR BIT_18 /* Flash/NVRAM Access Error. */ #define CSRX_DMA_ACTIVE BIT_17 /* DMA Active status. */ #define CSRX_DMA_SHUTDOWN BIT_16 /* DMA Shutdown control status. */ @@ -1166,35 +1166,35 @@ struct device_reg_24xx { #define CSRX_FLASH_ENABLE BIT_1 /* Flash BIOS Read/Write enable. */ #define CSRX_ISP_SOFT_RESET BIT_0 /* ISP soft reset. */ - uint32_t ictrl; /* Interrupt control. */ + __le32 ictrl; /* Interrupt control. */ #define ICRX_EN_RISC_INT BIT_3 /* Enable RISC interrupts on PCI. */ - uint32_t istatus; /* Interrupt status. */ + __le32 istatus; /* Interrupt status. */ #define ISRX_RISC_INT BIT_3 /* RISC interrupt. */ - uint32_t unused_1[2]; /* Gap. */ + __le32 unused_1[2]; /* Gap. */ /* Request Queue. */ - uint32_t req_q_in; /* In-Pointer. */ - uint32_t req_q_out; /* Out-Pointer. */ + __le32 req_q_in; /* In-Pointer. */ + __le32 req_q_out; /* Out-Pointer. */ /* Response Queue. */ - uint32_t rsp_q_in; /* In-Pointer. */ - uint32_t rsp_q_out; /* Out-Pointer. */ + __le32 rsp_q_in; /* In-Pointer. */ + __le32 rsp_q_out; /* Out-Pointer. */ /* Priority Request Queue. */ - uint32_t preq_q_in; /* In-Pointer. */ - uint32_t preq_q_out; /* Out-Pointer. */ + __le32 preq_q_in; /* In-Pointer. */ + __le32 preq_q_out; /* Out-Pointer. */ - uint32_t unused_2[2]; /* Gap. */ + __le32 unused_2[2]; /* Gap. */ /* ATIO Queue. */ - uint32_t atio_q_in; /* In-Pointer. */ - uint32_t atio_q_out; /* Out-Pointer. */ + __le32 atio_q_in; /* In-Pointer. */ + __le32 atio_q_out; /* Out-Pointer. */ - uint32_t host_status; + __le32 host_status; #define HSRX_RISC_INT BIT_15 /* RISC to Host interrupt. */ #define HSRX_RISC_PAUSED BIT_8 /* RISC Paused. */ - uint32_t hccr; /* Host command & control register. */ + __le32 hccr; /* Host command & control register. */ /* HCCR statuses. */ #define HCCRX_HOST_INT BIT_6 /* Host to RISC interrupt bit. */ #define HCCRX_RISC_RESET BIT_5 /* RISC Reset mode bit. */ @@ -1216,7 +1216,7 @@ struct device_reg_24xx { /* Clear RISC to PCI interrupt. */ #define HCCRX_CLR_RISC_INT 0xA0000000 - uint32_t gpiod; /* GPIO Data register. */ + __le32 gpiod; /* GPIO Data register. */ /* LED update mask. */ #define GPDX_LED_UPDATE_MASK (BIT_20|BIT_19|BIT_18) @@ -1235,7 +1235,7 @@ struct device_reg_24xx { /* Data in/out. */ #define GPDX_DATA_INOUT (BIT_1|BIT_0) - uint32_t gpioe; /* GPIO Enable register. */ + __le32 gpioe; /* GPIO Enable register. */ /* Enable update mask. */ #define GPEX_ENABLE_UPDATE_MASK (BIT_17|BIT_16) /* Enable update mask. */ @@ -1243,52 +1243,52 @@ struct device_reg_24xx { /* Enable. */ #define GPEX_ENABLE (BIT_1|BIT_0) - uint32_t iobase_addr; /* I/O Bus Base Address register. */ + __le32 iobase_addr; /* I/O Bus Base Address register. */ - uint32_t unused_3[10]; /* Gap. */ + __le32 unused_3[10]; /* Gap. */ - uint16_t mailbox0; - uint16_t mailbox1; - uint16_t mailbox2; - uint16_t mailbox3; - uint16_t mailbox4; - uint16_t mailbox5; - uint16_t mailbox6; - uint16_t mailbox7; - uint16_t mailbox8; - uint16_t mailbox9; - uint16_t mailbox10; - uint16_t mailbox11; - uint16_t mailbox12; - uint16_t mailbox13; - uint16_t mailbox14; - uint16_t mailbox15; - uint16_t mailbox16; - uint16_t mailbox17; - uint16_t mailbox18; - uint16_t mailbox19; - uint16_t mailbox20; - uint16_t mailbox21; - uint16_t mailbox22; - uint16_t mailbox23; - uint16_t mailbox24; - uint16_t mailbox25; - uint16_t mailbox26; - uint16_t mailbox27; - uint16_t mailbox28; - uint16_t mailbox29; - uint16_t mailbox30; - uint16_t mailbox31; + __le16 mailbox0; + __le16 mailbox1; + __le16 mailbox2; + __le16 mailbox3; + __le16 mailbox4; + __le16 mailbox5; + __le16 mailbox6; + __le16 mailbox7; + __le16 mailbox8; + __le16 mailbox9; + __le16 mailbox10; + __le16 mailbox11; + __le16 mailbox12; + __le16 mailbox13; + __le16 mailbox14; + __le16 mailbox15; + __le16 mailbox16; + __le16 mailbox17; + __le16 mailbox18; + __le16 mailbox19; + __le16 mailbox20; + __le16 mailbox21; + __le16 mailbox22; + __le16 mailbox23; + __le16 mailbox24; + __le16 mailbox25; + __le16 mailbox26; + __le16 mailbox27; + __le16 mailbox28; + __le16 mailbox29; + __le16 mailbox30; + __le16 mailbox31; - uint32_t iobase_window; - uint32_t iobase_c4; - uint32_t iobase_c8; - uint32_t unused_4_1[6]; /* Gap. */ - uint32_t iobase_q; - uint32_t unused_5[2]; /* Gap. */ - uint32_t iobase_select; - uint32_t unused_6[2]; /* Gap. */ - uint32_t iobase_sdata; + __le32 iobase_window; + __le32 iobase_c4; + __le32 iobase_c8; + __le32 unused_4_1[6]; /* Gap. */ + __le32 iobase_q; + __le32 unused_5[2]; /* Gap. */ + __le32 iobase_select; + __le32 unused_6[2]; /* Gap. */ + __le32 iobase_sdata; }; /* RISC-RISC semaphore register PCI offet */ #define RISC_REGISTER_BASE_OFFSET 0x7010 @@ -1354,8 +1354,8 @@ struct mid_conf_entry_24xx { struct mid_init_cb_24xx { struct init_cb_24xx init_cb; - uint16_t count; - uint16_t options; + __le16 count; + __le16 options; struct mid_conf_entry_24xx entries[MAX_MULTI_ID_FABRIC]; }; @@ -1389,27 +1389,27 @@ struct vp_ctrl_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t vp_idx_failed; + __le16 vp_idx_failed; - uint16_t comp_status; /* Completion status. */ + __le16 comp_status; /* Completion status. */ #define CS_VCE_IOCB_ERROR 0x01 /* Error processing IOCB */ #define CS_VCE_ACQ_ID_ERROR 0x02 /* Error while acquireing ID. */ #define CS_VCE_BUSY 0x05 /* Firmware not ready to accept cmd. */ - uint16_t command; + __le16 command; #define VCE_COMMAND_ENABLE_VPS 0x00 /* Enable VPs. */ #define VCE_COMMAND_DISABLE_VPS 0x08 /* Disable VPs. */ #define VCE_COMMAND_DISABLE_VPS_REINIT 0x09 /* Disable VPs and reinit link. */ #define VCE_COMMAND_DISABLE_VPS_LOGO 0x0a /* Disable VPs and LOGO ports. */ #define VCE_COMMAND_DISABLE_VPS_LOGO_ALL 0x0b /* Disable VPs and LOGO ports. */ - uint16_t vp_count; + __le16 vp_count; uint8_t vp_idx_map[16]; - uint16_t flags; - uint16_t id; + __le16 flags; + __le16 id; uint16_t reserved_4; - uint16_t hopct; + __le16 hopct; uint8_t reserved_5[24]; }; @@ -1425,12 +1425,12 @@ struct vp_config_entry_24xx { uint32_t handle; /* System handle. */ - uint16_t flags; + __le16 flags; #define CS_VF_BIND_VPORTS_TO_VF BIT_0 #define CS_VF_SET_QOS_OF_VPORTS BIT_1 #define CS_VF_SET_HOPS_OF_VPORTS BIT_2 - uint16_t comp_status; /* Completion status. */ + __le16 comp_status; /* Completion status. */ #define CS_VCT_STS_ERROR 0x01 /* Specified VPs were not disabled. */ #define CS_VCT_CNT_ERROR 0x02 /* Invalid VP count. */ #define CS_VCT_ERROR 0x03 /* Unknown error. */ @@ -1457,9 +1457,9 @@ struct vp_config_entry_24xx { uint16_t reserved_vp2; uint8_t port_name_idx2[WWN_SIZE]; uint8_t node_name_idx2[WWN_SIZE]; - uint16_t id; + __le16 id; uint16_t reserved_4; - uint16_t hopct; + __le16 hopct; uint8_t reserved_5[2]; }; @@ -1486,7 +1486,7 @@ struct vp_rpt_id_entry_24xx { uint8_t entry_count; /* Entry count. */ uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ - uint32_t resv1; + __le32 resv1; uint8_t vp_acquired; uint8_t vp_setup; uint8_t vp_idx; /* Format 0=reserved */ @@ -1550,15 +1550,15 @@ struct vf_evfp_entry_24xx { uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System handle. */ - uint16_t comp_status; /* Completion status. */ - uint16_t timeout; /* timeout */ - uint16_t adim_tagging_mode; + __le16 comp_status; /* Completion status. */ + __le16 timeout; /* timeout */ + __le16 adim_tagging_mode; - uint16_t vfport_id; + __le16 vfport_id; uint32_t exch_addr; - uint16_t nport_handle; /* N_PORT handle. */ - uint16_t control_flags; + __le16 nport_handle; /* N_PORT handle. */ + __le16 control_flags; uint32_t io_parameter_0; uint32_t io_parameter_1; __le64 tx_address __packed; /* Data segment 0 address. */ @@ -1573,13 +1573,13 @@ struct vf_evfp_entry_24xx { struct qla_fdt_layout { uint8_t sig[4]; - uint16_t version; - uint16_t len; - uint16_t checksum; + __le16 version; + __le16 len; + __le16 checksum; uint8_t unused1[2]; uint8_t model[16]; - uint16_t man_id; - uint16_t id; + __le16 man_id; + __le16 id; uint8_t flags; uint8_t erase_cmd; uint8_t alt_erase_cmd; @@ -1588,15 +1588,15 @@ struct qla_fdt_layout { uint8_t wrt_sts_reg_cmd; uint8_t unprotect_sec_cmd; uint8_t read_man_id_cmd; - uint32_t block_size; - uint32_t alt_block_size; - uint32_t flash_size; - uint32_t wrt_enable_data; + __le32 block_size; + __le32 alt_block_size; + __le32 flash_size; + __le32 wrt_enable_data; uint8_t read_id_addr_len; uint8_t wrt_disable_bits; uint8_t read_dev_id_len; uint8_t chip_erase_cmd; - uint16_t read_timeout; + __le16 read_timeout; uint8_t protect_sec_cmd; uint8_t unused2[65]; }; @@ -1605,11 +1605,11 @@ struct qla_fdt_layout { struct qla_flt_location { uint8_t sig[4]; - uint16_t start_lo; - uint16_t start_hi; + __le16 start_lo; + __le16 start_hi; uint8_t version; uint8_t unused[5]; - uint16_t checksum; + __le16 checksum; }; #define FLT_REG_FW 0x01 @@ -1664,19 +1664,19 @@ struct qla_flt_location { #define FLT_REG_PEP_SEC_28XX 0xF1 struct qla_flt_region { - uint16_t code; + __le16 code; uint8_t attribute; uint8_t reserved; - uint32_t size; - uint32_t start; - uint32_t end; + __le32 size; + __le32 start; + __le32 end; }; struct qla_flt_header { - uint16_t version; - uint16_t length; - uint16_t checksum; - uint16_t unused; + __le16 version; + __le16 length; + __le16 checksum; + __le16 unused; struct qla_flt_region region[0]; }; @@ -1688,18 +1688,18 @@ struct qla_flt_header { struct qla_npiv_header { uint8_t sig[2]; - uint16_t version; - uint16_t entries; - uint16_t unused[4]; - uint16_t checksum; + __le16 version; + __le16 entries; + __le16 unused[4]; + __le16 checksum; }; struct qla_npiv_entry { - uint16_t flags; - uint16_t vf_id; + __le16 flags; + __le16 vf_id; uint8_t q_qos; uint8_t f_qos; - uint16_t unused1; + __le16 unused1; uint8_t port_name[WWN_SIZE]; uint8_t node_name[WWN_SIZE]; }; @@ -1729,7 +1729,7 @@ struct verify_chip_entry_84xx { uint32_t handle; - uint16_t options; + __le16 options; #define VCO_DONT_UPDATE_FW BIT_0 #define VCO_FORCE_UPDATE BIT_1 #define VCO_DONT_RESET_UPDATE BIT_2 @@ -1737,18 +1737,18 @@ struct verify_chip_entry_84xx { #define VCO_END_OF_DATA BIT_14 #define VCO_ENABLE_DSD BIT_15 - uint16_t reserved_1; + __le16 reserved_1; - uint16_t data_seg_cnt; - uint16_t reserved_2[3]; + __le16 data_seg_cnt; + __le16 reserved_2[3]; - uint32_t fw_ver; - uint32_t exchange_address; + __le32 fw_ver; + __le32 exchange_address; - uint32_t reserved_3[3]; - uint32_t fw_size; - uint32_t fw_seq_size; - uint32_t relative_offset; + __le32 reserved_3[3]; + __le32 fw_size; + __le32 fw_seq_size; + __le32 relative_offset; struct dsd64 dsd; }; @@ -1761,22 +1761,22 @@ struct verify_chip_rsp_84xx { uint32_t handle; - uint16_t comp_status; + __le16 comp_status; #define CS_VCS_CHIP_FAILURE 0x3 #define CS_VCS_BAD_EXCHANGE 0x8 #define CS_VCS_SEQ_COMPLETEi 0x40 - uint16_t failure_code; + __le16 failure_code; #define VFC_CHECKSUM_ERROR 0x1 #define VFC_INVALID_LEN 0x2 #define VFC_ALREADY_IN_PROGRESS 0x8 - uint16_t reserved_1[4]; + __le16 reserved_1[4]; - uint32_t fw_ver; - uint32_t exchange_address; + __le32 fw_ver; + __le32 exchange_address; - uint32_t reserved_2[6]; + __le32 reserved_2[6]; }; #define ACCESS_CHIP_IOCB_TYPE 0x2B @@ -1788,24 +1788,24 @@ struct access_chip_84xx { uint32_t handle; - uint16_t options; + __le16 options; #define ACO_DUMP_MEMORY 0x0 #define ACO_LOAD_MEMORY 0x1 #define ACO_CHANGE_CONFIG_PARAM 0x2 #define ACO_REQUEST_INFO 0x3 - uint16_t reserved1; + __le16 reserved1; - uint16_t dseg_count; - uint16_t reserved2[3]; + __le16 dseg_count; + __le16 reserved2[3]; - uint32_t parameter1; - uint32_t parameter2; - uint32_t parameter3; + __le32 parameter1; + __le32 parameter2; + __le32 parameter3; - uint32_t reserved3[3]; - uint32_t total_byte_cnt; - uint32_t reserved4; + __le32 reserved3[3]; + __le32 total_byte_cnt; + __le32 reserved4; struct dsd64 dsd; }; @@ -1818,11 +1818,11 @@ struct access_chip_rsp_84xx { uint32_t handle; - uint16_t comp_status; - uint16_t failure_code; - uint32_t residual_count; + __le16 comp_status; + __le16 failure_code; + __le32 residual_count; - uint32_t reserved[12]; + __le32 reserved[12]; }; /* 81XX Support **************************************************************/ @@ -1877,52 +1877,52 @@ struct access_chip_rsp_84xx { struct nvram_81xx { /* NVRAM header. */ uint8_t id[4]; - uint16_t nvram_version; - uint16_t reserved_0; + __le16 nvram_version; + __le16 reserved_0; /* Firmware Initialization Control Block. */ - uint16_t version; - uint16_t reserved_1; - uint16_t frame_payload_size; - uint16_t execution_throttle; - uint16_t exchange_count; - uint16_t reserved_2; + __le16 version; + __le16 reserved_1; + __le16 frame_payload_size; + __le16 execution_throttle; + __le16 exchange_count; + __le16 reserved_2; uint8_t port_name[WWN_SIZE]; uint8_t node_name[WWN_SIZE]; - uint16_t login_retry_count; - uint16_t reserved_3; - uint16_t interrupt_delay_timer; - uint16_t login_timeout; + __le16 login_retry_count; + __le16 reserved_3; + __le16 interrupt_delay_timer; + __le16 login_timeout; - uint32_t firmware_options_1; - uint32_t firmware_options_2; - uint32_t firmware_options_3; + __le32 firmware_options_1; + __le32 firmware_options_2; + __le32 firmware_options_3; - uint16_t reserved_4[4]; + __le16 reserved_4[4]; /* Offset 64. */ uint8_t enode_mac[6]; - uint16_t reserved_5[5]; + __le16 reserved_5[5]; /* Offset 80. */ - uint16_t reserved_6[24]; + __le16 reserved_6[24]; /* Offset 128. */ - uint16_t ex_version; + __le16 ex_version; uint8_t prio_fcf_matching_flags; uint8_t reserved_6_1[3]; - uint16_t pri_fcf_vlan_id; + __le16 pri_fcf_vlan_id; uint8_t pri_fcf_fabric_name[8]; - uint16_t reserved_6_2[7]; + __le16 reserved_6_2[7]; uint8_t spma_mac_addr[6]; - uint16_t reserved_6_3[14]; + __le16 reserved_6_3[14]; /* Offset 192. */ uint8_t min_supported_speed; uint8_t reserved_7_0; - uint16_t reserved_7[31]; + __le16 reserved_7[31]; /* * BIT 0 = Enable spinup delay @@ -1955,26 +1955,26 @@ struct nvram_81xx { * BIT 25 = Temp WWPN * BIT 26-31 = */ - uint32_t host_p; + __le32 host_p; uint8_t alternate_port_name[WWN_SIZE]; uint8_t alternate_node_name[WWN_SIZE]; uint8_t boot_port_name[WWN_SIZE]; - uint16_t boot_lun_number; - uint16_t reserved_8; + __le16 boot_lun_number; + __le16 reserved_8; uint8_t alt1_boot_port_name[WWN_SIZE]; - uint16_t alt1_boot_lun_number; - uint16_t reserved_9; + __le16 alt1_boot_lun_number; + __le16 reserved_9; uint8_t alt2_boot_port_name[WWN_SIZE]; - uint16_t alt2_boot_lun_number; - uint16_t reserved_10; + __le16 alt2_boot_lun_number; + __le16 reserved_10; uint8_t alt3_boot_port_name[WWN_SIZE]; - uint16_t alt3_boot_lun_number; - uint16_t reserved_11; + __le16 alt3_boot_lun_number; + __le16 reserved_11; /* * BIT 0 = Selective Login @@ -1986,35 +1986,35 @@ struct nvram_81xx { * BIT 6 = Reserved * BIT 7-31 = */ - uint32_t efi_parameters; + __le32 efi_parameters; uint8_t reset_delay; uint8_t reserved_12; - uint16_t reserved_13; + __le16 reserved_13; - uint16_t boot_id_number; - uint16_t reserved_14; + __le16 boot_id_number; + __le16 reserved_14; - uint16_t max_luns_per_target; - uint16_t reserved_15; + __le16 max_luns_per_target; + __le16 reserved_15; - uint16_t port_down_retry_count; - uint16_t link_down_timeout; + __le16 port_down_retry_count; + __le16 link_down_timeout; /* FCode parameters. */ - uint16_t fcode_parameter; + __le16 fcode_parameter; - uint16_t reserved_16[3]; + __le16 reserved_16[3]; /* Offset 352. */ uint8_t reserved_17[4]; - uint16_t reserved_18[5]; + __le16 reserved_18[5]; uint8_t reserved_19[2]; - uint16_t reserved_20[8]; + __le16 reserved_20[8]; /* Offset 384. */ uint8_t reserved_21[16]; - uint16_t reserved_22[3]; + __le16 reserved_22[3]; /* Offset 406 (0x196) Enhanced Features * BIT 0 = Extended BB credits for LR @@ -2027,20 +2027,20 @@ struct nvram_81xx { uint16_t reserved_24[4]; /* Offset 416. */ - uint16_t reserved_25[32]; + __le16 reserved_25[32]; /* Offset 480. */ uint8_t model_name[16]; /* Offset 496. */ - uint16_t feature_mask_l; - uint16_t feature_mask_h; - uint16_t reserved_26[2]; + __le16 feature_mask_l; + __le16 feature_mask_h; + __le16 reserved_26[2]; - uint16_t subsystem_vendor_id; - uint16_t subsystem_device_id; + __le16 subsystem_vendor_id; + __le16 subsystem_device_id; - uint32_t checksum; + __le32 checksum; }; /* @@ -2049,31 +2049,31 @@ struct nvram_81xx { */ #define ICB_VERSION 1 struct init_cb_81xx { - uint16_t version; - uint16_t reserved_1; + __le16 version; + __le16 reserved_1; - uint16_t frame_payload_size; - uint16_t execution_throttle; - uint16_t exchange_count; + __le16 frame_payload_size; + __le16 execution_throttle; + __le16 exchange_count; - uint16_t reserved_2; + __le16 reserved_2; uint8_t port_name[WWN_SIZE]; /* Big endian. */ uint8_t node_name[WWN_SIZE]; /* Big endian. */ - uint16_t response_q_inpointer; - uint16_t request_q_outpointer; + __le16 response_q_inpointer; + __le16 request_q_outpointer; - uint16_t login_retry_count; + __le16 login_retry_count; - uint16_t prio_request_q_outpointer; + __le16 prio_request_q_outpointer; - uint16_t response_q_length; - uint16_t request_q_length; + __le16 response_q_length; + __le16 request_q_length; - uint16_t reserved_3; + __le16 reserved_3; - uint16_t prio_request_q_length; + __le16 prio_request_q_length; __le64 request_q_address __packed; __le64 response_q_address __packed; @@ -2081,12 +2081,12 @@ struct init_cb_81xx { uint8_t reserved_4[8]; - uint16_t atio_q_inpointer; - uint16_t atio_q_length; + __le16 atio_q_inpointer; + __le16 atio_q_length; __le64 atio_q_address __packed; - uint16_t interrupt_delay_timer; /* 100us increments. */ - uint16_t login_timeout; + __le16 interrupt_delay_timer; /* 100us increments. */ + __le16 login_timeout; /* * BIT 0-3 = Reserved @@ -2099,7 +2099,7 @@ struct init_cb_81xx { * BIT 14 = Node Name Option * BIT 15-31 = Reserved */ - uint32_t firmware_options_1; + __le32 firmware_options_1; /* * BIT 0 = Operation Mode bit 0 @@ -2117,7 +2117,7 @@ struct init_cb_81xx { * BIT 14 = Enable Target PRLI Control * BIT 15-31 = Reserved */ - uint32_t firmware_options_2; + __le32 firmware_options_2; /* * BIT 0-3 = Reserved @@ -2138,7 +2138,7 @@ struct init_cb_81xx { * BIT 28 = SPMA selection bit 1 * BIT 30-31 = Reserved */ - uint32_t firmware_options_3; + __le32 firmware_options_3; uint8_t reserved_5[8]; diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index cd3c15086c70..1fb6ccac07cc 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -40,7 +40,7 @@ qla24xx_calc_iocbs(scsi_qla_host_t *vha, uint16_t dsds) * register value. */ static __inline__ uint16_t -qla2x00_debounce_register(volatile uint16_t __iomem *addr) +qla2x00_debounce_register(volatile __le16 __iomem *addr) { volatile uint16_t first; volatile uint16_t second; diff --git a/drivers/scsi/qla2xxx/qla_mr.h b/drivers/scsi/qla2xxx/qla_mr.h index 3aa9bfd1c840..762250891a8f 100644 --- a/drivers/scsi/qla2xxx/qla_mr.h +++ b/drivers/scsi/qla2xxx/qla_mr.h @@ -96,7 +96,7 @@ struct tsk_mgmt_entry_fx00 { uint8_t sys_define; uint8_t entry_status; /* Entry Status. */ - __le32 handle; /* System handle. */ + uint32_t handle; /* System handle. */ uint32_t reserved_0; @@ -121,13 +121,13 @@ struct abort_iocb_entry_fx00 { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ - __le32 handle; /* System handle. */ + uint32_t handle; /* System handle. */ __le32 reserved_0; __le16 tgt_id_sts; /* Completion status. */ __le16 options; - __le32 abort_handle; /* System handle. */ + uint32_t abort_handle; /* System handle. */ __le32 reserved_2; __le16 req_que_no; @@ -166,7 +166,7 @@ struct fxdisc_entry_fx00 { uint8_t sys_define; /* System Defined. */ uint8_t entry_status; /* Entry Status. */ - __le32 handle; /* System handle. */ + uint32_t handle; /* System handle. */ __le32 reserved_0; /* System handle. */ __le16 func_num; diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h index ef912902d4e5..fbb844226630 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.h +++ b/drivers/scsi/qla2xxx/qla_nvme.h @@ -48,26 +48,26 @@ struct cmd_nvme { uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ - uint16_t timeout; /* Command timeout. */ + __le16 nport_handle; /* N_PORT handle. */ + __le16 timeout; /* Command timeout. */ - uint16_t dseg_count; /* Data segment count. */ - uint16_t nvme_rsp_dsd_len; /* NVMe RSP DSD length */ + __le16 dseg_count; /* Data segment count. */ + __le16 nvme_rsp_dsd_len; /* NVMe RSP DSD length */ uint64_t rsvd; - uint16_t control_flags; /* Control Flags */ + __le16 control_flags; /* Control Flags */ #define CF_NVME_FIRST_BURST_ENABLE BIT_11 #define CF_DIF_SEG_DESCR_ENABLE BIT_3 #define CF_DATA_SEG_DESCR_ENABLE BIT_2 #define CF_READ_DATA BIT_1 #define CF_WRITE_DATA BIT_0 - uint16_t nvme_cmnd_dseg_len; /* Data segment length. */ + __le16 nvme_cmnd_dseg_len; /* Data segment length. */ __le64 nvme_cmnd_dseg_address __packed;/* Data segment address. */ __le64 nvme_rsp_dseg_address __packed; /* Data segment address. */ - uint32_t byte_count; /* Total byte count. */ + __le32 byte_count; /* Total byte count. */ uint8_t port_id[3]; /* PortID of destination port. */ uint8_t vp_index; @@ -82,24 +82,24 @@ struct pt_ls4_request { uint8_t sys_define; uint8_t entry_status; uint32_t handle; - uint16_t status; - uint16_t nport_handle; - uint16_t tx_dseg_count; + __le16 status; + __le16 nport_handle; + __le16 tx_dseg_count; uint8_t vp_index; uint8_t rsvd; - uint16_t timeout; - uint16_t control_flags; + __le16 timeout; + __le16 control_flags; #define CF_LS4_SHIFT 13 #define CF_LS4_ORIGINATOR 0 #define CF_LS4_RESPONDER 1 #define CF_LS4_RESPONDER_TERM 2 - uint16_t rx_dseg_count; - uint16_t rsvd2; - uint32_t exchange_address; - uint32_t rsvd3; - uint32_t rx_byte_count; - uint32_t tx_byte_count; + __le16 rx_dseg_count; + __le16 rsvd2; + __le32 exchange_address; + __le32 rsvd3; + __le32 rx_byte_count; + __le32 tx_byte_count; struct dsd64 dsd[2]; }; @@ -107,32 +107,32 @@ struct pt_ls4_request { struct pt_ls4_rx_unsol { uint8_t entry_type; uint8_t entry_count; - uint16_t rsvd0; - uint16_t rsvd1; + __le16 rsvd0; + __le16 rsvd1; uint8_t vp_index; uint8_t rsvd2; - uint16_t rsvd3; - uint16_t nport_handle; - uint16_t frame_size; - uint16_t rsvd4; - uint32_t exchange_address; + __le16 rsvd3; + __le16 nport_handle; + __le16 frame_size; + __le16 rsvd4; + __le32 exchange_address; uint8_t d_id[3]; uint8_t r_ctl; be_id_t s_id; uint8_t cs_ctl; uint8_t f_ctl[3]; uint8_t type; - uint16_t seq_cnt; + __le16 seq_cnt; uint8_t df_ctl; uint8_t seq_id; - uint16_t rx_id; - uint16_t ox_id; - uint32_t param; - uint32_t desc0; + __le16 rx_id; + __le16 ox_id; + __le32 param; + __le32 desc0; #define PT_LS4_PAYLOAD_OFFSET 0x2c #define PT_LS4_FIRST_PACKET_LEN 20 - uint32_t desc_len; - uint32_t payload[3]; + __le32 desc_len; + __le32 payload[3]; }; /* diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h index 230abee10598..93344a05910a 100644 --- a/drivers/scsi/qla2xxx/qla_nx.h +++ b/drivers/scsi/qla2xxx/qla_nx.h @@ -800,16 +800,16 @@ struct qla82xx_legacy_intr_set { #define QLA82XX_URI_FIRMWARE_IDX_OFF 29 struct qla82xx_uri_table_desc{ - uint32_t findex; - uint32_t num_entries; - uint32_t entry_size; - uint32_t reserved[5]; + __le32 findex; + __le32 num_entries; + __le32 entry_size; + __le32 reserved[5]; }; struct qla82xx_uri_data_desc{ - uint32_t findex; - uint32_t size; - uint32_t reserved[5]; + __le32 findex; + __le32 size; + __le32 reserved[5]; }; /* UNIFIED ROMIMAGE END */ @@ -829,22 +829,22 @@ struct qla82xx_uri_data_desc{ * ISP 8021 I/O Register Set structure definitions. */ struct device_reg_82xx { - uint32_t req_q_out[64]; /* Request Queue out-Pointer (64 * 4) */ - uint32_t rsp_q_in[64]; /* Response Queue In-Pointer. */ - uint32_t rsp_q_out[64]; /* Response Queue Out-Pointer. */ + __le32 req_q_out[64]; /* Request Queue out-Pointer (64 * 4) */ + __le32 rsp_q_in[64]; /* Response Queue In-Pointer. */ + __le32 rsp_q_out[64]; /* Response Queue Out-Pointer. */ - uint16_t mailbox_in[32]; /* Mail box In registers */ - uint16_t unused_1[32]; - uint32_t hint; /* Host interrupt register */ + __le16 mailbox_in[32]; /* Mailbox In registers */ + __le16 unused_1[32]; + __le32 hint; /* Host interrupt register */ #define HINT_MBX_INT_PENDING BIT_0 - uint16_t unused_2[62]; - uint16_t mailbox_out[32]; /* Mail box Out registers */ - uint32_t unused_3[48]; + __le16 unused_2[62]; + __le16 mailbox_out[32]; /* Mailbox Out registers */ + __le32 unused_3[48]; - uint32_t host_status; /* host status */ + __le32 host_status; /* host status */ #define HSRX_RISC_INT BIT_15 /* RISC to Host interrupt. */ #define HSRX_RISC_PAUSED BIT_8 /* RISC Paused. */ - uint32_t host_int; /* Interrupt status. */ + __le32 host_int; /* Interrupt status. */ #define ISRX_NX_RISC_INT BIT_0 /* RISC interrupt. */ }; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 3cf8590feeac..010f12523b2a 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -135,37 +135,37 @@ struct nack_to_isp { uint8_t entry_status; /* Entry Status. */ union { struct { - uint32_t sys_define_2; /* System defined. */ + __le32 sys_define_2; /* System defined. */ target_id_t target; uint8_t target_id; uint8_t reserved_1; - uint16_t flags; - uint16_t resp_code; - uint16_t status; - uint16_t task_flags; - uint16_t seq_id; - uint16_t srr_rx_id; - uint32_t srr_rel_offs; - uint16_t srr_ui; - uint16_t srr_flags; - uint16_t srr_reject_code; + __le16 flags; + __le16 resp_code; + __le16 status; + __le16 task_flags; + __le16 seq_id; + __le16 srr_rx_id; + __le32 srr_rel_offs; + __le16 srr_ui; + __le16 srr_flags; + __le16 srr_reject_code; uint8_t srr_reject_vendor_uniq; uint8_t srr_reject_code_expl; uint8_t reserved_2[24]; } isp2x; struct { uint32_t handle; - uint16_t nport_handle; + __le16 nport_handle; uint16_t reserved_1; - uint16_t flags; - uint16_t srr_rx_id; - uint16_t status; + __le16 flags; + __le16 srr_rx_id; + __le16 status; uint8_t status_subcode; uint8_t fw_handle; - uint32_t exchange_address; - uint32_t srr_rel_offs; - uint16_t srr_ui; - uint16_t srr_flags; + __le32 exchange_address; + __le32 srr_rel_offs; + __le16 srr_ui; + __le16 srr_flags; uint8_t reserved_4[19]; uint8_t vp_index; uint8_t srr_reject_vendor_uniq; @@ -175,7 +175,7 @@ struct nack_to_isp { } isp24; } u; uint8_t reserved[2]; - uint16_t ox_id; + __le16 ox_id; } __packed; #define NOTIFY_ACK_FLAGS_TERMINATE BIT_3 #define NOTIFY_ACK_SRR_FLAGS_ACCEPT 0 @@ -206,16 +206,16 @@ struct ctio_to_2xxx { uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System defined handle */ target_id_t target; - uint16_t rx_id; - uint16_t flags; - uint16_t status; - uint16_t timeout; /* 0 = 30 seconds, 0xFFFF = disable */ - uint16_t dseg_count; /* Data segment count. */ - uint32_t relative_offset; - uint32_t residual; - uint16_t reserved_1[3]; - uint16_t scsi_status; - uint32_t transfer_length; + __le16 rx_id; + __le16 flags; + __le16 status; + __le16 timeout; /* 0 = 30 seconds, 0xFFFF = disable */ + __le16 dseg_count; /* Data segment count. */ + __le32 relative_offset; + __le32 residual; + __le16 reserved_1[3]; + __le16 scsi_status; + __le32 transfer_length; struct dsd32 dsd[3]; } __packed; #define ATIO_PATH_INVALID 0x07 @@ -257,7 +257,7 @@ struct fcp_hdr { uint16_t seq_cnt; __be16 ox_id; uint16_t rx_id; - uint32_t parameter; + __le32 parameter; } __packed; struct fcp_hdr_le { @@ -267,12 +267,12 @@ struct fcp_hdr_le { uint8_t cs_ctl; uint8_t f_ctl[3]; uint8_t type; - uint16_t seq_cnt; + __le16 seq_cnt; uint8_t df_ctl; uint8_t seq_id; - uint16_t rx_id; - uint16_t ox_id; - uint32_t parameter; + __le16 rx_id; + __le16 ox_id; + __le32 parameter; } __packed; #define F_CTL_EXCH_CONTEXT_RESP BIT_23 @@ -306,7 +306,7 @@ struct atio7_fcp_cmnd { * BUILD_BUG_ON in qlt_init(). */ uint8_t add_cdb[4]; - /* uint32_t data_length; */ + /* __le32 data_length; */ } __packed; /* @@ -316,31 +316,31 @@ struct atio7_fcp_cmnd { struct atio_from_isp { union { struct { - uint16_t entry_hdr; + __le16 entry_hdr; uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ - uint32_t sys_define_2; /* System defined. */ + __le32 sys_define_2; /* System defined. */ target_id_t target; - uint16_t rx_id; - uint16_t flags; - uint16_t status; + __le16 rx_id; + __le16 flags; + __le16 status; uint8_t command_ref; uint8_t task_codes; uint8_t task_flags; uint8_t execution_codes; uint8_t cdb[MAX_CMDSZ]; - uint32_t data_length; - uint16_t lun; + __le32 data_length; + __le16 lun; uint8_t initiator_port_name[WWN_SIZE]; /* on qla23xx */ - uint16_t reserved_32[6]; - uint16_t ox_id; + __le16 reserved_32[6]; + __le16 ox_id; } isp2x; struct { - uint16_t entry_hdr; + __le16 entry_hdr; uint8_t fcp_cmnd_len_low; uint8_t fcp_cmnd_len_high:4; uint8_t attr:4; - uint32_t exchange_addr; + __le32 exchange_addr; #define ATIO_EXCHANGE_ADDRESS_UNKNOWN 0xFFFFFFFF struct fcp_hdr fcp_hdr; struct atio7_fcp_cmnd fcp_cmnd; @@ -352,7 +352,7 @@ struct atio_from_isp { #define FCP_CMD_LENGTH_MASK 0x0fff #define FCP_CMD_LENGTH_MIN 0x38 uint8_t data[56]; - uint32_t signature; + __le32 signature; #define ATIO_PROCESSED 0xDEADDEAD /* Signature */ } raw; } u; @@ -395,36 +395,36 @@ struct ctio7_to_24xx { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System defined handle */ - uint16_t nport_handle; + __le16 nport_handle; #define CTIO7_NHANDLE_UNRECOGNIZED 0xFFFF - uint16_t timeout; - uint16_t dseg_count; /* Data segment count. */ + __le16 timeout; + __le16 dseg_count; /* Data segment count. */ uint8_t vp_index; uint8_t add_flags; le_id_t initiator_id; uint8_t reserved; - uint32_t exchange_addr; + __le32 exchange_addr; union { struct { - uint16_t reserved1; + __le16 reserved1; __le16 flags; - uint32_t residual; + __le32 residual; __le16 ox_id; - uint16_t scsi_status; - uint32_t relative_offset; - uint32_t reserved2; - uint32_t transfer_length; - uint32_t reserved3; + __le16 scsi_status; + __le32 relative_offset; + __le32 reserved2; + __le32 transfer_length; + __le32 reserved3; struct dsd64 dsd; } status0; struct { - uint16_t sense_length; + __le16 sense_length; __le16 flags; - uint32_t residual; + __le32 residual; __le16 ox_id; - uint16_t scsi_status; - uint16_t response_len; - uint16_t reserved; + __le16 scsi_status; + __le16 response_len; + __le16 reserved; uint8_t sense_data[24]; } status1; } u; @@ -440,18 +440,18 @@ struct ctio7_from_24xx { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System defined handle */ - uint16_t status; - uint16_t timeout; - uint16_t dseg_count; /* Data segment count. */ + __le16 status; + __le16 timeout; + __le16 dseg_count; /* Data segment count. */ uint8_t vp_index; uint8_t reserved1[5]; - uint32_t exchange_address; - uint16_t reserved2; - uint16_t flags; - uint32_t residual; - uint16_t ox_id; - uint16_t reserved3; - uint32_t relative_offset; + __le32 exchange_address; + __le16 reserved2; + __le16 flags; + __le32 residual; + __le16 ox_id; + __le16 reserved3; + __le32 relative_offset; uint8_t reserved4[24]; } __packed; @@ -489,29 +489,29 @@ struct ctio_crc2_to_fw { uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System handle. */ - uint16_t nport_handle; /* N_PORT handle. */ + __le16 nport_handle; /* N_PORT handle. */ __le16 timeout; /* Command timeout. */ - uint16_t dseg_count; /* Data segment count. */ + __le16 dseg_count; /* Data segment count. */ uint8_t vp_index; uint8_t add_flags; /* additional flags */ #define CTIO_CRC2_AF_DIF_DSD_ENA BIT_3 le_id_t initiator_id; /* initiator ID */ uint8_t reserved1; - uint32_t exchange_addr; /* rcv exchange address */ - uint16_t reserved2; + __le32 exchange_addr; /* rcv exchange address */ + __le16 reserved2; __le16 flags; /* refer to CTIO7 flags values */ - uint32_t residual; + __le32 residual; __le16 ox_id; - uint16_t scsi_status; + __le16 scsi_status; __le32 relative_offset; - uint32_t reserved5; + __le32 reserved5; __le32 transfer_length; /* total fc transfer length */ - uint32_t reserved6; + __le32 reserved6; __le64 crc_context_address __packed; /* Data segment address. */ - uint16_t crc_context_len; /* Data segment length. */ - uint16_t reserved_1; /* MUST be set to 0. */ + __le16 crc_context_len; /* Data segment length. */ + __le16 reserved_1; /* MUST be set to 0. */ }; /* CTIO Type CRC_x Status IOCB */ @@ -522,20 +522,20 @@ struct ctio_crc_from_fw { uint8_t entry_status; /* Entry Status. */ uint32_t handle; /* System handle. */ - uint16_t status; - uint16_t timeout; /* Command timeout. */ - uint16_t dseg_count; /* Data segment count. */ - uint32_t reserved1; - uint16_t state_flags; + __le16 status; + __le16 timeout; /* Command timeout. */ + __le16 dseg_count; /* Data segment count. */ + __le32 reserved1; + __le16 state_flags; #define CTIO_CRC_SF_DIF_CHOPPED BIT_4 - uint32_t exchange_address; /* rcv exchange address */ - uint16_t reserved2; - uint16_t flags; - uint32_t resid_xfer_length; - uint16_t ox_id; + __le32 exchange_address; /* rcv exchange address */ + __le16 reserved2; + __le16 flags; + __le32 resid_xfer_length; + __le16 ox_id; uint8_t reserved3[12]; - uint16_t runt_guard; /* reported runt blk guard */ + __le16 runt_guard; /* reported runt blk guard */ uint8_t actual_dif[8]; uint8_t expected_dif[8]; } __packed; @@ -558,29 +558,29 @@ struct abts_recv_from_24xx { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ uint8_t reserved_1[6]; - uint16_t nport_handle; + __le16 nport_handle; uint8_t reserved_2[2]; uint8_t vp_index; uint8_t reserved_3:4; uint8_t sof_type:4; - uint32_t exchange_address; + __le32 exchange_address; struct fcp_hdr_le fcp_hdr_le; uint8_t reserved_4[16]; - uint32_t exchange_addr_to_abort; + __le32 exchange_addr_to_abort; } __packed; #define ABTS_PARAM_ABORT_SEQ BIT_0 struct ba_acc_le { - uint16_t reserved; + __le16 reserved; uint8_t seq_id_last; uint8_t seq_id_valid; #define SEQ_ID_VALID 0x80 #define SEQ_ID_INVALID 0x00 - uint16_t rx_id; - uint16_t ox_id; - uint16_t high_seq_cnt; - uint16_t low_seq_cnt; + __le16 rx_id; + __le16 ox_id; + __le16 high_seq_cnt; + __le16 low_seq_cnt; } __packed; struct ba_rjt_le { @@ -604,21 +604,21 @@ struct abts_resp_to_24xx { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ uint32_t handle; - uint16_t reserved_1; - uint16_t nport_handle; - uint16_t control_flags; + __le16 reserved_1; + __le16 nport_handle; + __le16 control_flags; #define ABTS_CONTR_FLG_TERM_EXCHG BIT_0 uint8_t vp_index; uint8_t reserved_3:4; uint8_t sof_type:4; - uint32_t exchange_address; + __le32 exchange_address; struct fcp_hdr_le fcp_hdr_le; union { struct ba_acc_le ba_acct; struct ba_rjt_le ba_rjt; } __packed payload; - uint32_t reserved_4; - uint32_t exchange_addr_to_abort; + __le32 reserved_4; + __le32 exchange_addr_to_abort; } __packed; /* @@ -634,21 +634,21 @@ struct abts_resp_from_24xx_fw { uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ uint32_t handle; - uint16_t compl_status; + __le16 compl_status; #define ABTS_RESP_COMPL_SUCCESS 0 #define ABTS_RESP_COMPL_SUBCODE_ERROR 0x31 - uint16_t nport_handle; - uint16_t reserved_1; + __le16 nport_handle; + __le16 reserved_1; uint8_t reserved_2; uint8_t reserved_3:4; uint8_t sof_type:4; - uint32_t exchange_address; + __le32 exchange_address; struct fcp_hdr_le fcp_hdr_le; uint8_t reserved_4[8]; - uint32_t error_subcode1; + __le32 error_subcode1; #define ABTS_RESP_SUBCODE_ERR_ABORTED_EXCH_NOT_TERM 0x1E - uint32_t error_subcode2; - uint32_t exchange_addr_to_abort; + __le32 error_subcode2; + __le32 exchange_addr_to_abort; } __packed; /********************************************************************\ diff --git a/drivers/scsi/qla2xxx/qla_tmpl.h b/drivers/scsi/qla2xxx/qla_tmpl.h index bba8dc90acfb..89280b3477aa 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.h +++ b/drivers/scsi/qla2xxx/qla_tmpl.h @@ -27,7 +27,7 @@ struct __packed qla27xx_fwdt_template { uint32_t saved_state[16]; uint32_t reserved_3[8]; - uint32_t firmware_version[5]; + __le32 firmware_version[5]; }; #define TEMPLATE_TYPE_FWDUMP 99 From 7ffa5b939751b6638e4a99518775c8503fbb46be Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2020 14:17:12 -0700 Subject: [PATCH 422/562] scsi: qla2xxx: Fix endianness annotations in source files Fix all endianness complaints reported by sparse (C=2) without affecting the behavior of the code on little endian CPUs. Link: https://lore.kernel.org/r/20200518211712.11395-16-bvanassche@acm.org Cc: Nilesh Javali Cc: Quinn Tran Cc: Martin Wilck Cc: Daniel Wagner Cc: Roman Bolshakov Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 3 +- drivers/scsi/qla2xxx/qla_bsg.c | 4 +- drivers/scsi/qla2xxx/qla_dbg.c | 87 +++++++++++++++-------------- drivers/scsi/qla2xxx/qla_init.c | 59 ++++++++++---------- drivers/scsi/qla2xxx/qla_iocb.c | 71 ++++++++++++----------- drivers/scsi/qla2xxx/qla_isr.c | 93 +++++++++++++++---------------- drivers/scsi/qla2xxx/qla_mbx.c | 37 ++++++------ drivers/scsi/qla2xxx/qla_mr.c | 9 ++- drivers/scsi/qla2xxx/qla_nvme.c | 8 +-- drivers/scsi/qla2xxx/qla_nx.c | 89 ++++++++++++++--------------- drivers/scsi/qla2xxx/qla_os.c | 27 ++++----- drivers/scsi/qla2xxx/qla_sup.c | 69 ++++++++++++----------- drivers/scsi/qla2xxx/qla_target.c | 86 ++++++++++++++-------------- drivers/scsi/qla2xxx/qla_tmpl.c | 6 +- 14 files changed, 329 insertions(+), 319 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 4ee1a75e54ad..9bf1e7daeb2a 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -227,10 +227,9 @@ qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj, /* Checksum NVRAM. */ if (IS_FWI2_CAPABLE(ha)) { - uint32_t *iter; + __le32 *iter = (__force __le32 *)buf; uint32_t chksum; - iter = (uint32_t *)buf; chksum = 0; for (cnt = 0; cnt < ((count >> 2) - 1); cnt++, iter++) chksum += le32_to_cpu(*iter); diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 3af7ca68ec44..88c0338a2ec7 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -490,7 +490,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job) >> 24; switch (loop_id) { case 0xFC: - loop_id = cpu_to_le16(NPH_SNS); + loop_id = NPH_SNS; break; case 0xFA: loop_id = vha->mgmt_svr_loop_id; @@ -2042,7 +2042,7 @@ qlafx00_mgmt_cmd(struct bsg_job *bsg_job) /* Initialize all required fields of fcport */ fcport->vha = vha; - fcport->loop_id = piocb_rqst->dataword; + fcport->loop_id = le32_to_cpu(piocb_rqst->dataword); sp->type = SRB_FXIOCB_BCMD; sp->name = "bsg_fx_mgmt"; diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index d020c23a5106..2ed0b849fbfe 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -189,8 +189,8 @@ qla27xx_dump_mpi_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, } int -qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, - uint32_t ram_dwords, void **nxt) +qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, __be32 *ram, + uint32_t ram_dwords, void **nxt) { int rval = QLA_FUNCTION_FAILED; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; @@ -254,9 +254,9 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, return rval; } for (j = 0; j < dwords; j++) { - ram[i + j] = - (IS_QLA27XX(ha) || IS_QLA28XX(ha)) ? - chunk[j] : swab32(chunk[j]); + ram[i + j] = (__force __be32) + ((IS_QLA27XX(ha) || IS_QLA28XX(ha)) ? + chunk[j] : swab32(chunk[j])); } } @@ -265,8 +265,8 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, } static int -qla24xx_dump_memory(struct qla_hw_data *ha, uint32_t *code_ram, - uint32_t cram_size, void **nxt) +qla24xx_dump_memory(struct qla_hw_data *ha, __be32 *code_ram, + uint32_t cram_size, void **nxt) { int rval; @@ -286,11 +286,11 @@ qla24xx_dump_memory(struct qla_hw_data *ha, uint32_t *code_ram, return rval; } -static uint32_t * +static __be32 * qla24xx_read_window(struct device_reg_24xx __iomem *reg, uint32_t iobase, - uint32_t count, uint32_t *buf) + uint32_t count, __be32 *buf) { - uint32_t __iomem *dmp_reg; + __le32 __iomem *dmp_reg; wrt_reg_dword(®->iobase_addr, iobase); dmp_reg = ®->iobase_window; @@ -368,7 +368,7 @@ qla24xx_soft_reset(struct qla_hw_data *ha) } static int -qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint16_t *ram, +qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, __be16 *ram, uint32_t ram_words, void **nxt) { int rval; @@ -376,7 +376,7 @@ qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint16_t *ram, uint16_t mb0; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; dma_addr_t dump_dma = ha->gid_list_dma; - uint16_t *dump = (uint16_t *)ha->gid_list; + __le16 *dump = (__force __le16 *)ha->gid_list; rval = QLA_SUCCESS; mb0 = 0; @@ -441,7 +441,8 @@ qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint16_t *ram, if (test_and_clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags)) { rval = mb0 & MBS_MASK; for (idx = 0; idx < words; idx++) - ram[cnt + idx] = swab16(dump[idx]); + ram[cnt + idx] = + cpu_to_be16(le16_to_cpu(dump[idx])); } else { rval = QLA_FUNCTION_FAILED; } @@ -453,9 +454,9 @@ qla2xxx_dump_ram(struct qla_hw_data *ha, uint32_t addr, uint16_t *ram, static inline void qla2xxx_read_window(struct device_reg_2xxx __iomem *reg, uint32_t count, - uint16_t *buf) + __be16 *buf) { - uint16_t __iomem *dmp_reg = ®->u.isp2300.fb_cmd; + __le16 __iomem *dmp_reg = ®->u.isp2300.fb_cmd; for ( ; count--; dmp_reg++) *buf++ = htons(rd_reg_word(dmp_reg)); @@ -472,10 +473,10 @@ qla24xx_copy_eft(struct qla_hw_data *ha, void *ptr) } static inline void * -qla25xx_copy_fce(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) +qla25xx_copy_fce(struct qla_hw_data *ha, void *ptr, __be32 **last_chain) { uint32_t cnt; - uint32_t *iter_reg; + __be32 *iter_reg; struct qla2xxx_fce_chain *fcec = ptr; if (!ha->fce) @@ -499,7 +500,7 @@ qla25xx_copy_fce(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) } static inline void * -qla25xx_copy_exlogin(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) +qla25xx_copy_exlogin(struct qla_hw_data *ha, void *ptr, __be32 **last_chain) { struct qla2xxx_offld_chain *c = ptr; @@ -517,11 +518,11 @@ qla25xx_copy_exlogin(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) ptr += sizeof(struct qla2xxx_offld_chain); memcpy(ptr, ha->exlogin_buf, ha->exlogin_size); - return (char *)ptr + cpu_to_be32(c->size); + return (char *)ptr + be32_to_cpu(c->size); } static inline void * -qla81xx_copy_exchoffld(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) +qla81xx_copy_exchoffld(struct qla_hw_data *ha, void *ptr, __be32 **last_chain) { struct qla2xxx_offld_chain *c = ptr; @@ -539,12 +540,12 @@ qla81xx_copy_exchoffld(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) ptr += sizeof(struct qla2xxx_offld_chain); memcpy(ptr, ha->exchoffld_buf, ha->exchoffld_size); - return (char *)ptr + cpu_to_be32(c->size); + return (char *)ptr + be32_to_cpu(c->size); } static inline void * qla2xxx_copy_atioqueues(struct qla_hw_data *ha, void *ptr, - uint32_t **last_chain) + __be32 **last_chain) { struct qla2xxx_mqueue_chain *q; struct qla2xxx_mqueue_header *qh; @@ -591,7 +592,7 @@ qla2xxx_copy_atioqueues(struct qla_hw_data *ha, void *ptr, } static inline void * -qla25xx_copy_mqueues(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) +qla25xx_copy_mqueues(struct qla_hw_data *ha, void *ptr, __be32 **last_chain) { struct qla2xxx_mqueue_chain *q; struct qla2xxx_mqueue_header *qh; @@ -662,7 +663,7 @@ qla25xx_copy_mqueues(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) } static inline void * -qla25xx_copy_mq(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain) +qla25xx_copy_mq(struct qla_hw_data *ha, void *ptr, __be32 **last_chain) { uint32_t cnt, que_idx; uint8_t que_cnt; @@ -736,7 +737,7 @@ qla2300_fw_dump(scsi_qla_host_t *vha) uint32_t cnt; struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - uint16_t __iomem *dmp_reg; + __le16 __iomem *dmp_reg; struct qla2300_fw_dump *fw; void *nxt; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); @@ -893,7 +894,7 @@ qla2100_fw_dump(scsi_qla_host_t *vha) uint16_t mb0 = 0, mb2 = 0; struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; - uint16_t __iomem *dmp_reg; + __le16 __iomem *dmp_reg; struct qla2100_fw_dump *fw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); @@ -1074,13 +1075,13 @@ qla24xx_fw_dump(scsi_qla_host_t *vha) uint32_t cnt; struct qla_hw_data *ha = vha->hw; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; - uint32_t __iomem *dmp_reg; - uint32_t *iter_reg; - uint16_t __iomem *mbx_reg; + __le32 __iomem *dmp_reg; + __be32 *iter_reg; + __le16 __iomem *mbx_reg; struct qla24xx_fw_dump *fw; void *nxt; void *nxt_chain; - uint32_t *last_chain = NULL; + __be32 *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); lockdep_assert_held(&ha->hardware_lock); @@ -1320,12 +1321,12 @@ qla25xx_fw_dump(scsi_qla_host_t *vha) uint32_t cnt; struct qla_hw_data *ha = vha->hw; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; - uint32_t __iomem *dmp_reg; - uint32_t *iter_reg; - uint16_t __iomem *mbx_reg; + __le32 __iomem *dmp_reg; + __be32 *iter_reg; + __le16 __iomem *mbx_reg; struct qla25xx_fw_dump *fw; void *nxt, *nxt_chain; - uint32_t *last_chain = NULL; + __be32 *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); lockdep_assert_held(&ha->hardware_lock); @@ -1633,12 +1634,12 @@ qla81xx_fw_dump(scsi_qla_host_t *vha) uint32_t cnt; struct qla_hw_data *ha = vha->hw; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; - uint32_t __iomem *dmp_reg; - uint32_t *iter_reg; - uint16_t __iomem *mbx_reg; + __le32 __iomem *dmp_reg; + __be32 *iter_reg; + __le16 __iomem *mbx_reg; struct qla81xx_fw_dump *fw; void *nxt, *nxt_chain; - uint32_t *last_chain = NULL; + __be32 *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); lockdep_assert_held(&ha->hardware_lock); @@ -1948,12 +1949,12 @@ qla83xx_fw_dump(scsi_qla_host_t *vha) uint32_t cnt; struct qla_hw_data *ha = vha->hw; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; - uint32_t __iomem *dmp_reg; - uint32_t *iter_reg; - uint16_t __iomem *mbx_reg; + __le32 __iomem *dmp_reg; + __be32 *iter_reg; + __le16 __iomem *mbx_reg; struct qla83xx_fw_dump *fw; void *nxt, *nxt_chain; - uint32_t *last_chain = NULL; + __be32 *last_chain = NULL; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); lockdep_assert_held(&ha->hardware_lock); @@ -2659,7 +2660,7 @@ ql_dump_regs(uint level, scsi_qla_host_t *vha, uint id) struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; struct device_reg_24xx __iomem *reg24 = &ha->iobase->isp24; struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82; - uint16_t __iomem *mbx_reg; + __le16 __iomem *mbx_reg; if (!ql_mask_match(level)) return; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 135440f4a922..6196d874d599 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -120,7 +120,7 @@ static void qla24xx_abort_iocb_timeout(void *data) if (sp->cmd_sp) sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); - abt->u.abt.comp_status = CS_TIMEOUT; + abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); sp->done(sp, QLA_OS_TIMER_EXPIRED); } @@ -1791,7 +1791,7 @@ qla2x00_tmf_iocb_timeout(void *data) } } spin_unlock_irqrestore(sp->qpair->qp_lock_ptr, flags); - tmf->u.tmf.comp_status = CS_TIMEOUT; + tmf->u.tmf.comp_status = cpu_to_le16(CS_TIMEOUT); tmf->u.tmf.data = QLA_FUNCTION_FAILED; complete(&tmf->u.tmf.comp); } @@ -4093,7 +4093,7 @@ qla24xx_config_rings(struct scsi_qla_host *vha) ql_dbg(ql_dbg_init, vha, 0x00fd, "Speed set by user : %s Gbps \n", qla2x00_get_link_speed_str(ha, ha->set_data_rate)); - icb->firmware_options_3 = (ha->set_data_rate << 13); + icb->firmware_options_3 = cpu_to_le32(ha->set_data_rate << 13); } /* PCI posting */ @@ -4184,12 +4184,14 @@ qla2x00_init_rings(scsi_qla_host_t *vha) mid_init_cb->init_cb.execution_throttle = cpu_to_le16(ha->cur_fw_xcb_count); ha->flags.dport_enabled = - (mid_init_cb->init_cb.firmware_options_1 & BIT_7) != 0; + (le32_to_cpu(mid_init_cb->init_cb.firmware_options_1) & + BIT_7) != 0; ql_dbg(ql_dbg_init, vha, 0x0191, "DPORT Support: %s.\n", (ha->flags.dport_enabled) ? "enabled" : "disabled"); /* FA-WWPN Status */ ha->flags.fawwpn_enabled = - (mid_init_cb->init_cb.firmware_options_1 & BIT_6) != 0; + (le32_to_cpu(mid_init_cb->init_cb.firmware_options_1) & + BIT_6) != 0; ql_dbg(ql_dbg_init, vha, 0x00bc, "FA-WWPN Support: %s.\n", (ha->flags.fawwpn_enabled) ? "enabled" : "disabled"); } @@ -7154,7 +7156,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha) int rval; struct init_cb_24xx *icb; struct nvram_24xx *nv; - uint32_t *dptr; + __le32 *dptr; uint8_t *dptr1, *dptr2; uint32_t chksum; uint16_t cnt; @@ -7182,7 +7184,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha) ha->nvram_base - FA_NVRAM_FUNC0_ADDR, FA_NVRAM_VPD_SIZE * 4); /* Get NVRAM data into cache and calculate checksum. */ - dptr = (uint32_t *)nv; + dptr = (__force __le32 *)nv; ha->isp_ops->read_nvram(vha, dptr, ha->nvram_base, ha->nvram_size); for (cnt = 0, chksum = 0; cnt < ha->nvram_size >> 2; cnt++, dptr++) chksum += le32_to_cpu(*dptr); @@ -7210,7 +7212,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha) memset(nv, 0, ha->nvram_size); nv->nvram_version = cpu_to_le16(ICB_VERSION); nv->version = cpu_to_le16(ICB_VERSION); - nv->frame_payload_size = 2048; + nv->frame_payload_size = cpu_to_le16(2048); nv->execution_throttle = cpu_to_le16(0xFFFF); nv->exchange_count = cpu_to_le16(0); nv->hard_address = cpu_to_le16(124); @@ -7378,7 +7380,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha) ha->login_retry_count = ql2xloginretrycount; /* N2N: driver will initiate Login instead of FW */ - icb->firmware_options_3 |= BIT_8; + icb->firmware_options_3 |= cpu_to_le32(BIT_8); /* Enable ZIO. */ if (!vha->flags.init_done) { @@ -7446,7 +7448,7 @@ qla27xx_check_image_status_signature(struct qla27xx_image_status *image_status) static ulong qla27xx_image_status_checksum(struct qla27xx_image_status *image_status) { - uint32_t *p = (uint32_t *)image_status; + __le32 *p = (__force __le32 *)image_status; uint n = sizeof(*image_status) / sizeof(*p); uint32_t sum = 0; @@ -7734,11 +7736,11 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr, ql_dbg(ql_dbg_init, vha, 0x008d, "-> Loading segment %u...\n", j); qla24xx_read_flash_data(vha, dcode, faddr, 10); - risc_addr = be32_to_cpu(dcode[2]); - risc_size = be32_to_cpu(dcode[3]); + risc_addr = be32_to_cpu((__force __be32)dcode[2]); + risc_size = be32_to_cpu((__force __be32)dcode[3]); if (!*srisc_addr) { *srisc_addr = risc_addr; - risc_attr = be32_to_cpu(dcode[9]); + risc_attr = be32_to_cpu((__force __be32)dcode[9]); } dlen = ha->fw_transfer_size >> 2; @@ -7780,7 +7782,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr, dcode = (uint32_t *)req->ring; qla24xx_read_flash_data(vha, dcode, faddr, 7); - risc_size = be32_to_cpu(dcode[2]); + risc_size = be32_to_cpu((__force __be32)dcode[2]); ql_dbg(ql_dbg_init, vha, 0x0161, "-> fwdt%u template array at %#x (%#x dwords)\n", j, faddr, risc_size); @@ -7849,7 +7851,8 @@ qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr) { int rval; int i, fragment; - uint16_t *wcode, *fwcode; + uint16_t *wcode; + __be16 *fwcode; uint32_t risc_addr, risc_size, fwclen, wlen, *seg; struct fw_blob *blob; struct qla_hw_data *ha = vha->hw; @@ -7869,7 +7872,7 @@ qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr) wcode = (uint16_t *)req->ring; *srisc_addr = 0; - fwcode = (uint16_t *)blob->fw->data; + fwcode = (__force __be16 *)blob->fw->data; fwclen = 0; /* Validate firmware image by checking version. */ @@ -7917,7 +7920,7 @@ qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr) "words 0x%x.\n", risc_addr, wlen); for (i = 0; i < wlen; i++) - wcode[i] = swab16(fwcode[i]); + wcode[i] = swab16((__force u32)fwcode[i]); rval = qla2x00_load_ram(vha, req->dma, risc_addr, wlen); @@ -7954,7 +7957,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr) ulong i; uint j; struct fw_blob *blob; - uint32_t *fwcode; + __be32 *fwcode; struct qla_hw_data *ha = vha->hw; struct req_que *req = ha->req_q_map[0]; struct fwdt *fwdt = ha->fwdt; @@ -7970,8 +7973,8 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr) return QLA_FUNCTION_FAILED; } - fwcode = (uint32_t *)blob->fw->data; - dcode = fwcode; + fwcode = (__force __be32 *)blob->fw->data; + dcode = (__force uint32_t *)fwcode; if (qla24xx_risc_firmware_invalid(dcode)) { ql_log(ql_log_fatal, vha, 0x0093, "Unable to verify integrity of firmware image (%zd).\n", @@ -8008,7 +8011,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr) dlen); for (i = 0; i < dlen; i++) - dcode[i] = swab32(fwcode[i]); + dcode[i] = swab32((__force u32)fwcode[i]); rval = qla2x00_load_ram(vha, req->dma, risc_addr, dlen); if (rval) { @@ -8062,7 +8065,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr) dcode = fwdt->template; for (i = 0; i < risc_size; i++) - dcode[i] = fwcode[i]; + dcode[i] = (__force u32)fwcode[i]; if (!qla27xx_fwdt_template_valid(dcode)) { ql_log(ql_log_warn, vha, 0x0175, @@ -8333,7 +8336,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) int rval; struct init_cb_81xx *icb; struct nvram_81xx *nv; - uint32_t *dptr; + __le32 *dptr; uint8_t *dptr1, *dptr2; uint32_t chksum; uint16_t cnt; @@ -8380,7 +8383,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) "primary" : "secondary"); ha->isp_ops->read_optrom(vha, ha->nvram, faddr << 2, ha->nvram_size); - dptr = (uint32_t *)nv; + dptr = (__force __le32 *)nv; for (cnt = 0, chksum = 0; cnt < ha->nvram_size >> 2; cnt++, dptr++) chksum += le32_to_cpu(*dptr); @@ -8407,7 +8410,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) memset(nv, 0, ha->nvram_size); nv->nvram_version = cpu_to_le16(ICB_VERSION); nv->version = cpu_to_le16(ICB_VERSION); - nv->frame_payload_size = 2048; + nv->frame_payload_size = cpu_to_le16(2048); nv->execution_throttle = cpu_to_le16(0xFFFF); nv->exchange_count = cpu_to_le16(0); nv->port_name[0] = 0x21; @@ -8451,7 +8454,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) } if (IS_T10_PI_CAPABLE(ha)) - nv->frame_payload_size &= ~7; + nv->frame_payload_size &= cpu_to_le16(~7); qlt_81xx_config_nvram_stage1(vha, nv); @@ -8614,10 +8617,10 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) } /* enable RIDA Format2 */ - icb->firmware_options_3 |= BIT_0; + icb->firmware_options_3 |= cpu_to_le32(BIT_0); /* N2N: driver will initiate Login instead of FW */ - icb->firmware_options_3 |= BIT_8; + icb->firmware_options_3 |= cpu_to_le32(BIT_8); /* Determine NVMe/FCP priority for target ports */ ha->fc4_type_priority = qla2xxx_get_fc4_priority(vha); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 3e31a175304c..b039bd83f947 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -661,7 +661,7 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt, cur_dsd->address = 0; cur_dsd->length = 0; cur_dsd++; - cmd_pkt->control_flags |= CF_DATA_SEG_DESCR_ENABLE; + cmd_pkt->control_flags |= cpu_to_le16(CF_DATA_SEG_DESCR_ENABLE); return 0; } @@ -755,8 +755,8 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt, } struct fw_dif_context { - uint32_t ref_tag; - uint16_t app_tag; + __le32 ref_tag; + __le16 app_tag; uint8_t ref_tag_mask[4]; /* Validation/Replacement Mask*/ uint8_t app_tag_mask[2]; /* Validation/Replacement Mask*/ }; @@ -1389,7 +1389,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, uint16_t tot_dsds, uint16_t tot_prot_dsds, uint16_t fw_prot_opts) { struct dsd64 *cur_dsd; - uint32_t *fcp_dl; + __be32 *fcp_dl; scsi_qla_host_t *vha; struct scsi_cmnd *cmd; uint32_t total_bytes = 0; @@ -1456,7 +1456,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, &crc_ctx_pkt->ref_tag, tot_prot_dsds); put_unaligned_le64(crc_ctx_dma, &cmd_pkt->crc_context_address); - cmd_pkt->crc_context_len = CRC_CONTEXT_LEN_FW; + cmd_pkt->crc_context_len = cpu_to_le16(CRC_CONTEXT_LEN_FW); /* Determine SCSI command length -- align to 4 byte boundary */ if (cmd->cmd_len > 16) { @@ -1545,7 +1545,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, crc_ctx_pkt->guard_seed = cpu_to_le16(0); /* Fibre channel byte count */ cmd_pkt->byte_count = cpu_to_le32(total_bytes); - fcp_dl = (uint32_t *)(crc_ctx_pkt->fcp_cmnd.cdb + 16 + + fcp_dl = (__be32 *)(crc_ctx_pkt->fcp_cmnd.cdb + 16 + additional_fcpcdb_len); *fcp_dl = htonl(total_bytes); @@ -2344,9 +2344,10 @@ qla24xx_prli_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_PRLI); if (lio->u.logio.flags & SRB_LOGIN_NVME_PRLI) { - logio->control_flags |= LCF_NVME_PRLI; + logio->control_flags |= cpu_to_le16(LCF_NVME_PRLI); if (sp->vha->flags.nvme_first_burst) - logio->io_parameter[0] = NVME_PRLI_SP_FIRST_BURST; + logio->io_parameter[0] = + cpu_to_le32(NVME_PRLI_SP_FIRST_BURST); } logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); @@ -2680,7 +2681,7 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->entry_status = 0; els_iocb->handle = sp->handle; els_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); - els_iocb->tx_dsd_count = 1; + els_iocb->tx_dsd_count = cpu_to_le16(1); els_iocb->vp_index = vha->vp_idx; els_iocb->sof_type = EST_SOFI3; els_iocb->rx_dsd_count = 0; @@ -2700,7 +2701,7 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) cpu_to_le32(sizeof(struct els_plogi_payload)); put_unaligned_le64(elsio->u.els_plogi.els_plogi_pyld_dma, &els_iocb->tx_address); - els_iocb->rx_dsd_count = 1; + els_iocb->rx_dsd_count = cpu_to_le16(1); els_iocb->rx_byte_count = els_iocb->rx_len = cpu_to_le32(sizeof(struct els_plogi_payload)); put_unaligned_le64(elsio->u.els_plogi.els_resp_pyld_dma, @@ -2712,7 +2713,7 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) (uint8_t *)els_iocb, sizeof(*els_iocb)); } else { - els_iocb->control_flags = 1 << 13; + els_iocb->control_flags = cpu_to_le16(1 << 13); els_iocb->tx_byte_count = cpu_to_le32(sizeof(struct els_logo_payload)); put_unaligned_le64(elsio->u.els_logo.els_logo_pyld_dma, @@ -2787,7 +2788,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) struct qla_work_evt *e; struct fc_port *conflict_fcport; port_id_t cid; /* conflict Nport id */ - u32 *fw_status = sp->u.iocb_cmd.u.els_plogi.fw_status; + const __le32 *fw_status = sp->u.iocb_cmd.u.els_plogi.fw_status; u16 lid; ql_dbg(ql_dbg_disc, vha, 0x3072, @@ -2800,7 +2801,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) if (sp->flags & SRB_WAKEUP_ON_COMP) complete(&lio->u.els_plogi.comp); else { - switch (fw_status[0]) { + switch (le32_to_cpu(fw_status[0])) { case CS_DATA_UNDERRUN: case CS_COMPLETE: memset(&ea, 0, sizeof(ea)); @@ -2810,9 +2811,9 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) break; case CS_IOCB_ERROR: - switch (fw_status[1]) { + switch (le32_to_cpu(fw_status[1])) { case LSC_SCODE_PORTID_USED: - lid = fw_status[2] & 0xffff; + lid = le32_to_cpu(fw_status[2]) & 0xffff; qlt_find_sess_invalidate_other(vha, wwn_to_u64(fcport->port_name), fcport->d_id, lid, &conflict_fcport); @@ -2846,9 +2847,11 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) break; case LSC_SCODE_NPORT_USED: - cid.b.domain = (fw_status[2] >> 16) & 0xff; - cid.b.area = (fw_status[2] >> 8) & 0xff; - cid.b.al_pa = fw_status[2] & 0xff; + cid.b.domain = (le32_to_cpu(fw_status[2]) >> 16) + & 0xff; + cid.b.area = (le32_to_cpu(fw_status[2]) >> 8) + & 0xff; + cid.b.al_pa = le32_to_cpu(fw_status[2]) & 0xff; cid.b.rsvd_1 = 0; ql_dbg(ql_dbg_disc, vha, 0x20ec, @@ -3022,7 +3025,7 @@ qla24xx_els_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->sys_define = 0; els_iocb->entry_status = 0; els_iocb->handle = sp->handle; - els_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); + els_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); els_iocb->tx_dsd_count = cpu_to_le16(bsg_job->request_payload.sg_cnt); els_iocb->vp_index = sp->vha->vp_idx; els_iocb->sof_type = EST_SOFI3; @@ -3216,7 +3219,7 @@ qla82xx_start_scsi(srb_t *sp) uint16_t tot_dsds; struct device_reg_82xx __iomem *reg; uint32_t dbval; - uint32_t *fcp_dl; + __be32 *fcp_dl; uint8_t additional_cdb_len; struct ct6_dsd *ctx; struct scsi_qla_host *vha = sp->vha; @@ -3398,7 +3401,7 @@ qla82xx_start_scsi(srb_t *sp) memcpy(ctx->fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len); - fcp_dl = (uint32_t *)(ctx->fcp_cmnd->cdb + 16 + + fcp_dl = (__be32 *)(ctx->fcp_cmnd->cdb + 16 + additional_cdb_len); *fcp_dl = htonl((uint32_t)scsi_bufflen(cmd)); @@ -3536,7 +3539,7 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) memset(abt_iocb, 0, sizeof(struct abort_entry_24xx)); abt_iocb->entry_type = ABORT_IOCB_TYPE; abt_iocb->entry_count = 1; - abt_iocb->handle = cpu_to_le32(make_handle(req->id, sp->handle)); + abt_iocb->handle = make_handle(req->id, sp->handle); if (sp->fcport) { abt_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); abt_iocb->port_id[0] = sp->fcport->d_id.b.al_pa; @@ -3544,10 +3547,10 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) abt_iocb->port_id[2] = sp->fcport->d_id.b.domain; } abt_iocb->handle_to_abort = - cpu_to_le32(make_handle(aio->u.abt.req_que_no, - aio->u.abt.cmd_hndl)); + make_handle(le16_to_cpu(aio->u.abt.req_que_no), + aio->u.abt.cmd_hndl); abt_iocb->vp_index = vha->vp_idx; - abt_iocb->req_que_no = cpu_to_le16(aio->u.abt.req_que_no); + abt_iocb->req_que_no = aio->u.abt.req_que_no; /* Send the command to the firmware */ wmb(); } @@ -3562,7 +3565,7 @@ qla2x00_mb_iocb(srb_t *sp, struct mbx_24xx_entry *mbx) sz = min(ARRAY_SIZE(mbx->mb), ARRAY_SIZE(sp->u.iocb_cmd.u.mbx.out_mb)); for (i = 0; i < sz; i++) - mbx->mb[i] = cpu_to_le16(sp->u.iocb_cmd.u.mbx.out_mb[i]); + mbx->mb[i] = sp->u.iocb_cmd.u.mbx.out_mb[i]; } static void @@ -3586,7 +3589,7 @@ static void qla2x00_send_notify_ack_iocb(srb_t *sp, nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { nack->u.isp24.flags = ntfy->u.isp24.flags & - cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB); + cpu_to_le16(NOTIFY24XX_FLAGS_PUREX_IOCB); } nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; nack->u.isp24.status = ntfy->u.isp24.status; @@ -3613,20 +3616,20 @@ qla_nvme_ls(srb_t *sp, struct pt_ls4_request *cmd_pkt) nvme = &sp->u.iocb_cmd; cmd_pkt->entry_type = PT_LS4_REQUEST; cmd_pkt->entry_count = 1; - cmd_pkt->control_flags = CF_LS4_ORIGINATOR << CF_LS4_SHIFT; + cmd_pkt->control_flags = cpu_to_le16(CF_LS4_ORIGINATOR << CF_LS4_SHIFT); cmd_pkt->timeout = cpu_to_le16(nvme->u.nvme.timeout_sec); cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id); cmd_pkt->vp_index = sp->fcport->vha->vp_idx; - cmd_pkt->tx_dseg_count = 1; - cmd_pkt->tx_byte_count = nvme->u.nvme.cmd_len; - cmd_pkt->dsd[0].length = nvme->u.nvme.cmd_len; + cmd_pkt->tx_dseg_count = cpu_to_le16(1); + cmd_pkt->tx_byte_count = cpu_to_le32(nvme->u.nvme.cmd_len); + cmd_pkt->dsd[0].length = cpu_to_le32(nvme->u.nvme.cmd_len); put_unaligned_le64(nvme->u.nvme.cmd_dma, &cmd_pkt->dsd[0].address); - cmd_pkt->rx_dseg_count = 1; - cmd_pkt->rx_byte_count = nvme->u.nvme.rsp_len; - cmd_pkt->dsd[1].length = nvme->u.nvme.rsp_len; + cmd_pkt->rx_dseg_count = cpu_to_le16(1); + cmd_pkt->rx_byte_count = cpu_to_le32(nvme->u.nvme.rsp_len); + cmd_pkt->dsd[1].length = cpu_to_le32(nvme->u.nvme.rsp_len); put_unaligned_le64(nvme->u.nvme.rsp_dma, &cmd_pkt->dsd[1].address); return rval; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 0a9a838c7f20..96d64a7034cc 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -90,9 +90,9 @@ qla24xx_process_abts(struct scsi_qla_host *vha, void *pkt) memset(rsp_els, 0, sizeof(*rsp_els)); rsp_els->entry_type = ELS_IOCB_TYPE; rsp_els->entry_count = 1; - rsp_els->nport_handle = ~0; + rsp_els->nport_handle = cpu_to_le16(~0); rsp_els->rx_xchg_address = abts->rx_xch_addr_to_abort; - rsp_els->control_flags = EPD_RX_XCHG; + rsp_els->control_flags = cpu_to_le16(EPD_RX_XCHG); ql_dbg(ql_dbg_init, vha, 0x0283, "Sending ELS Response to terminate exchange %#x...\n", abts->rx_xch_addr_to_abort); @@ -142,7 +142,7 @@ qla24xx_process_abts(struct scsi_qla_host *vha, void *pkt) abts_rsp->ox_id = abts->ox_id; abts_rsp->payload.ba_acc.aborted_rx_id = abts->rx_id; abts_rsp->payload.ba_acc.aborted_ox_id = abts->ox_id; - abts_rsp->payload.ba_acc.high_seq_cnt = ~0; + abts_rsp->payload.ba_acc.high_seq_cnt = cpu_to_le16(~0); abts_rsp->rx_xch_addr_to_abort = abts->rx_xch_addr_to_abort; ql_dbg(ql_dbg_init, vha, 0x028b, "Sending BA ACC response to ABTS %#x...\n", @@ -413,7 +413,7 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) { uint16_t cnt; uint32_t mboxes; - uint16_t __iomem *wptr; + __le16 __iomem *wptr; struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; @@ -429,11 +429,11 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) ha->flags.mbox_int = 1; ha->mailbox_out[0] = mb0; mboxes >>= 1; - wptr = (uint16_t __iomem *)MAILBOX_REG(ha, reg, 1); + wptr = MAILBOX_REG(ha, reg, 1); for (cnt = 1; cnt < ha->mbx_count; cnt++) { if (IS_QLA2200(ha) && cnt == 8) - wptr = (uint16_t __iomem *)MAILBOX_REG(ha, reg, 8); + wptr = MAILBOX_REG(ha, reg, 8); if ((cnt == 4 || cnt == 5) && (mboxes & BIT_0)) ha->mailbox_out[cnt] = qla2x00_debounce_register(wptr); else if (mboxes & BIT_0) @@ -457,9 +457,9 @@ qla81xx_idc_event(scsi_qla_host_t *vha, uint16_t aen, uint16_t descr) /* Seed data -- mailbox1 -> mailbox7. */ if (IS_QLA81XX(vha->hw) || IS_QLA83XX(vha->hw)) - wptr = (uint16_t __iomem *)®24->mailbox1; + wptr = ®24->mailbox1; else if (IS_QLA8044(vha->hw)) - wptr = (uint16_t __iomem *)®82->mailbox_out[1]; + wptr = ®82->mailbox_out[1]; else return; @@ -819,7 +819,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) goto skip_rio; switch (mb[0]) { case MBA_SCSI_COMPLETION: - handles[0] = le32_to_cpu(make_handle(mb[2], mb[1])); + handles[0] = make_handle(mb[2], mb[1]); handle_cnt = 1; break; case MBA_CMPLT_1_16BIT: @@ -858,10 +858,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) mb[0] = MBA_SCSI_COMPLETION; break; case MBA_CMPLT_2_32BIT: - handles[0] = le32_to_cpu(make_handle(mb[2], mb[1])); - handles[1] = - le32_to_cpu(make_handle(RD_MAILBOX_REG(ha, reg, 7), - RD_MAILBOX_REG(ha, reg, 6))); + handles[0] = make_handle(mb[2], mb[1]); + handles[1] = make_handle(RD_MAILBOX_REG(ha, reg, 7), + RD_MAILBOX_REG(ha, reg, 6)); handle_cnt = 2; mb[0] = MBA_SCSI_COMPLETION; break; @@ -1667,7 +1666,7 @@ qla24xx_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, sz = min(ARRAY_SIZE(pkt->mb), ARRAY_SIZE(sp->u.iocb_cmd.u.mbx.in_mb)); for (i = 0; i < sz; i++) - si->u.mbx.in_mb[i] = le16_to_cpu(pkt->mb[i]); + si->u.mbx.in_mb[i] = pkt->mb[i]; res = (si->u.mbx.in_mb[0] & MBS_MASK); @@ -1768,6 +1767,7 @@ static void qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, struct sts_entry_24xx *pkt, int iocb_type) { + struct els_sts_entry_24xx *ese = (struct els_sts_entry_24xx *)pkt; const char func[] = "ELS_CT_IOCB"; const char *type; srb_t *sp; @@ -1817,23 +1817,22 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, } comp_status = fw_status[0] = le16_to_cpu(pkt->comp_status); - fw_status[1] = le16_to_cpu(((struct els_sts_entry_24xx *)pkt)->error_subcode_1); - fw_status[2] = le16_to_cpu(((struct els_sts_entry_24xx *)pkt)->error_subcode_2); + fw_status[1] = le32_to_cpu(ese->error_subcode_1); + fw_status[2] = le32_to_cpu(ese->error_subcode_2); if (iocb_type == ELS_IOCB_TYPE) { els = &sp->u.iocb_cmd; - els->u.els_plogi.fw_status[0] = fw_status[0]; - els->u.els_plogi.fw_status[1] = fw_status[1]; - els->u.els_plogi.fw_status[2] = fw_status[2]; - els->u.els_plogi.comp_status = fw_status[0]; + els->u.els_plogi.fw_status[0] = cpu_to_le32(fw_status[0]); + els->u.els_plogi.fw_status[1] = cpu_to_le32(fw_status[1]); + els->u.els_plogi.fw_status[2] = cpu_to_le32(fw_status[2]); + els->u.els_plogi.comp_status = cpu_to_le16(fw_status[0]); if (comp_status == CS_COMPLETE) { res = DID_OK << 16; } else { if (comp_status == CS_DATA_UNDERRUN) { res = DID_OK << 16; - els->u.els_plogi.len = - le16_to_cpu(((struct els_sts_entry_24xx *) - pkt)->total_byte_count); + els->u.els_plogi.len = cpu_to_le16(le32_to_cpu( + ese->total_byte_count)); } else { els->u.els_plogi.len = 0; res = DID_ERROR << 16; @@ -1842,8 +1841,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, ql_dbg(ql_dbg_user, vha, 0x503f, "ELS IOCB Done -%s error hdl=%x comp_status=0x%x error subcode 1=0x%x error subcode 2=0x%x total_byte=0x%x\n", type, sp->handle, comp_status, fw_status[1], fw_status[2], - le16_to_cpu(((struct els_sts_entry_24xx *) - pkt)->total_byte_count)); + le32_to_cpu(ese->total_byte_count)); goto els_ct_done; } @@ -1859,23 +1857,20 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, if (comp_status == CS_DATA_UNDERRUN) { res = DID_OK << 16; bsg_reply->reply_payload_rcv_len = - le16_to_cpu(((struct els_sts_entry_24xx *)pkt)->total_byte_count); + le32_to_cpu(ese->total_byte_count); ql_dbg(ql_dbg_user, vha, 0x503f, "ELS-CT pass-through-%s error hdl=%x comp_status-status=0x%x " "error subcode 1=0x%x error subcode 2=0x%x total_byte = 0x%x.\n", type, sp->handle, comp_status, fw_status[1], fw_status[2], - le16_to_cpu(((struct els_sts_entry_24xx *) - pkt)->total_byte_count)); + le32_to_cpu(ese->total_byte_count)); } else { ql_dbg(ql_dbg_user, vha, 0x5040, "ELS-CT pass-through-%s error hdl=%x comp_status-status=0x%x " "error subcode 1=0x%x error subcode 2=0x%x.\n", type, sp->handle, comp_status, - le16_to_cpu(((struct els_sts_entry_24xx *) - pkt)->error_subcode_1), - le16_to_cpu(((struct els_sts_entry_24xx *) - pkt)->error_subcode_2)); + le32_to_cpu(ese->error_subcode_1), + le32_to_cpu(ese->error_subcode_2)); res = DID_ERROR << 16; bsg_reply->reply_payload_rcv_len = 0; } @@ -2083,7 +2078,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, uint16_t state_flags; struct nvmefc_fcp_req *fd; uint16_t ret = QLA_SUCCESS; - uint16_t comp_status = le16_to_cpu(sts->comp_status); + __le16 comp_status = sts->comp_status; int logit = 0; iocb = &sp->u.iocb_cmd; @@ -2114,7 +2109,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, } else if ((state_flags & (SF_FCP_RSP_DMA | SF_NVME_ERSP)) == (SF_FCP_RSP_DMA | SF_NVME_ERSP)) { /* Response already DMA'd to fd->rspaddr. */ - iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); + iocb->u.nvme.rsp_pyld_len = sts->nvme_rsp_pyld_len; } else if ((state_flags & SF_FCP_RSP_DMA)) { /* * Non-zero value in first 12 bytes of NVMe_RSP IU, treat this @@ -2131,8 +2126,8 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, inbuf = (uint32_t *)&sts->nvme_ersp_data; outbuf = (uint32_t *)fd->rspaddr; - iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); - if (unlikely(iocb->u.nvme.rsp_pyld_len > + iocb->u.nvme.rsp_pyld_len = sts->nvme_rsp_pyld_len; + if (unlikely(le16_to_cpu(iocb->u.nvme.rsp_pyld_len) > sizeof(struct nvme_fc_ersp_iu))) { if (ql_mask_match(ql_dbg_io)) { WARN_ONCE(1, "Unexpected response payload length %u.\n", @@ -2142,9 +2137,9 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, iocb->u.nvme.rsp_pyld_len); } iocb->u.nvme.rsp_pyld_len = - sizeof(struct nvme_fc_ersp_iu); + cpu_to_le16(sizeof(struct nvme_fc_ersp_iu)); } - iter = iocb->u.nvme.rsp_pyld_len >> 2; + iter = le16_to_cpu(iocb->u.nvme.rsp_pyld_len) >> 2; for (; iter; iter--) *outbuf++ = swab32(*inbuf++); } @@ -2159,7 +2154,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, "Dropped frame(s) detected (sent/rcvd=%u/%u).\n", tgt_xfer_len, fd->transferred_length); logit = 1; - } else if (comp_status == CS_DATA_UNDERRUN) { + } else if (le16_to_cpu(comp_status) == CS_DATA_UNDERRUN) { /* * Do not log if this is just an underflow and there * is no data loss. @@ -2179,7 +2174,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, * If transport error then Failure (HBA rejects request) * otherwise transport will handle. */ - switch (comp_status) { + switch (le16_to_cpu(comp_status)) { case CS_COMPLETE: break; @@ -2412,9 +2407,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) * For type 3: ref & app tag is all 'f's * For type 0,1,2: app tag is all 'f's */ - if ((a_app_tag == T10_PI_APP_ESCAPE) && - ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) || - (a_ref_tag == T10_PI_REF_ESCAPE))) { + if (a_app_tag == be16_to_cpu(T10_PI_APP_ESCAPE) && + (scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3 || + a_ref_tag == be32_to_cpu(T10_PI_REF_ESCAPE))) { uint32_t blocks_done, resid; sector_t lba_s = scsi_get_lba(cmd); @@ -2772,6 +2767,8 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) sense_len = par_sense_len = rsp_info_len = resid_len = fw_resid_len = 0; if (IS_FWI2_CAPABLE(ha)) { + u16 sts24_retry_delay = le16_to_cpu(sts24->retry_delay); + if (scsi_status & SS_SENSE_LEN_VALID) sense_len = le32_to_cpu(sts24->sense_len); if (scsi_status & SS_RESPONSE_INFO_LEN_VALID) @@ -2786,11 +2783,11 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) ox_id = le16_to_cpu(sts24->ox_id); par_sense_len = sizeof(sts24->data); /* Valid values of the retry delay timer are 0x1-0xffef */ - if (sts24->retry_delay > 0 && sts24->retry_delay < 0xfff1) { - retry_delay = sts24->retry_delay & 0x3fff; + if (sts24_retry_delay > 0 && sts24_retry_delay < 0xfff1) { + retry_delay = sts24_retry_delay & 0x3fff; ql_dbg(ql_dbg_io, sp->vha, 0x3033, "%s: scope=%#x retry_delay=%#x\n", __func__, - sts24->retry_delay >> 14, retry_delay); + sts24_retry_delay >> 14, retry_delay); } } else { if (scsi_status & SS_SENSE_LEN_VALID) @@ -3180,7 +3177,7 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) ha->flags.mbox_int = 1; ha->mailbox_out[0] = mb0; mboxes >>= 1; - wptr = (uint16_t __iomem *)®->mailbox1; + wptr = ®->mailbox1; for (cnt = 1; cnt < ha->mbx_count; cnt++) { if (mboxes & BIT_0) @@ -3204,7 +3201,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, return; abt = &sp->u.iocb_cmd; - abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle); + abt->u.abt.comp_status = pkt->nport_handle; sp->done(sp, 0); } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index e6ab5f07406d..3b4760e80f00 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -208,11 +208,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) /* Load mailbox registers. */ if (IS_P3P_TYPE(ha)) - optr = (uint16_t __iomem *)®->isp82.mailbox_in[0]; + optr = ®->isp82.mailbox_in[0]; else if (IS_FWI2_CAPABLE(ha) && !(IS_P3P_TYPE(ha))) - optr = (uint16_t __iomem *)®->isp24.mailbox0; + optr = ®->isp24.mailbox0; else - optr = (uint16_t __iomem *)MAILBOX_REG(ha, ®->isp, 0); + optr = MAILBOX_REG(ha, ®->isp, 0); iptr = mcp->mb; command = mcp->mb[0]; @@ -222,8 +222,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) "Mailbox registers (OUT):\n"); for (cnt = 0; cnt < ha->mbx_count; cnt++) { if (IS_QLA2200(ha) && cnt == 8) - optr = - (uint16_t __iomem *)MAILBOX_REG(ha, ®->isp, 8); + optr = MAILBOX_REG(ha, ®->isp, 8); if (mboxes & BIT_0) { ql_dbg(ql_dbg_mbx, vha, 0x1112, "mbox[%d]<-0x%04x\n", cnt, *iptr); @@ -3110,8 +3109,8 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, mc.mb[6] = MSW(MSD(stats_dma)); mc.mb[7] = LSW(MSD(stats_dma)); mc.mb[8] = dwords; - mc.mb[9] = cpu_to_le16(vha->vp_idx); - mc.mb[10] = cpu_to_le16(options); + mc.mb[9] = vha->vp_idx; + mc.mb[10] = options; rval = qla24xx_send_mb_cmd(vha, &mc); @@ -3204,7 +3203,7 @@ qla24xx_abort_command(srb_t *sp) ql_dbg(ql_dbg_mbx, vha, 0x1090, "Failed to complete IOCB -- completion status (%x).\n", le16_to_cpu(abt->nport_handle)); - if (abt->nport_handle == CS_IOCB_ERROR) + if (abt->nport_handle == cpu_to_le16(CS_IOCB_ERROR)) rval = QLA_FUNCTION_PARAMETER_ERROR; else rval = QLA_FUNCTION_FAILED; @@ -4727,7 +4726,7 @@ qla82xx_set_driver_version(scsi_qla_host_t *vha, char *version) mbx_cmd_t *mcp = &mc; int i; int len; - uint16_t *str; + __le16 *str; struct qla_hw_data *ha = vha->hw; if (!IS_P3P_TYPE(ha)) @@ -4736,14 +4735,14 @@ qla82xx_set_driver_version(scsi_qla_host_t *vha, char *version) ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x117b, "Entered %s.\n", __func__); - str = (uint16_t *)version; + str = (__force __le16 *)version; len = strlen(version); mcp->mb[0] = MBC_SET_RNID_PARAMS; mcp->mb[1] = RNID_TYPE_SET_VERSION << 8; mcp->out_mb = MBX_1|MBX_0; for (i = 4; i < 16 && len; i++, str++, len -= 2) { - mcp->mb[i] = cpu_to_le16p(str); + mcp->mb[i] = le16_to_cpup(str); mcp->out_mb |= 1<loop_id); + mc.mb[1] = fcport->loop_id; mc.mb[2] = MSW(pd_dma); mc.mb[3] = LSW(pd_dma); mc.mb[6] = MSW(MSD(pd_dma)); mc.mb[7] = LSW(MSD(pd_dma)); - mc.mb[9] = cpu_to_le16(vha->vp_idx); - mc.mb[10] = cpu_to_le16((uint16_t)opt); + mc.mb[9] = vha->vp_idx; + mc.mb[10] = opt; rval = qla24xx_send_mb_cmd(vha, &mc); if (rval != QLA_SUCCESS) { @@ -6589,7 +6588,7 @@ int qla24xx_gidlist_wait(struct scsi_qla_host *vha, mc.mb[6] = MSW(MSD(id_list_dma)); mc.mb[7] = LSW(MSD(id_list_dma)); mc.mb[8] = 0; - mc.mb[9] = cpu_to_le16(vha->vp_idx); + mc.mb[9] = vha->vp_idx; rval = qla24xx_send_mb_cmd(vha, &mc); if (rval != QLA_SUCCESS) { @@ -6615,8 +6614,8 @@ int qla27xx_set_zio_threshold(scsi_qla_host_t *vha, uint16_t value) memset(mcp->mb, 0 , sizeof(mcp->mb)); mcp->mb[0] = MBC_GET_SET_ZIO_THRESHOLD; - mcp->mb[1] = cpu_to_le16(1); - mcp->mb[2] = cpu_to_le16(value); + mcp->mb[1] = 1; + mcp->mb[2] = value; mcp->out_mb = MBX_2 | MBX_1 | MBX_0; mcp->in_mb = MBX_2 | MBX_0; mcp->tov = MBX_TOV_SECONDS; @@ -6641,7 +6640,7 @@ int qla27xx_get_zio_threshold(scsi_qla_host_t *vha, uint16_t *value) memset(mcp->mb, 0, sizeof(mcp->mb)); mcp->mb[0] = MBC_GET_SET_ZIO_THRESHOLD; - mcp->mb[1] = cpu_to_le16(0); + mcp->mb[1] = 0; mcp->out_mb = MBX_1 | MBX_0; mcp->in_mb = MBX_2 | MBX_0; mcp->tov = MBX_TOV_SECONDS; diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 908594c1541e..a8fe4f725fa0 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -3202,7 +3202,7 @@ qlafx00_tm_iocb(srb_t *sp, struct tsk_mgmt_entry_fx00 *ptm_iocb) memset(&tm_iocb, 0, sizeof(struct tsk_mgmt_entry_fx00)); tm_iocb.entry_type = TSK_MGMT_IOCB_TYPE_FX00; tm_iocb.entry_count = 1; - tm_iocb.handle = cpu_to_le32(make_handle(req->id, sp->handle)); + tm_iocb.handle = make_handle(req->id, sp->handle); tm_iocb.reserved_0 = 0; tm_iocb.tgt_id = cpu_to_le16(sp->fcport->tgt_id); tm_iocb.control_flags = cpu_to_le32(fxio->u.tmf.flags); @@ -3228,9 +3228,8 @@ qlafx00_abort_iocb(srb_t *sp, struct abort_iocb_entry_fx00 *pabt_iocb) memset(&abt_iocb, 0, sizeof(struct abort_iocb_entry_fx00)); abt_iocb.entry_type = ABORT_IOCB_TYPE_FX00; abt_iocb.entry_count = 1; - abt_iocb.handle = cpu_to_le32(make_handle(req->id, sp->handle)); - abt_iocb.abort_handle = - cpu_to_le32(make_handle(req->id, fxio->u.abt.cmd_hndl)); + abt_iocb.handle = make_handle(req->id, sp->handle); + abt_iocb.abort_handle = make_handle(req->id, fxio->u.abt.cmd_hndl); abt_iocb.tgt_id_sts = cpu_to_le16(sp->fcport->tgt_id); abt_iocb.req_que_no = cpu_to_le16(req->id); @@ -3251,7 +3250,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) memset(&fx_iocb, 0, sizeof(struct fxdisc_entry_fx00)); fx_iocb.entry_type = FX00_IOCB_TYPE; - fx_iocb.handle = cpu_to_le32(sp->handle); + fx_iocb.handle = sp->handle; fx_iocb.entry_count = entry_cnt; if (sp->type == SRB_FXIOCB_DCMD) { diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 6f20e20559bb..d66d47a0f958 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -138,7 +138,7 @@ static void qla_nvme_release_fcp_cmd_kref(struct kref *kref) priv->sp = NULL; sp->priv = NULL; if (priv->comp_status == QLA_SUCCESS) { - fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len; + fd->rcv_rsplen = le16_to_cpu(nvme->u.nvme.rsp_pyld_len); } else { fd->rcv_rsplen = 0; fd->transferred_length = 0; @@ -426,11 +426,11 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp) /* No data transfer how do we check buffer len == 0?? */ if (fd->io_dir == NVMEFC_FCP_READ) { - cmd_pkt->control_flags = CF_READ_DATA; + cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA); vha->qla_stats.input_bytes += fd->payload_length; vha->qla_stats.input_requests++; } else if (fd->io_dir == NVMEFC_FCP_WRITE) { - cmd_pkt->control_flags = CF_WRITE_DATA; + cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA); if ((vha->flags.nvme_first_burst) && (sp->fcport->nvme_prli_service_param & NVME_PRLI_SP_FIRST_BURST)) { @@ -438,7 +438,7 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp) sp->fcport->nvme_first_burst_size) || (sp->fcport->nvme_first_burst_size == 0)) cmd_pkt->control_flags |= - CF_NVME_FIRST_BURST_ENABLE; + cpu_to_le16(CF_NVME_FIRST_BURST_ENABLE); } vha->qla_stats.output_bytes += fd->payload_length; vha->qla_stats.output_requests++; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 293dbde1d6e4..21f968e4a584 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1561,14 +1561,14 @@ qla82xx_get_table_desc(const u8 *unirom, int section) uint32_t i; struct qla82xx_uri_table_desc *directory = (struct qla82xx_uri_table_desc *)&unirom[0]; - __le32 offset; - __le32 tab_type; - __le32 entries = cpu_to_le32(directory->num_entries); + uint32_t offset; + uint32_t tab_type; + uint32_t entries = le32_to_cpu(directory->num_entries); for (i = 0; i < entries; i++) { - offset = cpu_to_le32(directory->findex) + - (i * cpu_to_le32(directory->entry_size)); - tab_type = cpu_to_le32(*((u32 *)&unirom[offset] + 8)); + offset = le32_to_cpu(directory->findex) + + (i * le32_to_cpu(directory->entry_size)); + tab_type = get_unaligned_le32((u32 *)&unirom[offset] + 8); if (tab_type == section) return (struct qla82xx_uri_table_desc *)&unirom[offset]; @@ -1582,16 +1582,17 @@ qla82xx_get_data_desc(struct qla_hw_data *ha, u32 section, u32 idx_offset) { const u8 *unirom = ha->hablob->fw->data; - int idx = cpu_to_le32(*((int *)&unirom[ha->file_prd_off] + idx_offset)); + int idx = get_unaligned_le32((u32 *)&unirom[ha->file_prd_off] + + idx_offset); struct qla82xx_uri_table_desc *tab_desc = NULL; - __le32 offset; + uint32_t offset; tab_desc = qla82xx_get_table_desc(unirom, section); if (!tab_desc) return NULL; - offset = cpu_to_le32(tab_desc->findex) + - (cpu_to_le32(tab_desc->entry_size) * idx); + offset = le32_to_cpu(tab_desc->findex) + + (le32_to_cpu(tab_desc->entry_size) * idx); return (struct qla82xx_uri_data_desc *)&unirom[offset]; } @@ -1606,7 +1607,7 @@ qla82xx_get_bootld_offset(struct qla_hw_data *ha) uri_desc = qla82xx_get_data_desc(ha, QLA82XX_URI_DIR_SECT_BOOTLD, QLA82XX_URI_BOOTLD_IDX_OFF); if (uri_desc) - offset = cpu_to_le32(uri_desc->findex); + offset = le32_to_cpu(uri_desc->findex); } return (u8 *)&ha->hablob->fw->data[offset]; @@ -1620,7 +1621,7 @@ static u32 qla82xx_get_fw_size(struct qla_hw_data *ha) uri_desc = qla82xx_get_data_desc(ha, QLA82XX_URI_DIR_SECT_FW, QLA82XX_URI_FIRMWARE_IDX_OFF); if (uri_desc) - return cpu_to_le32(uri_desc->size); + return le32_to_cpu(uri_desc->size); } return get_unaligned_le32(&ha->hablob->fw->data[FW_SIZE_OFFSET]); @@ -1636,7 +1637,7 @@ qla82xx_get_fw_offs(struct qla_hw_data *ha) uri_desc = qla82xx_get_data_desc(ha, QLA82XX_URI_DIR_SECT_FW, QLA82XX_URI_FIRMWARE_IDX_OFF); if (uri_desc) - offset = cpu_to_le32(uri_desc->findex); + offset = le32_to_cpu(uri_desc->findex); } return (u8 *)&ha->hablob->fw->data[offset]; @@ -1847,8 +1848,8 @@ qla82xx_set_product_offset(struct qla_hw_data *ha) struct qla82xx_uri_table_desc *ptab_desc = NULL; const uint8_t *unirom = ha->hablob->fw->data; uint32_t i; - __le32 entries; - __le32 flags, file_chiprev, offset; + uint32_t entries; + uint32_t flags, file_chiprev, offset; uint8_t chiprev = ha->chip_revision; /* Hardcoding mn_present flag for P3P */ int mn_present = 0; @@ -1859,14 +1860,14 @@ qla82xx_set_product_offset(struct qla_hw_data *ha) if (!ptab_desc) return -1; - entries = cpu_to_le32(ptab_desc->num_entries); + entries = le32_to_cpu(ptab_desc->num_entries); for (i = 0; i < entries; i++) { - offset = cpu_to_le32(ptab_desc->findex) + - (i * cpu_to_le32(ptab_desc->entry_size)); - flags = cpu_to_le32(*((int *)&unirom[offset] + + offset = le32_to_cpu(ptab_desc->findex) + + (i * le32_to_cpu(ptab_desc->entry_size)); + flags = le32_to_cpu(*((__le32 *)&unirom[offset] + QLA82XX_URI_FLAGS_OFF)); - file_chiprev = cpu_to_le32(*((int *)&unirom[offset] + + file_chiprev = le32_to_cpu(*((__le32 *)&unirom[offset] + QLA82XX_URI_CHIP_REV_OFF)); flagbit = mn_present ? 1 : 2; @@ -2549,8 +2550,8 @@ qla82xx_start_firmware(scsi_qla_host_t *vha) return qla82xx_check_rcvpeg_state(ha); } -static uint32_t * -qla82xx_read_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, +static __le32 * +qla82xx_read_flash_data(scsi_qla_host_t *vha, __le32 *dwptr, uint32_t faddr, uint32_t length) { uint32_t i; @@ -2675,13 +2676,13 @@ qla82xx_read_optrom_data(struct scsi_qla_host *vha, void *buf, uint32_t offset, uint32_t length) { scsi_block_requests(vha->host); - qla82xx_read_flash_data(vha, (uint32_t *)buf, offset, length); + qla82xx_read_flash_data(vha, buf, offset, length); scsi_unblock_requests(vha->host); return buf; } static int -qla82xx_write_flash_data(struct scsi_qla_host *vha, uint32_t *dwptr, +qla82xx_write_flash_data(struct scsi_qla_host *vha, __le32 *dwptr, uint32_t faddr, uint32_t dwords) { int ret; @@ -2758,7 +2759,7 @@ qla82xx_write_flash_data(struct scsi_qla_host *vha, uint32_t *dwptr, } ret = qla82xx_write_flash_dword(ha, faddr, - cpu_to_le32(*dwptr)); + le32_to_cpu(*dwptr)); if (ret) { ql_dbg(ql_dbg_p3p, vha, 0xb020, "Unable to program flash address=%x data=%x.\n", @@ -3724,7 +3725,7 @@ qla82xx_chip_reset_cleanup(scsi_qla_host_t *vha) /* Minidump related functions */ static int qla82xx_minidump_process_control(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; struct qla82xx_md_entry_crb *crb_entry; @@ -3841,12 +3842,12 @@ qla82xx_minidump_process_control(scsi_qla_host_t *vha, static void qla82xx_minidump_process_rdocm(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t r_addr, r_stride, loop_cnt, i, r_value; struct qla82xx_md_entry_rdocm *ocm_hdr; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; ocm_hdr = (struct qla82xx_md_entry_rdocm *)entry_hdr; r_addr = ocm_hdr->read_addr; @@ -3863,12 +3864,12 @@ qla82xx_minidump_process_rdocm(scsi_qla_host_t *vha, static void qla82xx_minidump_process_rdmux(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t r_addr, s_stride, s_addr, s_value, loop_cnt, i, r_value; struct qla82xx_md_entry_mux *mux_hdr; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; mux_hdr = (struct qla82xx_md_entry_mux *)entry_hdr; r_addr = mux_hdr->read_addr; @@ -3889,12 +3890,12 @@ qla82xx_minidump_process_rdmux(scsi_qla_host_t *vha, static void qla82xx_minidump_process_rdcrb(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t r_addr, r_stride, loop_cnt, i, r_value; struct qla82xx_md_entry_crb *crb_hdr; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; crb_hdr = (struct qla82xx_md_entry_crb *)entry_hdr; r_addr = crb_hdr->addr; @@ -3912,7 +3913,7 @@ qla82xx_minidump_process_rdcrb(scsi_qla_host_t *vha, static int qla82xx_minidump_process_l2tag(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t addr, r_addr, c_addr, t_r_addr; @@ -3921,7 +3922,7 @@ qla82xx_minidump_process_l2tag(scsi_qla_host_t *vha, uint32_t c_value_w, c_value_r; struct qla82xx_md_entry_cache *cache_hdr; int rval = QLA_FUNCTION_FAILED; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; cache_hdr = (struct qla82xx_md_entry_cache *)entry_hdr; loop_count = cache_hdr->op_count; @@ -3971,14 +3972,14 @@ qla82xx_minidump_process_l2tag(scsi_qla_host_t *vha, static void qla82xx_minidump_process_l1cache(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t addr, r_addr, c_addr, t_r_addr; uint32_t i, k, loop_count, t_value, r_cnt, r_value; uint32_t c_value_w; struct qla82xx_md_entry_cache *cache_hdr; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; cache_hdr = (struct qla82xx_md_entry_cache *)entry_hdr; loop_count = cache_hdr->op_count; @@ -4006,14 +4007,14 @@ qla82xx_minidump_process_l1cache(scsi_qla_host_t *vha, static void qla82xx_minidump_process_queue(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t s_addr, r_addr; uint32_t r_stride, r_value, r_cnt, qid = 0; uint32_t i, k, loop_cnt; struct qla82xx_md_entry_queue *q_hdr; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; q_hdr = (struct qla82xx_md_entry_queue *)entry_hdr; s_addr = q_hdr->select_addr; @@ -4036,13 +4037,13 @@ qla82xx_minidump_process_queue(scsi_qla_host_t *vha, static void qla82xx_minidump_process_rdrom(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t r_addr, r_value; uint32_t i, loop_cnt; struct qla82xx_md_entry_rdrom *rom_hdr; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; rom_hdr = (struct qla82xx_md_entry_rdrom *)entry_hdr; r_addr = rom_hdr->read_addr; @@ -4062,7 +4063,7 @@ qla82xx_minidump_process_rdrom(scsi_qla_host_t *vha, static int qla82xx_minidump_process_rdmem(scsi_qla_host_t *vha, - qla82xx_md_entry_hdr_t *entry_hdr, uint32_t **d_ptr) + qla82xx_md_entry_hdr_t *entry_hdr, __le32 **d_ptr) { struct qla_hw_data *ha = vha->hw; uint32_t r_addr, r_value, r_data; @@ -4070,7 +4071,7 @@ qla82xx_minidump_process_rdmem(scsi_qla_host_t *vha, struct qla82xx_md_entry_rdmem *m_hdr; unsigned long flags; int rval = QLA_FUNCTION_FAILED; - uint32_t *data_ptr = *d_ptr; + __le32 *data_ptr = *d_ptr; m_hdr = (struct qla82xx_md_entry_rdmem *)entry_hdr; r_addr = m_hdr->read_addr; @@ -4163,12 +4164,12 @@ qla82xx_md_collect(scsi_qla_host_t *vha) int no_entry_hdr = 0; qla82xx_md_entry_hdr_t *entry_hdr; struct qla82xx_md_template_hdr *tmplt_hdr; - uint32_t *data_ptr; + __le32 *data_ptr; uint32_t total_data_size = 0, f_capture_mask, data_collected = 0; int i = 0, rval = QLA_FUNCTION_FAILED; tmplt_hdr = (struct qla82xx_md_template_hdr *)ha->md_tmplt_hdr; - data_ptr = (uint32_t *)ha->md_dump; + data_ptr = ha->md_dump; if (ha->fw_dumped) { ql_log(ql_log_warn, vha, 0xb037, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 85c369fed9c5..3f532a3df8e3 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5763,7 +5763,8 @@ qla25xx_rdp_rsp_reduce_size(struct scsi_qla_host *vha, if (!pdb) { ql_dbg(ql_dbg_init, vha, 0x0181, "%s: Failed allocate pdb\n", __func__); - } else if (qla24xx_get_port_database(vha, purex->nport_handle, pdb)) { + } else if (qla24xx_get_port_database(vha, + le16_to_cpu(purex->nport_handle), pdb)) { ql_dbg(ql_dbg_init, vha, 0x0181, "%s: Failed get pdb sid=%x\n", __func__, sid); } else if (pdb->current_login_state != PDS_PLOGI_COMPLETE && @@ -5957,7 +5958,7 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) rsp_els->entry_status = 0; rsp_els->handle = 0; rsp_els->nport_handle = purex->nport_handle; - rsp_els->tx_dsd_count = 1; + rsp_els->tx_dsd_count = cpu_to_le16(1); rsp_els->vp_index = purex->vp_idx; rsp_els->sof_type = EST_SOFI3; rsp_els->rx_xchg_address = purex->rx_xchg_addr; @@ -5968,7 +5969,7 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) rsp_els->d_id[1] = purex->s_id[1]; rsp_els->d_id[2] = purex->s_id[2]; - rsp_els->control_flags = EPD_ELS_ACC; + rsp_els->control_flags = cpu_to_le16(EPD_ELS_ACC); rsp_els->rx_byte_count = 0; rsp_els->tx_byte_count = cpu_to_le32(rsp_payload_length); @@ -5980,8 +5981,8 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) /* Prepare Response Payload */ rsp_payload->hdr.cmd = cpu_to_be32(0x2 << 24); /* LS_ACC */ - rsp_payload->hdr.len = cpu_to_be32( - rsp_els->tx_byte_count - sizeof(rsp_payload->hdr)); + rsp_payload->hdr.len = cpu_to_be32(le32_to_cpu(rsp_els->tx_byte_count) - + sizeof(rsp_payload->hdr)); /* Link service Request Info Descriptor */ rsp_payload->ls_req_info_desc.desc_tag = cpu_to_be32(0x1); @@ -6031,7 +6032,7 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) memset(sfp, 0, SFP_RTDI_LEN); rval = qla2x00_read_sfp(vha, sfp_dma, sfp, 0xa2, 0x60, 10, 0); if (!rval) { - uint16_t *trx = (uint16_t *)sfp; /* already be16 */ + __be16 *trx = (__force __be16 *)sfp; /* already be16 */ rsp_payload->sfp_diag_desc.temperature = trx[0]; rsp_payload->sfp_diag_desc.vcc = trx[1]; rsp_payload->sfp_diag_desc.tx_bias = trx[2]; @@ -6058,17 +6059,17 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) rval = qla24xx_get_isp_stats(vha, stat, stat_dma, 0); if (!rval) { rsp_payload->ls_err_desc.link_fail_cnt = - cpu_to_be32(stat->link_fail_cnt); + cpu_to_be32(le32_to_cpu(stat->link_fail_cnt)); rsp_payload->ls_err_desc.loss_sync_cnt = - cpu_to_be32(stat->loss_sync_cnt); + cpu_to_be32(le32_to_cpu(stat->loss_sync_cnt)); rsp_payload->ls_err_desc.loss_sig_cnt = - cpu_to_be32(stat->loss_sig_cnt); + cpu_to_be32(le32_to_cpu(stat->loss_sig_cnt)); rsp_payload->ls_err_desc.prim_seq_err_cnt = - cpu_to_be32(stat->prim_seq_err_cnt); + cpu_to_be32(le32_to_cpu(stat->prim_seq_err_cnt)); rsp_payload->ls_err_desc.inval_xmit_word_cnt = - cpu_to_be32(stat->inval_xmit_word_cnt); + cpu_to_be32(le32_to_cpu(stat->inval_xmit_word_cnt)); rsp_payload->ls_err_desc.inval_crc_cnt = - cpu_to_be32(stat->inval_crc_cnt); + cpu_to_be32(le32_to_cpu(stat->inval_crc_cnt)); rsp_payload->ls_err_desc.pn_port_phy_type |= BIT_6; } } @@ -6140,7 +6141,7 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, void *pkt) memset(sfp, 0, SFP_RTDI_LEN); rval = qla2x00_read_sfp(vha, sfp_dma, sfp, 0xa2, 0, 64, 0); if (!rval) { - uint16_t *trx = (uint16_t *)sfp; /* already be16 */ + __be16 *trx = (__force __be16 *)sfp; /* already be16 */ /* Optical Element Descriptor, Temperature */ rsp_payload->optical_elmt_desc[0].high_alarm = trx[0]; diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 749b0c197d31..e161c05d7d82 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -183,7 +183,7 @@ qla2x00_nv_deselect(struct qla_hw_data *ha) * @data: word to program */ static void -qla2x00_write_nvram_word(struct qla_hw_data *ha, uint32_t addr, uint16_t data) +qla2x00_write_nvram_word(struct qla_hw_data *ha, uint32_t addr, __le16 data) { int count; uint16_t word; @@ -202,7 +202,7 @@ qla2x00_write_nvram_word(struct qla_hw_data *ha, uint32_t addr, uint16_t data) /* Write data */ nv_cmd = (addr << 16) | NV_WRITE_OP; - nv_cmd |= data; + nv_cmd |= (__force u16)data; nv_cmd <<= 5; for (count = 0; count < 27; count++) { if (nv_cmd & BIT_31) @@ -241,7 +241,7 @@ qla2x00_write_nvram_word(struct qla_hw_data *ha, uint32_t addr, uint16_t data) static int qla2x00_write_nvram_word_tmo(struct qla_hw_data *ha, uint32_t addr, - uint16_t data, uint32_t tmo) + __le16 data, uint32_t tmo) { int ret, count; uint16_t word; @@ -261,7 +261,7 @@ qla2x00_write_nvram_word_tmo(struct qla_hw_data *ha, uint32_t addr, /* Write data */ nv_cmd = (addr << 16) | NV_WRITE_OP; - nv_cmd |= data; + nv_cmd |= (__force u16)data; nv_cmd <<= 5; for (count = 0; count < 27; count++) { if (nv_cmd & BIT_31) @@ -308,7 +308,7 @@ qla2x00_clear_nvram_protection(struct qla_hw_data *ha) int ret, stat; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; uint32_t word, wait_cnt; - uint16_t wprot, wprot_old; + __le16 wprot, wprot_old; scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); /* Clear NVRAM write protection. */ @@ -318,7 +318,7 @@ qla2x00_clear_nvram_protection(struct qla_hw_data *ha) stat = qla2x00_write_nvram_word_tmo(ha, ha->nvram_base, cpu_to_le16(0x1234), 100000); wprot = cpu_to_le16(qla2x00_get_nvram_word(ha, ha->nvram_base)); - if (stat != QLA_SUCCESS || wprot != 0x1234) { + if (stat != QLA_SUCCESS || wprot != cpu_to_le16(0x1234)) { /* Write enable. */ qla2x00_nv_write(ha, NVR_DATA_OUT); qla2x00_nv_write(ha, 0); @@ -549,7 +549,8 @@ qla2xxx_find_flt_start(scsi_qla_host_t *vha, uint32_t *start) { const char *loc, *locations[] = { "DEF", "PCI" }; uint32_t pcihdr, pcids; - uint16_t cnt, chksum, *wptr; + uint16_t cnt, chksum; + __le16 *wptr; struct qla_hw_data *ha = vha->hw; struct req_que *req = ha->req_q_map[0]; struct qla_flt_location *fltl = (void *)req->ring; @@ -610,7 +611,7 @@ qla2xxx_find_flt_start(scsi_qla_host_t *vha, uint32_t *start) if (memcmp(fltl->sig, "QFLT", 4)) goto end; - wptr = (uint16_t *)req->ring; + wptr = (__force __le16 *)req->ring; cnt = sizeof(*fltl) / sizeof(*wptr); for (chksum = 0; cnt--; wptr++) chksum += le16_to_cpu(*wptr); @@ -671,7 +672,8 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) uint32_t def = IS_QLA81XX(ha) ? 2 : IS_QLA25XX(ha) ? 1 : 0; struct qla_flt_header *flt = ha->flt; struct qla_flt_region *region = &flt->region[0]; - uint16_t *wptr, cnt, chksum; + __le16 *wptr; + uint16_t cnt, chksum; uint32_t start; /* Assign FCP prio region since older adapters may not have FLT, or @@ -681,7 +683,7 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) fcp_prio_cfg0[def] : fcp_prio_cfg1[def]; ha->flt_region_flt = flt_addr; - wptr = (uint16_t *)ha->flt; + wptr = (__force __le16 *)ha->flt; ha->isp_ops->read_optrom(vha, flt, flt_addr << 2, (sizeof(struct qla_flt_header) + FLT_REGIONS_SIZE)); @@ -949,7 +951,7 @@ qla2xxx_get_fdt_info(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct req_que *req = ha->req_q_map[0]; uint16_t cnt, chksum; - uint16_t *wptr = (uint16_t *)req->ring; + __le16 *wptr = (__force __le16 *)req->ring; struct qla_fdt_layout *fdt = (struct qla_fdt_layout *)req->ring; uint8_t man_id, flash_id; uint16_t mid = 0, fid = 0; @@ -1042,14 +1044,14 @@ static void qla2xxx_get_idc_param(scsi_qla_host_t *vha) { #define QLA82XX_IDC_PARAM_ADDR 0x003e885c - uint32_t *wptr; + __le32 *wptr; struct qla_hw_data *ha = vha->hw; struct req_que *req = ha->req_q_map[0]; if (!(IS_P3P_TYPE(ha))) return; - wptr = (uint32_t *)req->ring; + wptr = (__force __le32 *)req->ring; ha->isp_ops->read_optrom(vha, req->ring, QLA82XX_IDC_PARAM_ADDR, 8); if (*wptr == cpu_to_le32(0xffffffff)) { @@ -1095,7 +1097,7 @@ qla2xxx_flash_npiv_conf(scsi_qla_host_t *vha) { #define NPIV_CONFIG_SIZE (16*1024) void *data; - uint16_t *wptr; + __le16 *wptr; uint16_t cnt, chksum; int i; struct qla_npiv_header hdr; @@ -1265,7 +1267,7 @@ qla24xx_erase_sector(scsi_qla_host_t *vha, uint32_t fdata) } static int -qla24xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, +qla24xx_write_flash_data(scsi_qla_host_t *vha, __le32 *dwptr, uint32_t faddr, uint32_t dwords) { int ret; @@ -1352,7 +1354,7 @@ qla24xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, /* Slow write */ ret = qla24xx_write_flash_dword(ha, - flash_data_addr(ha, faddr), cpu_to_le32(*dwptr)); + flash_data_addr(ha, faddr), le32_to_cpu(*dwptr)); if (ret) { ql_dbg(ql_dbg_user, vha, 0x7006, "Failed slopw write %x (%x)\n", faddr, *dwptr); @@ -1379,11 +1381,11 @@ qla2x00_read_nvram_data(scsi_qla_host_t *vha, void *buf, uint32_t naddr, uint32_t bytes) { uint32_t i; - uint16_t *wptr; + __le16 *wptr; struct qla_hw_data *ha = vha->hw; /* Word reads to NVRAM via registers. */ - wptr = (uint16_t *)buf; + wptr = buf; qla2x00_lock_nvram_access(ha); for (i = 0; i < bytes >> 1; i++, naddr++) wptr[i] = cpu_to_le16(qla2x00_get_nvram_word(ha, @@ -1456,7 +1458,7 @@ qla24xx_write_nvram_data(scsi_qla_host_t *vha, void *buf, uint32_t naddr, { struct qla_hw_data *ha = vha->hw; struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; - uint32_t *dwptr = buf; + __le32 *dwptr = buf; uint32_t i; int ret; @@ -1478,7 +1480,7 @@ qla24xx_write_nvram_data(scsi_qla_host_t *vha, void *buf, uint32_t naddr, naddr = nvram_data_addr(ha, naddr); bytes >>= 2; for (i = 0; i < bytes; i++, naddr++, dwptr++) { - if (qla24xx_write_flash_dword(ha, naddr, cpu_to_le32(*dwptr))) { + if (qla24xx_write_flash_dword(ha, naddr, le32_to_cpu(*dwptr))) { ql_dbg(ql_dbg_user, vha, 0x709a, "Unable to program nvram address=%x data=%x.\n", naddr, *dwptr); @@ -2662,7 +2664,7 @@ qla28xx_get_flash_region(struct scsi_qla_host *vha, uint32_t start, cnt = le16_to_cpu(flt->length) / sizeof(struct qla_flt_region); for (; cnt; cnt--, flt_reg++) { - if (flt_reg->start == start) { + if (le32_to_cpu(flt_reg->start) == start) { memcpy((uint8_t *)region, flt_reg, sizeof(struct qla_flt_region)); rval = QLA_SUCCESS; @@ -2691,7 +2693,7 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, struct qla_flt_region region; bool reset_to_rom = false; uint32_t risc_size, risc_attr = 0; - uint32_t *fw_array = NULL; + __be32 *fw_array = NULL; /* Retrieve region info - must be a start address passed in */ rval = qla28xx_get_flash_region(vha, offset, ®ion); @@ -2722,12 +2724,12 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, ql_log(ql_log_warn + ql_dbg_verbose, vha, 0xffff, "Region %x is secure\n", region.code); - switch (region.code) { + switch (le16_to_cpu(region.code)) { case FLT_REG_FW: case FLT_REG_FW_SEC_27XX: case FLT_REG_MPI_PRI_28XX: case FLT_REG_MPI_SEC_28XX: - fw_array = dwptr; + fw_array = (__force __be32 *)dwptr; /* 1st fw array */ risc_size = be32_to_cpu(fw_array[3]); @@ -2761,7 +2763,7 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, case FLT_REG_PEP_PRI_28XX: case FLT_REG_PEP_SEC_28XX: - fw_array = dwptr; + fw_array = (__force __be32 *)dwptr; /* 1st fw array */ risc_size = be32_to_cpu(fw_array[3]); @@ -2892,7 +2894,8 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, if (region.attribute && buf_size_without_sfub) { ql_log(ql_log_warn + ql_dbg_verbose, vha, 0xffff, "Sending Secure Flash MB Cmd\n"); - rval = qla28xx_secure_flash_update(vha, 0, region.code, + rval = qla28xx_secure_flash_update(vha, 0, + le16_to_cpu(region.code), buf_size_without_sfub, sfub_dma, sizeof(struct secure_flash_update_block) >> 2); if (rval != QLA_SUCCESS) { @@ -2981,11 +2984,11 @@ qla24xx_write_optrom_data(struct scsi_qla_host *vha, void *buf, /* Go with write. */ if (IS_QLA28XX(ha)) - rval = qla28xx_write_flash_data(vha, (uint32_t *)buf, - offset >> 2, length >> 2); + rval = qla28xx_write_flash_data(vha, buf, offset >> 2, + length >> 2); else - rval = qla24xx_write_flash_data(vha, (uint32_t *)buf, - offset >> 2, length >> 2); + rval = qla24xx_write_flash_data(vha, buf, offset >> 2, + length >> 2); clear_bit(MBX_UPDATE_FLASH_ACTIVE, &ha->mbx_cmd_flags); scsi_unblock_requests(vha->host); @@ -3513,7 +3516,8 @@ qla24xx_get_flash_version(scsi_qla_host_t *vha, void *mbuf) ql_dump_buffer(ql_dbg_init, vha, 0x005f, dcode, 32); } else { for (i = 0; i < 4; i++) - ha->fw_revision[i] = be32_to_cpu(dcode[4+i]); + ha->fw_revision[i] = + be32_to_cpu((__force __be32)dcode[4+i]); ql_dbg(ql_dbg_init, vha, 0x0060, "Firmware revision (flash) %u.%u.%u (%x).\n", ha->fw_revision[0], ha->fw_revision[1], @@ -3537,7 +3541,8 @@ qla24xx_get_flash_version(scsi_qla_host_t *vha, void *mbuf) } for (i = 0; i < 4; i++) - ha->gold_fw_version[i] = be32_to_cpu(dcode[4+i]); + ha->gold_fw_version[i] = + be32_to_cpu((__force __be32)dcode[4+i]); return ret; } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 77f976555159..fbb80a043b4f 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -378,7 +378,7 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, qlt_issue_marker(vha, ha_locked); if ((entry->u.isp24.vp_index != 0xFF) && - (entry->u.isp24.nport_handle != 0xFFFF)) { + (entry->u.isp24.nport_handle != cpu_to_le16(0xFFFF))) { host = qlt_find_host_by_vp_idx(vha, entry->u.isp24.vp_index); if (unlikely(!host)) { @@ -1697,7 +1697,7 @@ static void qlt_send_notify_ack(struct qla_qpair *qpair, nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { nack->u.isp24.flags = ntfy->u.isp24.flags & - cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB); + cpu_to_le16(NOTIFY24XX_FLAGS_PUREX_IOCB); } nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; nack->u.isp24.status = ntfy->u.isp24.status; @@ -1725,7 +1725,8 @@ static int qlt_build_abts_resp_iocb(struct qla_tgt_mgmt_cmd *mcmd) struct scsi_qla_host *vha = mcmd->vha; struct qla_hw_data *ha = vha->hw; struct abts_resp_to_24xx *resp; - uint32_t f_ctl, h; + __le32 f_ctl; + uint32_t h; uint8_t *p; int rc; struct abts_recv_from_24xx *abts = &mcmd->orig_iocb.abts; @@ -1782,7 +1783,7 @@ static int qlt_build_abts_resp_iocb(struct qla_tgt_mgmt_cmd *mcmd) resp->fcp_hdr_le.r_ctl = R_CTL_BASIC_LINK_SERV | R_CTL_B_ACC; resp->payload.ba_acct.seq_id_valid = SEQ_ID_INVALID; resp->payload.ba_acct.low_seq_cnt = 0x0000; - resp->payload.ba_acct.high_seq_cnt = 0xFFFF; + resp->payload.ba_acct.high_seq_cnt = cpu_to_le16(0xFFFF); resp->payload.ba_acct.ox_id = abts->fcp_hdr_le.ox_id; resp->payload.ba_acct.rx_id = abts->fcp_hdr_le.rx_id; } else { @@ -1814,7 +1815,7 @@ static void qlt_24xx_send_abts_resp(struct qla_qpair *qpair, struct scsi_qla_host *vha = qpair->vha; struct qla_hw_data *ha = vha->hw; struct abts_resp_to_24xx *resp; - uint32_t f_ctl; + __le32 f_ctl; uint8_t *p; ql_dbg(ql_dbg_tgt, vha, 0xe006, @@ -1857,7 +1858,7 @@ static void qlt_24xx_send_abts_resp(struct qla_qpair *qpair, resp->fcp_hdr_le.r_ctl = R_CTL_BASIC_LINK_SERV | R_CTL_B_ACC; resp->payload.ba_acct.seq_id_valid = SEQ_ID_INVALID; resp->payload.ba_acct.low_seq_cnt = 0x0000; - resp->payload.ba_acct.high_seq_cnt = 0xFFFF; + resp->payload.ba_acct.high_seq_cnt = cpu_to_le16(0xFFFF); resp->payload.ba_acct.ox_id = abts->fcp_hdr_le.ox_id; resp->payload.ba_acct.rx_id = abts->fcp_hdr_le.rx_id; } else { @@ -2030,7 +2031,7 @@ static void qlt_do_tmr_work(struct work_struct *work) switch (mcmd->tmr_func) { case QLA_TGT_ABTS: - tag = mcmd->orig_iocb.abts.exchange_addr_to_abort; + tag = le32_to_cpu(mcmd->orig_iocb.abts.exchange_addr_to_abort); break; default: tag = 0; @@ -2110,7 +2111,7 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, struct qla_tgt_cmd *abort_cmd; abort_cmd = ha->tgt.tgt_ops->find_cmd_by_tag(sess, - abts->exchange_addr_to_abort); + le32_to_cpu(abts->exchange_addr_to_abort)); if (abort_cmd && abort_cmd->qpair) { mcmd->qpair = abort_cmd->qpair; mcmd->se_cmd.cpuid = abort_cmd->se_cmd.cpuid; @@ -2133,7 +2134,7 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha, { struct qla_hw_data *ha = vha->hw; struct fc_port *sess; - uint32_t tag = abts->exchange_addr_to_abort; + uint32_t tag = le32_to_cpu(abts->exchange_addr_to_abort); be_id_t s_id; int rc; unsigned long flags; @@ -2223,7 +2224,7 @@ static void qlt_24xx_send_task_mgmt_ctio(struct qla_qpair *qpair, ctio->entry_type = CTIO_TYPE7; ctio->entry_count = 1; ctio->handle = QLA_TGT_SKIP_HANDLE | CTIO_COMPLETION_HANDLE_MARK; - ctio->nport_handle = mcmd->sess->loop_id; + ctio->nport_handle = cpu_to_le16(mcmd->sess->loop_id); ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); ctio->vp_index = ha->vp_idx; ctio->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id); @@ -2280,7 +2281,7 @@ void qlt_send_resp_ctio(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, ctio->entry_type = CTIO_TYPE7; ctio->entry_count = 1; ctio->handle = QLA_TGT_SKIP_HANDLE; - ctio->nport_handle = cmd->sess->loop_id; + ctio->nport_handle = cpu_to_le16(cmd->sess->loop_id); ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); ctio->vp_index = vha->vp_idx; ctio->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id); @@ -2840,10 +2841,14 @@ static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, cpu_to_le16(SS_SENSE_LEN_VALID); ctio->u.status1.sense_length = cpu_to_le16(prm->sense_buffer_len); - for (i = 0; i < prm->sense_buffer_len/4; i++) - ((uint32_t *)ctio->u.status1.sense_data)[i] = - cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]); + for (i = 0; i < prm->sense_buffer_len/4; i++) { + uint32_t v; + v = get_unaligned_be32( + &((uint32_t *)prm->sense_buffer)[i]); + put_unaligned_le32(v, + &((uint32_t *)ctio->u.status1.sense_data)[i]); + } qlt_print_dif_err(prm); } else { @@ -3114,7 +3119,7 @@ qlt_build_ctio_crc2_pkt(struct qla_qpair *qpair, struct qla_tgt_prm *prm) else if (cmd->dma_data_direction == DMA_FROM_DEVICE) pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT); - pkt->dseg_count = prm->tot_dsds; + pkt->dseg_count = cpu_to_le16(prm->tot_dsds); /* Fibre channel byte count */ pkt->transfer_length = cpu_to_le32(transfer_length); @@ -3136,7 +3141,7 @@ qlt_build_ctio_crc2_pkt(struct qla_qpair *qpair, struct qla_tgt_prm *prm) qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts); put_unaligned_le64(crc_ctx_dma, &pkt->crc_context_address); - pkt->crc_context_len = CRC_CONTEXT_LEN_FW; + pkt->crc_context_len = cpu_to_le16(CRC_CONTEXT_LEN_FW); if (!bundling) { cur_dsd = &crc_ctx_pkt->u.nobundling.data_dsd[0]; @@ -3573,7 +3578,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha, nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { nack->u.isp24.flags = ntfy->u.isp24.flags & - __constant_cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB); + cpu_to_le16(NOTIFY24XX_FLAGS_PUREX_IOCB); } /* terminate */ @@ -3647,7 +3652,7 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair, ctio24 = (struct ctio7_to_24xx *)pkt; ctio24->entry_type = CTIO_TYPE7; - ctio24->nport_handle = CTIO7_NHANDLE_UNRECOGNIZED; + ctio24->nport_handle = cpu_to_le16(CTIO7_NHANDLE_UNRECOGNIZED); ctio24->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); ctio24->vp_index = vha->vp_idx; ctio24->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id); @@ -4110,7 +4115,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) spin_lock_init(&cmd->cmd_lock); cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; - cmd->se_cmd.tag = atio->u.isp24.exchange_addr; + cmd->se_cmd.tag = le32_to_cpu(atio->u.isp24.exchange_addr); if (atio->u.isp24.fcp_cmnd.rddata && atio->u.isp24.fcp_cmnd.wrdata) { @@ -5302,7 +5307,7 @@ static int __qlt_send_busy(struct qla_qpair *qpair, ctio24 = (struct ctio7_to_24xx *)pkt; ctio24->entry_type = CTIO_TYPE7; - ctio24->nport_handle = sess->loop_id; + ctio24->nport_handle = cpu_to_le16(sess->loop_id); ctio24->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); ctio24->vp_index = vha->vp_idx; ctio24->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id); @@ -5315,13 +5320,14 @@ static int __qlt_send_busy(struct qla_qpair *qpair, * CTIO from fw w/o se_cmd doesn't provide enough info to retry it, * if the explicit conformation is used. */ - ctio24->u.status1.ox_id = swab16(atio->u.isp24.fcp_hdr.ox_id); + ctio24->u.status1.ox_id = + cpu_to_le16(be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id)); ctio24->u.status1.scsi_status = cpu_to_le16(status); - ctio24->u.status1.residual = get_datalen_for_atio(atio); + ctio24->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio)); if (ctio24->u.status1.residual != 0) - ctio24->u.status1.scsi_status |= SS_RESIDUAL_UNDER; + ctio24->u.status1.scsi_status |= cpu_to_le16(SS_RESIDUAL_UNDER); /* Memory Barrier */ wmb(); @@ -5550,7 +5556,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, switch (atio->u.raw.entry_type) { case ATIO_TYPE7: if (unlikely(atio->u.isp24.exchange_addr == - ATIO_EXCHANGE_ADDRESS_UNKNOWN)) { + cpu_to_le32(ATIO_EXCHANGE_ADDRESS_UNKNOWN))) { ql_dbg(ql_dbg_io, vha, 0x3065, "qla_target(%d): ATIO_TYPE7 " "received with UNKNOWN exchange address, " @@ -5713,8 +5719,8 @@ static void qlt_handle_abts_completion(struct scsi_qla_host *vha, entry->compl_status); if (le16_to_cpu(entry->compl_status) != ABTS_RESP_COMPL_SUCCESS) { - if ((entry->error_subcode1 == 0x1E) && - (entry->error_subcode2 == 0)) { + if (le32_to_cpu(entry->error_subcode1) == 0x1E && + le32_to_cpu(entry->error_subcode2) == 0) { if (qlt_chk_unresolv_exchg(vha, rsp->qpair, entry)) { ha->tgt.tgt_ops->free_mcmd(mcmd); return; @@ -5928,8 +5934,7 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03b, "qla_target(%d): Async LOOP_UP occurred " "(m[0]=%x, m[1]=%x, m[2]=%x, m[3]=%x)", vha->vp_idx, - le16_to_cpu(mailbox[0]), le16_to_cpu(mailbox[1]), - le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); + mailbox[0], mailbox[1], mailbox[2], mailbox[3]); if (tgt->link_reinit_iocb_pending) { qlt_send_notify_ack(ha->base_qpair, &tgt->link_reinit_iocb, @@ -5946,18 +5951,16 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03c, "qla_target(%d): Async event %#x occurred " "(m[0]=%x, m[1]=%x, m[2]=%x, m[3]=%x)", vha->vp_idx, code, - le16_to_cpu(mailbox[0]), le16_to_cpu(mailbox[1]), - le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); + mailbox[0], mailbox[1], mailbox[2], mailbox[3]); break; case MBA_REJECTED_FCP_CMD: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf017, "qla_target(%d): Async event LS_REJECT occurred (m[0]=%x, m[1]=%x, m[2]=%x, m[3]=%x)", vha->vp_idx, - le16_to_cpu(mailbox[0]), le16_to_cpu(mailbox[1]), - le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); + mailbox[0], mailbox[1], mailbox[2], mailbox[3]); - if (le16_to_cpu(mailbox[3]) == 1) { + if (mailbox[3] == 1) { /* exchange starvation. */ vha->hw->exch_starvation++; if (vha->hw->exch_starvation > 5) { @@ -5981,10 +5984,9 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, "qla_target(%d): Port update async event %#x " "occurred: updating the ports database (m[0]=%x, m[1]=%x, " "m[2]=%x, m[3]=%x)", vha->vp_idx, code, - le16_to_cpu(mailbox[0]), le16_to_cpu(mailbox[1]), - le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); + mailbox[0], mailbox[1], mailbox[2], mailbox[3]); - login_code = le16_to_cpu(mailbox[2]); + login_code = mailbox[2]; if (login_code == 0x4) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03e, "Async MB 2: Got PLOGI Complete\n"); @@ -6734,7 +6736,7 @@ qlt_init_atio_q_entries(struct scsi_qla_host *vha) return; for (cnt = 0; cnt < ha->tgt.atio_q_length; cnt++) { - pkt->u.raw.signature = ATIO_PROCESSED; + pkt->u.raw.signature = cpu_to_le32(ATIO_PROCESSED); pkt++; } @@ -6769,7 +6771,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked) "corrupted fcp frame SID[%3phN] OXID[%04x] EXCG[%x] %64phN\n", &pkt->u.isp24.fcp_hdr.s_id, be16_to_cpu(pkt->u.isp24.fcp_hdr.ox_id), - le32_to_cpu(pkt->u.isp24.exchange_addr), pkt); + pkt->u.isp24.exchange_addr, pkt); adjust_corrupted_atio(pkt); qlt_send_term_exchange(ha->base_qpair, NULL, pkt, @@ -6787,7 +6789,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked) } else ha->tgt.atio_ring_ptr++; - pkt->u.raw.signature = ATIO_PROCESSED; + pkt->u.raw.signature = cpu_to_le32(ATIO_PROCESSED); pkt = (struct atio_from_isp *)ha->tgt.atio_ring_ptr; } wmb(); @@ -6816,10 +6818,10 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha) if (IS_QLA2071(ha)) { /* 4 ports Baker: Enable Interrupt Handshake */ icb->msix_atio = 0; - icb->firmware_options_2 |= BIT_26; + icb->firmware_options_2 |= cpu_to_le32(BIT_26); } else { icb->msix_atio = cpu_to_le16(msix->entry); - icb->firmware_options_2 &= ~BIT_26; + icb->firmware_options_2 &= cpu_to_le32(~BIT_26); } ql_dbg(ql_dbg_init, vha, 0xf072, "Registering ICB vector 0x%x for atio que.\n", @@ -6829,7 +6831,7 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha) /* INTx|MSI */ if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { icb->msix_atio = 0; - icb->firmware_options_2 |= BIT_26; + icb->firmware_options_2 |= cpu_to_le32(BIT_26); ql_dbg(ql_dbg_init, vha, 0xf072, "%s: Use INTx for ATIOQ.\n", __func__); } diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index f05a4fa2b9d7..8dc82cfd38b2 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -919,9 +919,9 @@ static void qla27xx_firmware_info(struct scsi_qla_host *vha, struct qla27xx_fwdt_template *tmp) { - tmp->firmware_version[0] = vha->hw->fw_major_version; - tmp->firmware_version[1] = vha->hw->fw_minor_version; - tmp->firmware_version[2] = vha->hw->fw_subminor_version; + tmp->firmware_version[0] = cpu_to_le32(vha->hw->fw_major_version); + tmp->firmware_version[1] = cpu_to_le32(vha->hw->fw_minor_version); + tmp->firmware_version[2] = cpu_to_le32(vha->hw->fw_subminor_version); tmp->firmware_version[3] = cpu_to_le32( vha->hw->fw_attributes_h << 16 | vha->hw->fw_attributes); tmp->firmware_version[4] = cpu_to_le32( From 4dea170f4fb225984b4f2f1cf0a41d485177b905 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 18 May 2020 15:44:20 +0800 Subject: [PATCH 423/562] scsi: core: Fix incorrect usage of shost_for_each_device shost_for_each_device(sdev, shost) \ for ((sdev) = __scsi_iterate_devices((shost), NULL); \ (sdev); \ (sdev) = __scsi_iterate_devices((shost), (sdev))) When terminating shost_for_each_device() iteration with break or return, scsi_device_put() should be used to prevent stale scsi device references from being left behind. Link: https://lore.kernel.org/r/20200518074420.39275-1-yebin10@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Ye Bin Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 2 ++ drivers/scsi/scsi_lib.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 978be1602f71..927b1e641842 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1412,6 +1412,7 @@ static int scsi_eh_stu(struct Scsi_Host *shost, sdev_printk(KERN_INFO, sdev, "%s: skip START_UNIT, past eh deadline\n", current->comm)); + scsi_device_put(sdev); break; } stu_scmd = NULL; @@ -1478,6 +1479,7 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, sdev_printk(KERN_INFO, sdev, "%s: skip BDR, past eh deadline\n", current->comm)); + scsi_device_put(sdev); break; } bdr_scmd = NULL; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 01d229ea4e1c..4ed694b9f99b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2835,8 +2835,10 @@ scsi_host_unblock(struct Scsi_Host *shost, int new_state) shost_for_each_device(sdev, shost) { ret = scsi_internal_device_unblock(sdev, new_state); - if (ret) + if (ret) { + scsi_device_put(sdev); break; + } } return ret; } From 840e1b55bb752c6617a85eb01d15432bebe8c559 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 18 May 2020 15:47:32 +0800 Subject: [PATCH 424/562] scsi: core: Refactor scsi_mq_setup_tags function shost->tag_set is used too many times, introduce temporary parameter tag_set instead of &shost->tag_set. Link: https://lore.kernel.org/r/20200518074732.39679-1-yebin10@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Ye Bin Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 4ed694b9f99b..c163fa22267c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1849,6 +1849,7 @@ struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev) int scsi_mq_setup_tags(struct Scsi_Host *shost) { unsigned int cmd_size, sgl_size; + struct blk_mq_tag_set *tag_set = &shost->tag_set; sgl_size = max_t(unsigned int, sizeof(struct scatterlist), scsi_mq_inline_sgl_size(shost)); @@ -1857,21 +1858,21 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) cmd_size += sizeof(struct scsi_data_buffer) + sizeof(struct scatterlist) * SCSI_INLINE_PROT_SG_CNT; - memset(&shost->tag_set, 0, sizeof(shost->tag_set)); + memset(tag_set, 0, sizeof(*tag_set)); if (shost->hostt->commit_rqs) - shost->tag_set.ops = &scsi_mq_ops; + tag_set->ops = &scsi_mq_ops; else - shost->tag_set.ops = &scsi_mq_ops_no_commit; - shost->tag_set.nr_hw_queues = shost->nr_hw_queues ? : 1; - shost->tag_set.queue_depth = shost->can_queue; - shost->tag_set.cmd_size = cmd_size; - shost->tag_set.numa_node = NUMA_NO_NODE; - shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - shost->tag_set.flags |= + tag_set->ops = &scsi_mq_ops_no_commit; + tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1; + tag_set->queue_depth = shost->can_queue; + tag_set->cmd_size = cmd_size; + tag_set->numa_node = NUMA_NO_NODE; + tag_set->flags = BLK_MQ_F_SHOULD_MERGE; + tag_set->flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); - shost->tag_set.driver_data = shost; + tag_set->driver_data = shost; - return blk_mq_alloc_tag_set(&shost->tag_set); + return blk_mq_alloc_tag_set(tag_set); } void scsi_mq_destroy_tags(struct Scsi_Host *shost) From b6ff8ca733500a7394d926c74ac20b428b225db7 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 12 May 2020 21:39:43 -0400 Subject: [PATCH 425/562] scsi: scsi_debug: Parser tables and code interaction This patch is in response to a static analyser report from Dan Carpenter titled: "[bug report] scsi: scsi_debug: Add per_host_store option". This code may not clear the static analyzer reports, but may shed light on why they occur. Amongst other things this driver has a table driven SCSI command parser which also involves some C code. There are some invariants between the table entries and the corresponding C code (i.e. the resp_*() functions) that, if broken, may lead to a NULL dereference. And the report is valid, at least in the case of the PRE-FETCH command. Alas, that is not one of the cases that the static analyzer reported. In this particular corner case: when the fake_rw flag is set and the table entry for a "store"-accessing command does not have the required F_FAKE_RW flag set, do the following. Call BUG_ON() in the devip2sip() very close to a comment block explaining why it was called and how to fix it. checkpatch.pl complains about the BUG_ON() but there is no reasonable remedial action that can be taken at run time. This change allows the code reported by the static analyzer to be simplified. Comments were also added to the table flags (e.g. F_FAKE_RW) so developers who add commands might be more inclined to use them (properly). Link: https://lore.kernel.org/r/20200513013943.25285-1-dgilbert@interlog.com Reported-by: Dan Carpenter Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 111 +++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 49 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 73847366dc49..843cccb38cb7 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -236,21 +236,23 @@ static const char *sdebug_version_date = "20200421"; #define SDEBUG_CANQUEUE (SDEBUG_CANQUEUE_WORDS * BITS_PER_LONG) #define DEF_CMD_PER_LUN 255 -#define F_D_IN 1 -#define F_D_OUT 2 +/* UA - Unit Attention; SA - Service Action; SSU - Start Stop Unit */ +#define F_D_IN 1 /* Data-in command (e.g. READ) */ +#define F_D_OUT 2 /* Data-out command (e.g. WRITE) */ #define F_D_OUT_MAYBE 4 /* WRITE SAME, NDOB bit */ #define F_D_UNKN 8 -#define F_RL_WLUN_OK 0x10 -#define F_SKIP_UA 0x20 -#define F_DELAY_OVERR 0x40 -#define F_SA_LOW 0x80 /* cdb byte 1, bits 4 to 0 */ -#define F_SA_HIGH 0x100 /* as used by variable length cdbs */ -#define F_INV_OP 0x200 -#define F_FAKE_RW 0x400 -#define F_M_ACCESS 0x800 /* media access */ -#define F_SSU_DELAY 0x1000 -#define F_SYNC_DELAY 0x2000 +#define F_RL_WLUN_OK 0x10 /* allowed with REPORT LUNS W-LUN */ +#define F_SKIP_UA 0x20 /* bypass UAs (e.g. INQUIRY command) */ +#define F_DELAY_OVERR 0x40 /* for commands like INQUIRY */ +#define F_SA_LOW 0x80 /* SA is in cdb byte 1, bits 4 to 0 */ +#define F_SA_HIGH 0x100 /* SA is in cdb bytes 8 and 9 */ +#define F_INV_OP 0x200 /* invalid opcode (not supported) */ +#define F_FAKE_RW 0x400 /* bypass resp_*() when fake_rw set */ +#define F_M_ACCESS 0x800 /* media access, reacts to SSU state */ +#define F_SSU_DELAY 0x1000 /* SSU command delay (long-ish) */ +#define F_SYNC_DELAY 0x2000 /* SYNCHRONIZE CACHE delay */ +/* Useful combinations of the above flags */ #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR) #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW) #define FF_SA (F_SA_HIGH | F_SA_LOW) @@ -607,25 +609,25 @@ static const struct opcode_info_t sync_cache_iarr[] = { }; static const struct opcode_info_t pre_fetch_iarr[] = { - {0, 0x90, 0, F_SYNC_DELAY | F_M_ACCESS, resp_pre_fetch, NULL, + {0, 0x90, 0, F_SYNC_DELAY | FF_MEDIA_IO, resp_pre_fetch, NULL, {16, 0x2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* PRE-FETCH (16) */ }; static const struct opcode_info_t zone_out_iarr[] = { /* ZONE OUT(16) */ - {0, 0x94, 0x1, F_SA_LOW, resp_close_zone, NULL, + {0, 0x94, 0x1, F_SA_LOW | F_M_ACCESS, resp_close_zone, NULL, {16, 0x1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0xff, 0xff, 0x1, 0xc7} }, /* CLOSE ZONE */ - {0, 0x94, 0x2, F_SA_LOW, resp_finish_zone, NULL, + {0, 0x94, 0x2, F_SA_LOW | F_M_ACCESS, resp_finish_zone, NULL, {16, 0x2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0xff, 0xff, 0x1, 0xc7} }, /* FINISH ZONE */ - {0, 0x94, 0x4, F_SA_LOW, resp_rwp_zone, NULL, + {0, 0x94, 0x4, F_SA_LOW | F_M_ACCESS, resp_rwp_zone, NULL, {16, 0x4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0xff, 0xff, 0x1, 0xc7} }, /* RESET WRITE POINTER */ }; static const struct opcode_info_t zone_in_iarr[] = { /* ZONE IN(16) */ - {0, 0x95, 0x6, F_SA_LOW | F_D_IN, NULL, NULL, + {0, 0x95, 0x6, F_SA_LOW | F_D_IN | F_M_ACCESS, NULL, NULL, {16, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} }, /* REPORT ZONES */ }; @@ -726,17 +728,17 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { {0, 0x89, 0, F_D_OUT | FF_MEDIA_IO, resp_comp_write, NULL, {16, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0xff, 0x3f, 0xc7} }, /* COMPARE AND WRITE */ - {ARRAY_SIZE(pre_fetch_iarr), 0x34, 0, F_SYNC_DELAY | F_M_ACCESS, + {ARRAY_SIZE(pre_fetch_iarr), 0x34, 0, F_SYNC_DELAY | FF_MEDIA_IO, resp_pre_fetch, pre_fetch_iarr, {10, 0x2, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} }, /* PRE-FETCH (10) */ /* 30 */ - {ARRAY_SIZE(zone_out_iarr), 0x94, 0x3, F_SA_LOW, + {ARRAY_SIZE(zone_out_iarr), 0x94, 0x3, F_SA_LOW | F_M_ACCESS, resp_open_zone, zone_out_iarr, /* ZONE_OUT(16), OPEN ZONE) */ {16, 0x3 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0, 0x0, 0xff, 0xff, 0x1, 0xc7} }, - {ARRAY_SIZE(zone_in_iarr), 0x95, 0x0, F_SA_LOW | F_D_IN, + {ARRAY_SIZE(zone_in_iarr), 0x95, 0x0, F_SA_LOW | F_M_ACCESS, resp_report_zones, zone_in_iarr, /* ZONE_IN(16), REPORT ZONES) */ {16, 0x0 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xc7} }, @@ -2875,10 +2877,20 @@ static inline int check_device_access_params return 0; } -static inline struct sdeb_store_info *devip2sip(struct sdebug_dev_info *devip) +/* + * Note: if BUG_ON() fires it usually indicates a problem with the parser + * tables. Perhaps a missing F_FAKE_RW or FF_MEDIA_IO flag. Response functions + * that access any of the "stores" in struct sdeb_store_info should call this + * function with bug_if_fake_rw set to true. + */ +static inline struct sdeb_store_info *devip2sip(struct sdebug_dev_info *devip, + bool bug_if_fake_rw) { - return sdebug_fake_rw ? - NULL : xa_load(per_store_ap, devip->sdbg_host->si_idx); + if (sdebug_fake_rw) { + BUG_ON(bug_if_fake_rw); /* See note above */ + return NULL; + } + return xa_load(per_store_ap, devip->sdbg_host->si_idx); } /* Returns number of bytes copied or -1 if error. */ @@ -3015,7 +3027,7 @@ static void dif_copy_prot(struct scsi_cmnd *scp, sector_t sector, size_t resid; void *paddr; struct sdeb_store_info *sip = devip2sip((struct sdebug_dev_info *) - scp->device->hostdata); + scp->device->hostdata, true); struct t10_pi_tuple *dif_storep = sip->dif_storep; const void *dif_store_end = dif_storep + sdebug_store_sectors; struct sg_mapping_iter miter; @@ -3061,7 +3073,7 @@ static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec, unsigned int i; sector_t sector; struct sdeb_store_info *sip = devip2sip((struct sdebug_dev_info *) - scp->device->hostdata); + scp->device->hostdata, true); struct t10_pi_tuple *sdt; for (i = 0; i < sectors; i++, ei_lba++) { @@ -3094,7 +3106,7 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u32 ei_lba; int ret; u64 lba; - struct sdeb_store_info *sip = devip2sip(devip); + struct sdeb_store_info *sip = devip2sip(devip, true); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; u8 *cmd = scp->cmnd; struct sdebug_queued_cmd *sqcp; @@ -3403,8 +3415,8 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u32 ei_lba; int ret; u64 lba; - struct sdeb_store_info *sip = devip2sip(devip); - rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + struct sdeb_store_info *sip = devip2sip(devip, true); + rwlock_t *macc_lckp = &sip->macc_lck; u8 *cmd = scp->cmnd; switch (cmd[0]) { @@ -3524,8 +3536,8 @@ static int resp_write_scat(struct scsi_cmnd *scp, u8 *cmd = scp->cmnd; u8 *lrdp = NULL; u8 *up; - struct sdeb_store_info *sip = devip2sip(devip); - rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + struct sdeb_store_info *sip = devip2sip(devip, true); + rwlock_t *macc_lckp = &sip->macc_lck; u8 wrprotect; u16 lbdof, num_lrd, k; u32 num, num_by, bt_len, lbdof_blen, sg_off, cum_lb; @@ -3695,8 +3707,8 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u32 lb_size = sdebug_sector_size; int ret; struct sdeb_store_info *sip = devip2sip((struct sdebug_dev_info *) - scp->device->hostdata); - rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + scp->device->hostdata, true); + rwlock_t *macc_lckp = &sip->macc_lck; u8 *fs1p; u8 *fsp; @@ -3855,8 +3867,8 @@ static int resp_comp_write(struct scsi_cmnd *scp, { u8 *cmd = scp->cmnd; u8 *arr; - struct sdeb_store_info *sip = devip2sip(devip); - rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + struct sdeb_store_info *sip = devip2sip(devip, true); + rwlock_t *macc_lckp = &sip->macc_lck; u64 lba; u32 dnum; u32 lb_size = sdebug_sector_size; @@ -3922,8 +3934,8 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { unsigned char *buf; struct unmap_block_desc *desc; - struct sdeb_store_info *sip = devip2sip(devip); - rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + struct sdeb_store_info *sip = devip2sip(devip, true); + rwlock_t *macc_lckp = &sip->macc_lck; unsigned int i, payload_len, descriptors; int ret; @@ -3980,7 +3992,6 @@ static int resp_get_lba_status(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { u8 *cmd = scp->cmnd; - struct sdeb_store_info *sip = devip2sip(devip); u64 lba; u32 alloc_len, mapped, num; int ret; @@ -3996,9 +4007,11 @@ static int resp_get_lba_status(struct scsi_cmnd *scp, if (ret) return ret; - if (scsi_debug_lbp()) + if (scsi_debug_lbp()) { + struct sdeb_store_info *sip = devip2sip(devip, true); + mapped = map_state(sip, lba, &num); - else { + } else { mapped = 1; /* following just in case virtual_gb changed */ sdebug_capacity = get_sdebug_capacity(); @@ -4058,9 +4071,9 @@ static int resp_pre_fetch(struct scsi_cmnd *scp, u64 block, rest = 0; u32 nblks; u8 *cmd = scp->cmnd; - struct sdeb_store_info *sip = devip2sip(devip); - rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; - u8 *fsp = sip ? sip->storep : NULL; + struct sdeb_store_info *sip = devip2sip(devip, true); + rwlock_t *macc_lckp = &sip->macc_lck; + u8 *fsp = sip->storep; if (cmd[0] == PRE_FETCH) { /* 10 byte cdb */ lba = get_unaligned_be32(cmd + 2); @@ -4204,8 +4217,8 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u64 lba; u8 *arr; u8 *cmd = scp->cmnd; - struct sdeb_store_info *sip = devip2sip(devip); - rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; + struct sdeb_store_info *sip = devip2sip(devip, true); + rwlock_t *macc_lckp = &sip->macc_lck; bytchk = (cmd[1] >> 1) & 0x3; if (bytchk == 0) { @@ -4283,7 +4296,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, u8 *arr = NULL, *desc; u8 *cmd = scp->cmnd; struct sdeb_zone_state *zsp; - struct sdeb_store_info *sip = devip2sip(devip); + struct sdeb_store_info *sip = devip2sip(devip, false); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; if (!sdebug_dev_is_zoned(devip)) { @@ -4424,7 +4437,7 @@ static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u8 *cmd = scp->cmnd; struct sdeb_zone_state *zsp; bool all = cmd[14] & 0x01; - struct sdeb_store_info *sip = devip2sip(devip); + struct sdeb_store_info *sip = devip2sip(devip, false); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; if (!sdebug_dev_is_zoned(devip)) { @@ -4503,7 +4516,7 @@ static int resp_close_zone(struct scsi_cmnd *scp, u8 *cmd = scp->cmnd; struct sdeb_zone_state *zsp; bool all = cmd[14] & 0x01; - struct sdeb_store_info *sip = devip2sip(devip); + struct sdeb_store_info *sip = devip2sip(devip, false); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; if (!sdebug_dev_is_zoned(devip)) { @@ -4576,7 +4589,7 @@ static int resp_finish_zone(struct scsi_cmnd *scp, u64 z_id; u8 *cmd = scp->cmnd; bool all = cmd[14] & 0x01; - struct sdeb_store_info *sip = devip2sip(devip); + struct sdeb_store_info *sip = devip2sip(devip, false); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; if (!sdebug_dev_is_zoned(devip)) { @@ -4652,7 +4665,7 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u64 z_id; u8 *cmd = scp->cmnd; bool all = cmd[14] & 0x01; - struct sdeb_store_info *sip = devip2sip(devip); + struct sdeb_store_info *sip = devip2sip(devip, false); rwlock_t *macc_lckp = sip ? &sip->macc_lck : &sdeb_fake_rw_lck; if (!sdebug_dev_is_zoned(devip)) { From cc8a635e24acf2793605f243c913c51b8c3702ab Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 12 May 2020 18:22:03 +0300 Subject: [PATCH 426/562] RDMA/efa: Fix setting of wrong bit in get/set_feature commands When using a control buffer the ctrl_data bit should be set in order to indicate the control buffer address is valid, not ctrl_data_indirect which is used when the control buffer itself is indirect. Fixes: e9c6c5373088 ("RDMA/efa: Add common command handlers") Link: https://lore.kernel.org/r/20200512152204.93091-2-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index eea5574a62e8..69f842c92ff6 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -388,7 +388,7 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev, if (control_buff_size) EFA_SET(&get_cmd.aq_common_descriptor.flags, - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1); + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1); efa_com_set_dma_addr(control_buf_dma_addr, &get_cmd.control_buffer.address.mem_addr_high, @@ -540,7 +540,7 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev, if (control_buff_size) { set_cmd->aq_common_descriptor.flags = 0; EFA_SET(&set_cmd->aq_common_descriptor.flags, - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1); + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1); efa_com_set_dma_addr(control_buf_dma_addr, &set_cmd->control_buffer.address.mem_addr_high, &set_cmd->control_buffer.address.mem_addr_low); From e1ca01a902fedebd72978f606f4be767ea4a26ea Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 12 May 2020 18:22:04 +0300 Subject: [PATCH 427/562] RDMA/efa: Report host information to the device The host info feature allows the driver to infrom the EFA device firmware with system configuration for debugging and troubleshooting purposes. The host info buffer is passed as an admin command DMA mapped control buffer, and is unmapped and freed once the command CQE is consumed. Currently, the setting of host info is done for each device on its probe. Failing to set the host info for the device shall not disturb the probe flow, any errors will be discarded. Link: https://lore.kernel.org/r/20200512152204.93091-3-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Guy Tzalik Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- .../infiniband/hw/efa/efa_admin_cmds_defs.h | 63 ++++++++++++++++++- drivers/infiniband/hw/efa/efa_com_cmd.c | 14 ++--- drivers/infiniband/hw/efa/efa_com_cmd.h | 11 +++- drivers/infiniband/hw/efa/efa_main.c | 52 ++++++++++++++- 4 files changed, 130 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 96b104ab5415..bef2bd291054 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -37,7 +37,7 @@ enum efa_admin_aq_feature_id { EFA_ADMIN_NETWORK_ATTR = 3, EFA_ADMIN_QUEUE_ATTR = 4, EFA_ADMIN_HW_HINTS = 5, - EFA_ADMIN_FEATURES_OPCODE_NUM = 8, + EFA_ADMIN_HOST_INFO = 6, }; /* QP transport type */ @@ -799,6 +799,54 @@ struct efa_admin_mmio_req_read_less_resp { u32 reg_val; }; +enum efa_admin_os_type { + EFA_ADMIN_OS_LINUX = 0, +}; + +struct efa_admin_host_info { + /* OS distribution string format */ + u8 os_dist_str[128]; + + /* Defined in enum efa_admin_os_type */ + u32 os_type; + + /* Kernel version string format */ + u8 kernel_ver_str[32]; + + /* Kernel version numeric format */ + u32 kernel_ver; + + /* + * 7:0 : driver_module_type + * 15:8 : driver_sub_minor + * 23:16 : driver_minor + * 31:24 : driver_major + */ + u32 driver_ver; + + /* + * Device's Bus, Device and Function + * 2:0 : function + * 7:3 : device + * 15:8 : bus + */ + u16 bdf; + + /* + * Spec version + * 7:0 : spec_minor + * 15:8 : spec_major + */ + u16 spec_ver; + + /* + * 0 : intree - Intree driver + * 1 : gdr - GPUDirect RDMA supported + * 31:2 : reserved2 + */ + u32 flags; +}; + /* create_qp_cmd */ #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) @@ -820,4 +868,17 @@ struct efa_admin_mmio_req_read_less_resp { /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) +/* host_info */ +#define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) +#define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8) +#define EFA_ADMIN_HOST_INFO_DRIVER_MINOR_MASK GENMASK(23, 16) +#define EFA_ADMIN_HOST_INFO_DRIVER_MAJOR_MASK GENMASK(31, 24) +#define EFA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0) +#define EFA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3) +#define EFA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8) +#define EFA_ADMIN_HOST_INFO_SPEC_MINOR_MASK GENMASK(7, 0) +#define EFA_ADMIN_HOST_INFO_SPEC_MAJOR_MASK GENMASK(15, 8) +#define EFA_ADMIN_HOST_INFO_INTREE_MASK BIT(0) +#define EFA_ADMIN_HOST_INFO_GDR_MASK BIT(1) + #endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 69f842c92ff6..fabd8df2e78f 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -351,7 +351,7 @@ int efa_com_destroy_ah(struct efa_com_dev *edev, return 0; } -static bool +bool efa_com_check_supported_feature_id(struct efa_com_dev *edev, enum efa_admin_aq_feature_id feature_id) { @@ -517,12 +517,12 @@ int efa_com_get_hw_hints(struct efa_com_dev *edev, return 0; } -static int efa_com_set_feature_ex(struct efa_com_dev *edev, - struct efa_admin_set_feature_resp *set_resp, - struct efa_admin_set_feature_cmd *set_cmd, - enum efa_admin_aq_feature_id feature_id, - dma_addr_t control_buf_dma_addr, - u32 control_buff_size) +int efa_com_set_feature_ex(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) { struct efa_com_admin_queue *aq; int err; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 31db5a0cbd5b..41ce4a476ee6 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_CMD_H_ @@ -270,6 +270,15 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result); int efa_com_get_hw_hints(struct efa_com_dev *edev, struct efa_com_get_hw_hints_result *result); +bool +efa_com_check_supported_feature_id(struct efa_com_dev *edev, + enum efa_admin_aq_feature_id feature_id); +int efa_com_set_feature_ex(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size); int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups); int efa_com_alloc_pd(struct efa_com_dev *edev, struct efa_com_alloc_pd_result *result); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index faf3ff1bca2a..82145574c928 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include #include +#include +#include #include @@ -187,6 +189,52 @@ static void efa_stats_init(struct efa_dev *dev) atomic64_set(s, 0); } +static void efa_set_host_info(struct efa_dev *dev) +{ + struct efa_admin_set_feature_resp resp = {}; + struct efa_admin_set_feature_cmd cmd = {}; + struct efa_admin_host_info *hinf; + u32 bufsz = sizeof(*hinf); + dma_addr_t hinf_dma; + + if (!efa_com_check_supported_feature_id(&dev->edev, + EFA_ADMIN_HOST_INFO)) + return; + + /* Failures in host info set shall not disturb probe */ + hinf = dma_alloc_coherent(&dev->pdev->dev, bufsz, &hinf_dma, + GFP_KERNEL); + if (!hinf) + return; + + strlcpy(hinf->os_dist_str, utsname()->release, + min(sizeof(hinf->os_dist_str), sizeof(utsname()->release))); + hinf->os_type = EFA_ADMIN_OS_LINUX; + strlcpy(hinf->kernel_ver_str, utsname()->version, + min(sizeof(hinf->kernel_ver_str), sizeof(utsname()->version))); + hinf->kernel_ver = LINUX_VERSION_CODE; + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MAJOR, 0); + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MINOR, 0); + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR, 0); + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE, 0); + EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_BUS, dev->pdev->bus->number); + EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_DEVICE, + PCI_SLOT(dev->pdev->devfn)); + EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_FUNCTION, + PCI_FUNC(dev->pdev->devfn)); + EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MAJOR, + EFA_COMMON_SPEC_VERSION_MAJOR); + EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MINOR, + EFA_COMMON_SPEC_VERSION_MINOR); + EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_INTREE, 1); + EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_GDR, 0); + + efa_com_set_feature_ex(&dev->edev, &resp, &cmd, EFA_ADMIN_HOST_INFO, + hinf_dma, bufsz); + + dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); +} + static const struct ib_device_ops efa_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_EFA, @@ -251,6 +299,8 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) goto err_release_doorbell_bar; + efa_set_host_info(dev); + dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = 1; From fe810b509c5f62b5b3d5681ea6f5d36349ced979 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 11 May 2020 12:05:41 -0400 Subject: [PATCH 428/562] IB/hfi1: Add accelerated IP capability bit The accelerated IP capability bit is added to allow users to control which feature is enabled and disabled. Link: https://lore.kernel.org/r/20200511160541.173205.96870.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/common.h | 5 +++-- include/uapi/rdma/hfi/hfi1_user.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 40a1ff0c8a8e..1f7107e35a43 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -149,7 +149,8 @@ HFI1_CAP_NO_INTEGRITY | \ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_TID_RDMA | \ - HFI1_CAP_OPFN) << \ + HFI1_CAP_OPFN | \ + HFI1_CAP_AIP) << \ HFI1_CAP_USER_SHIFT) /* * Set of capabilities that need to be enabled for kernel context in diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 01ac5853d9ac..d95ef9a2b032 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -6,7 +6,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -109,6 +109,7 @@ #define HFI1_CAP_OPFN (1UL << 16) /* Enable the OPFN protocol */ #define HFI1_CAP_SDMA_HEAD_CHECK (1UL << 17) /* SDMA head checking */ #define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */ +#define HFI1_CAP_AIP (1UL << 19) /* Enable accelerated IP */ #define HFI1_RCVHDR_ENTSIZE_2 (1UL << 0) #define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1) From d99dc602e2a55a99940ba9506a7126dfa54d54ea Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:05:48 -0400 Subject: [PATCH 429/562] IB/hfi1: Add functions to transmit datagram ipoib packets This patch implements the mechanism to accelerate the transmit side of a multiple transmit queue RDMA netdev by submitting the packets to the SDMA engine directly instead of sending through the verbs layer. This patch also changes the UD/SEND_ONLY op to output the entropy value in byte 0 of deth[1]. UD/SEND_ONLY_WITH_IMMEDIATE uses the previous behavior with no entropy value being output. The code in the ipoib rdma netdev which submits tx requests upon successful submission will call trace_sdma_output_ibhdr to output the ibhdr to the trace buffer. Link: https://lore.kernel.org/r/20200511160548.173205.45616.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/Makefile | 1 + drivers/infiniband/hw/hfi1/ipoib.h | 145 +++++ drivers/infiniband/hw/hfi1/ipoib_tx.c | 828 ++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/trace.c | 10 +- include/rdma/ib_verbs.h | 1 + 5 files changed, 984 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/hfi1/ipoib.h create mode 100644 drivers/infiniband/hw/hfi1/ipoib_tx.c diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 0405d26d0833..09ef0b8b8ac7 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -22,6 +22,7 @@ hfi1-y := \ init.o \ intr.o \ iowait.o \ + ipoib_tx.o \ mad.o \ mmu_rb.o \ msix.o \ diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h new file mode 100644 index 000000000000..2b541abde266 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for IPOIB functionality + */ + +#ifndef HFI1_IPOIB_H +#define HFI1_IPOIB_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfi.h" +#include "iowait.h" + +#include + +#define HFI1_IPOIB_ENTROPY_SHIFT 24 + +#define HFI1_IPOIB_TXREQ_NAME_LEN 32 + +#define HFI1_IPOIB_ENCAP_LEN 4 + +struct hfi1_ipoib_dev_priv; + +union hfi1_ipoib_flow { + u16 as_int; + struct { + u8 tx_queue; + u8 sc5; + } __attribute__((__packed__)); +}; + +/** + * struct hfi1_ipoib_circ_buf - List of items to be processed + * @items: ring of items + * @head: ring head + * @tail: ring tail + * @max_items: max items + 1 that the ring can contain + * @producer_lock: producer sync lock + * @consumer_lock: consumer sync lock + */ +struct hfi1_ipoib_circ_buf { + void **items; + unsigned long head; + unsigned long tail; + unsigned long max_items; + spinlock_t producer_lock; /* head sync lock */ + spinlock_t consumer_lock; /* tail sync lock */ +}; + +/** + * struct hfi1_ipoib_txq - IPOIB per Tx queue information + * @priv: private pointer + * @sde: sdma engine + * @tx_list: tx request list + * @sent_txreqs: count of txreqs posted to sdma + * @flow: tracks when list needs to be flushed for a flow change + * @q_idx: ipoib Tx queue index + * @pkts_sent: indicator packets have been sent from this queue + * @wait: iowait structure + * @complete_txreqs: count of txreqs completed by sdma + * @napi: pointer to tx napi interface + * @tx_ring: ring of ipoib txreqs to be reaped by napi callback + */ +struct hfi1_ipoib_txq { + struct hfi1_ipoib_dev_priv *priv; + struct sdma_engine *sde; + struct list_head tx_list; + u64 sent_txreqs; + union hfi1_ipoib_flow flow; + u8 q_idx; + bool pkts_sent; + struct iowait wait; + + atomic64_t ____cacheline_aligned_in_smp complete_txreqs; + struct napi_struct *napi; + struct hfi1_ipoib_circ_buf tx_ring; +}; + +struct hfi1_ipoib_dev_priv { + struct hfi1_devdata *dd; + struct net_device *netdev; + struct ib_device *device; + struct hfi1_ipoib_txq *txqs; + struct kmem_cache *txreq_cache; + struct napi_struct *tx_napis; + u16 pkey; + u16 pkey_index; + u32 qkey; + u8 port_num; + + const struct net_device_ops *netdev_ops; + struct rvt_qp *qp; + struct pcpu_sw_netstats __percpu *netstats; +}; + +/* hfi1 ipoib rdma netdev's private data structure */ +struct hfi1_ipoib_rdma_netdev { + struct rdma_netdev rn; /* keep this first */ + /* followed by device private data */ + struct hfi1_ipoib_dev_priv dev_priv; +}; + +static inline struct hfi1_ipoib_dev_priv * +hfi1_ipoib_priv(const struct net_device *dev) +{ + return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv; +} + +static inline void +hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv, + u64 packets, + u64 bytes) +{ + struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); + + u64_stats_update_begin(&netstats->syncp); + netstats->tx_packets += packets; + netstats->tx_bytes += bytes; + u64_stats_update_end(&netstats->syncp); +} + +int hfi1_ipoib_send_dma(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn); + +int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv); +void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv); + +void hfi1_ipoib_napi_tx_enable(struct net_device *dev); +void hfi1_ipoib_napi_tx_disable(struct net_device *dev); + +#endif /* _IPOIB_H */ diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c new file mode 100644 index 000000000000..883cb9d48022 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for IPOIB SDMA functionality + */ + +#include +#include + +#include "sdma.h" +#include "verbs.h" +#include "trace_ibhdrs.h" +#include "ipoib.h" + +/* Add a convenience helper */ +#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1)) +#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size) +#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size) + +/** + * struct ipoib_txreq - IPOIB transmit descriptor + * @txreq: sdma transmit request + * @sdma_hdr: 9b ib headers + * @sdma_status: status returned by sdma engine + * @priv: ipoib netdev private data + * @txq: txq on which skb was output + * @skb: skb to send + */ +struct ipoib_txreq { + struct sdma_txreq txreq; + struct hfi1_sdma_header sdma_hdr; + int sdma_status; + struct hfi1_ipoib_dev_priv *priv; + struct hfi1_ipoib_txq *txq; + struct sk_buff *skb; +}; + +struct ipoib_txparms { + struct hfi1_devdata *dd; + struct rdma_ah_attr *ah_attr; + struct hfi1_ibport *ibp; + struct hfi1_ipoib_txq *txq; + union hfi1_ipoib_flow flow; + u32 dqpn; + u8 hdr_dwords; + u8 entropy; +}; + +static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) +{ + return sent - completed; +} + +static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq) +{ + if (unlikely(hfi1_ipoib_txreqs(++txq->sent_txreqs, + atomic64_read(&txq->complete_txreqs)) >= + min_t(unsigned int, txq->priv->netdev->tx_queue_len, + txq->tx_ring.max_items - 1))) + netif_stop_subqueue(txq->priv->netdev, txq->q_idx); +} + +static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) +{ + struct net_device *dev = txq->priv->netdev; + + /* If the queue is already running just return */ + if (likely(!__netif_subqueue_stopped(dev, txq->q_idx))) + return; + + /* If shutting down just return as queue state is irrelevant */ + if (unlikely(dev->reg_state != NETREG_REGISTERED)) + return; + + /* + * When the queue has been drained to less than half full it will be + * restarted. + * The size of the txreq ring is fixed at initialization. + * The tx queue len can be adjusted upward while the interface is + * running. + * The tx queue len can be large enough to overflow the txreq_ring. + * Use the minimum of the current tx_queue_len or the rings max txreqs + * to protect against ring overflow. + */ + if (hfi1_ipoib_txreqs(txq->sent_txreqs, + atomic64_read(&txq->complete_txreqs)) + < min_t(unsigned int, dev->tx_queue_len, + txq->tx_ring.max_items) >> 1) + netif_wake_subqueue(dev, txq->q_idx); +} + +static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) +{ + struct hfi1_ipoib_dev_priv *priv = tx->priv; + + if (likely(!tx->sdma_status)) { + hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len); + } else { + ++priv->netdev->stats.tx_errors; + dd_dev_warn(priv->dd, + "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n", + __func__, tx->sdma_status, + le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx, + tx->txq->sde->this_idx); + } + + napi_consume_skb(tx->skb, budget); + sdma_txclean(priv->dd, &tx->txreq); + kmem_cache_free(priv->txreq_cache, tx); +} + +static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget) +{ + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; + unsigned long head; + unsigned long tail; + unsigned int max_tx; + int work_done; + int tx_count; + + spin_lock_bh(&tx_ring->consumer_lock); + + /* Read index before reading contents at that index. */ + head = smp_load_acquire(&tx_ring->head); + tail = tx_ring->tail; + max_tx = tx_ring->max_items; + + work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget); + + for (tx_count = work_done; tx_count; tx_count--) { + hfi1_ipoib_free_tx(tx_ring->items[tail], budget); + tail = CIRC_NEXT(tail, max_tx); + } + + atomic64_add(work_done, &txq->complete_txreqs); + + /* Finished freeing tx items so store the tail value. */ + smp_store_release(&tx_ring->tail, tail); + + spin_unlock_bh(&tx_ring->consumer_lock); + + hfi1_ipoib_check_queue_stopped(txq); + + return work_done; +} + +static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev); + struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; + + int work_done = hfi1_ipoib_drain_tx_ring(txq, budget); + + if (work_done < budget) + napi_complete_done(napi, work_done); + + return work_done; +} + +static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx) +{ + struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring; + unsigned long head; + unsigned long tail; + size_t max_tx; + + spin_lock(&tx_ring->producer_lock); + + head = tx_ring->head; + tail = READ_ONCE(tx_ring->tail); + max_tx = tx_ring->max_items; + + if (likely(CIRC_SPACE(head, tail, max_tx))) { + tx_ring->items[head] = tx; + + /* Finish storing txreq before incrementing head. */ + smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx)); + napi_schedule(tx->txq->napi); + } else { + struct hfi1_ipoib_txq *txq = tx->txq; + struct hfi1_ipoib_dev_priv *priv = tx->priv; + + /* Ring was full */ + hfi1_ipoib_free_tx(tx, 0); + atomic64_inc(&txq->complete_txreqs); + dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx); + } + + spin_unlock(&tx_ring->producer_lock); +} + +static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status) +{ + struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq); + + tx->sdma_status = status; + + hfi1_ipoib_add_tx(tx); +} + +static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_devdata *dd = txp->dd; + struct sdma_txreq *txreq = &tx->txreq; + struct sk_buff *skb = tx->skb; + int ret = 0; + int i; + + if (skb_headlen(skb)) { + ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb)); + if (unlikely(ret)) + return ret; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + ret = sdma_txadd_page(dd, + txreq, + skb_frag_page(frag), + frag->bv_offset, + skb_frag_size(frag)); + if (unlikely(ret)) + break; + } + + return ret; +} + +static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_devdata *dd = txp->dd; + struct sdma_txreq *txreq = &tx->txreq; + struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + u16 pkt_bytes = + sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len; + int ret; + + ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete); + if (unlikely(ret)) + return ret; + + /* add pbc + headers */ + ret = sdma_txadd_kvaddr(dd, + txreq, + sdma_hdr, + sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2)); + if (unlikely(ret)) + return ret; + + /* add the ulp payload */ + return hfi1_ipoib_build_ulp_payload(tx, txp); +} + +static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = tx->priv; + struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + struct sk_buff *skb = tx->skb; + struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); + struct rdma_ah_attr *ah_attr = txp->ah_attr; + struct ib_other_headers *ohdr; + struct ib_grh *grh; + u16 dwords; + u16 slid; + u16 dlid; + u16 lrh0; + u32 bth0; + u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 | + priv->netdev->dev_addr[2] << 8 | + priv->netdev->dev_addr[3]); + u16 payload_dwords; + u8 pad_cnt; + + pad_cnt = -skb->len & 3; + + /* Includes ICRC */ + payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC; + + /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */ + txp->hdr_dwords = 7; + + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { + grh = &sdma_hdr->hdr.ibh.u.l.grh; + txp->hdr_dwords += + hfi1_make_grh(txp->ibp, + grh, + rdma_ah_read_grh(ah_attr), + txp->hdr_dwords - LRH_9B_DWORDS, + payload_dwords); + lrh0 = HFI1_LRH_GRH; + ohdr = &sdma_hdr->hdr.ibh.u.l.oth; + } else { + lrh0 = HFI1_LRH_BTH; + ohdr = &sdma_hdr->hdr.ibh.u.oth; + } + + lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; + lrh0 |= (txp->flow.sc5 & 0xf) << 12; + + dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B); + if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } else { + u16 lid = (u16)ppd->lid; + + if (lid) { + lid |= rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1); + slid = lid; + } else { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } + } + + /* Includes ICRC */ + dwords = txp->hdr_dwords + payload_dwords; + + /* Build the lrh */ + sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B; + hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid); + + /* Build the bth */ + bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey; + + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(txp->dqpn); + ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs)); + + /* Build the deth */ + ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey); + ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy << + HFI1_IPOIB_ENTROPY_SHIFT) | sqpn); + + /* Construct the pbc. */ + sdma_hdr->pbc = + cpu_to_le64(create_pbc(ppd, + ib_is_sc5(txp->flow.sc5) << + PBC_DC_INFO_SHIFT, + 0, + sc_to_vlt(priv->dd, txp->flow.sc5), + dwords - SIZE_OF_CRC + + (sizeof(sdma_hdr->pbc) >> 2))); +} + +static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct ipoib_txreq *tx; + int ret; + + tx = kmem_cache_alloc_node(priv->txreq_cache, + GFP_ATOMIC, + priv->dd->node); + if (unlikely(!tx)) + return ERR_PTR(-ENOMEM); + + /* so that we can test if the sdma decriptors are there */ + tx->txreq.num_desc = 0; + tx->priv = priv; + tx->txq = txp->txq; + tx->skb = skb; + + hfi1_ipoib_build_ib_tx_headers(tx, txp); + + ret = hfi1_ipoib_build_tx_desc(tx, txp); + if (likely(!ret)) { + if (txp->txq->flow.as_int != txp->flow.as_int) { + txp->txq->flow.tx_queue = txp->flow.tx_queue; + txp->txq->flow.sc5 = txp->flow.sc5; + txp->txq->sde = + sdma_select_engine_sc(priv->dd, + txp->flow.tx_queue, + txp->flow.sc5); + } + + return tx; + } + + sdma_txclean(priv->dd, &tx->txreq); + kmem_cache_free(priv->txreq_cache, tx); + + return ERR_PTR(ret); +} + +static int hfi1_ipoib_submit_tx_list(struct net_device *dev, + struct hfi1_ipoib_txq *txq) +{ + int ret; + u16 count_out; + + ret = sdma_send_txlist(txq->sde, + iowait_get_ib_work(&txq->wait), + &txq->tx_list, + &count_out); + if (likely(!ret) || ret == -EBUSY || ret == -ECOMM) + return ret; + + dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret); + + return ret; +} + +static int hfi1_ipoib_flush_tx_list(struct net_device *dev, + struct hfi1_ipoib_txq *txq) +{ + int ret = 0; + + if (!list_empty(&txq->tx_list)) { + /* Flush the current list */ + ret = hfi1_ipoib_submit_tx_list(dev, txq); + + if (unlikely(ret)) + if (ret != -EBUSY) + ++dev->stats.tx_carrier_errors; + } + + return ret; +} + +static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq, + struct ipoib_txreq *tx) +{ + int ret; + + ret = sdma_send_txreq(txq->sde, + iowait_get_ib_work(&txq->wait), + &tx->txreq, + txq->pkts_sent); + if (likely(!ret)) { + txq->pkts_sent = true; + iowait_starve_clear(txq->pkts_sent, &txq->wait); + } + + return ret; +} + +static int hfi1_ipoib_send_dma_single(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct hfi1_ipoib_txq *txq = txp->txq; + struct ipoib_txreq *tx; + int ret; + + tx = hfi1_ipoib_send_dma_common(dev, skb, txp); + if (IS_ERR(tx)) { + int ret = PTR_ERR(tx); + + dev_kfree_skb_any(skb); + + if (ret == -ENOMEM) + ++dev->stats.tx_errors; + else + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; + } + + ret = hfi1_ipoib_submit_tx(txq, tx); + if (likely(!ret)) { + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + txq->pkts_sent = false; + + if (ret == -EBUSY) { + list_add_tail(&tx->txreq.list, &txq->tx_list); + + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + if (ret == -ECOMM) { + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + sdma_txclean(priv->dd, &tx->txreq); + dev_kfree_skb_any(skb); + kmem_cache_free(priv->txreq_cache, tx); + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; +} + +static int hfi1_ipoib_send_dma_list(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_txq *txq = txp->txq; + struct ipoib_txreq *tx; + + /* Has the flow change ? */ + if (txq->flow.as_int != txp->flow.as_int) + (void)hfi1_ipoib_flush_tx_list(dev, txq); + + tx = hfi1_ipoib_send_dma_common(dev, skb, txp); + if (IS_ERR(tx)) { + int ret = PTR_ERR(tx); + + dev_kfree_skb_any(skb); + + if (ret == -ENOMEM) + ++dev->stats.tx_errors; + else + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; + } + + list_add_tail(&tx->txreq.list, &txq->tx_list); + + hfi1_ipoib_check_queue_depth(txq); + + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + + if (!netdev_xmit_more()) + (void)hfi1_ipoib_flush_tx_list(dev, txq); + + return NETDEV_TX_OK; +} + +static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb) +{ + if (skb_transport_header_was_set(skb)) { + u8 *hdr = (u8 *)skb_transport_header(skb); + + return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]); + } + + return (u8)skb_get_queue_mapping(skb); +} + +int hfi1_ipoib_send_dma(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct ipoib_txparms txp; + struct rdma_netdev *rn = netdev_priv(dev); + + if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) { + dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n", + skb->len, + rn->mtu + HFI1_IPOIB_ENCAP_LEN); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + txp.dd = priv->dd; + txp.ah_attr = &ibah_to_rvtah(address)->attr; + txp.ibp = to_iport(priv->device, priv->port_num); + txp.txq = &priv->txqs[skb_get_queue_mapping(skb)]; + txp.dqpn = dqpn; + txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)]; + txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb); + txp.entropy = hfi1_ipoib_calc_entropy(skb); + + if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list)) + return hfi1_ipoib_send_dma_list(dev, skb, &txp); + + return hfi1_ipoib_send_dma_single(dev, skb, &txp); +} + +/* + * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function + * + * This function gets called from sdma_send_txreq() when there are not enough + * sdma descriptors available to send the packet. It adds Tx queue's wait + * structure to sdma engine's dmawait list to be woken up when descriptors + * become available. + */ +static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde, + struct iowait_work *wait, + struct sdma_txreq *txreq, + uint seq, + bool pkts_sent) +{ + struct hfi1_ipoib_txq *txq = + container_of(wait->iow, struct hfi1_ipoib_txq, wait); + + write_seqlock(&sde->waitlock); + + if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) { + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&sde->waitlock); + return -EAGAIN; + } + + netif_stop_subqueue(txq->priv->netdev, txq->q_idx); + + if (list_empty(&txq->wait.list)) + iowait_queue(pkts_sent, wait->iow, &sde->dmawait); + + write_sequnlock(&sde->waitlock); + return -EBUSY; + } + + write_sequnlock(&sde->waitlock); + return -EINVAL; +} + +/* + * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function + * + * This function gets called when SDMA descriptors becomes available and Tx + * queue's wait structure was previously added to sdma engine's dmawait list. + */ +static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason) +{ + struct hfi1_ipoib_txq *txq = + container_of(wait, struct hfi1_ipoib_txq, wait); + + if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) + iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND); +} + +static void hfi1_ipoib_flush_txq(struct work_struct *work) +{ + struct iowait_work *ioww = + container_of(work, struct iowait_work, iowork); + struct iowait *wait = iowait_ioww_to_iow(ioww); + struct hfi1_ipoib_txq *txq = + container_of(wait, struct hfi1_ipoib_txq, wait); + struct net_device *dev = txq->priv->netdev; + + if (likely(dev->reg_state == NETREG_REGISTERED) && + likely(__netif_subqueue_stopped(dev, txq->q_idx)) && + likely(!hfi1_ipoib_flush_tx_list(dev, txq))) + netif_wake_subqueue(dev, txq->q_idx); +} + +int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) +{ + struct net_device *dev = priv->netdev; + char buf[HFI1_IPOIB_TXREQ_NAME_LEN]; + unsigned long tx_ring_size; + int i; + + /* + * Ring holds 1 less than tx_ring_size + * Round up to next power of 2 in order to hold at least tx_queue_len + */ + tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1); + + snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit); + priv->txreq_cache = kmem_cache_create(buf, + sizeof(struct ipoib_txreq), + 0, + 0, + NULL); + if (!priv->txreq_cache) + return -ENOMEM; + + priv->tx_napis = kcalloc_node(dev->num_tx_queues, + sizeof(struct napi_struct), + GFP_ATOMIC, + priv->dd->node); + if (!priv->tx_napis) + goto free_txreq_cache; + + priv->txqs = kcalloc_node(dev->num_tx_queues, + sizeof(struct hfi1_ipoib_txq), + GFP_ATOMIC, + priv->dd->node); + if (!priv->txqs) + goto free_tx_napis; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + iowait_init(&txq->wait, + 0, + hfi1_ipoib_flush_txq, + NULL, + hfi1_ipoib_sdma_sleep, + hfi1_ipoib_sdma_wakeup, + NULL, + NULL); + txq->priv = priv; + txq->sde = NULL; + INIT_LIST_HEAD(&txq->tx_list); + atomic64_set(&txq->complete_txreqs, 0); + txq->q_idx = i; + txq->flow.tx_queue = 0xff; + txq->flow.sc5 = 0xff; + txq->pkts_sent = false; + + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), + priv->dd->node); + + txq->tx_ring.items = + vzalloc_node(array_size(tx_ring_size, + sizeof(struct ipoib_txreq)), + priv->dd->node); + if (!txq->tx_ring.items) + goto free_txqs; + + spin_lock_init(&txq->tx_ring.producer_lock); + spin_lock_init(&txq->tx_ring.consumer_lock); + txq->tx_ring.max_items = tx_ring_size; + + txq->napi = &priv->tx_napis[i]; + netif_tx_napi_add(dev, txq->napi, + hfi1_ipoib_process_tx_ring, + NAPI_POLL_WEIGHT); + } + + return 0; + +free_txqs: + for (i--; i >= 0; i--) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + netif_napi_del(txq->napi); + vfree(txq->tx_ring.items); + } + + kfree(priv->txqs); + priv->txqs = NULL; + +free_tx_napis: + kfree(priv->tx_napis); + priv->tx_napis = NULL; + +free_txreq_cache: + kmem_cache_destroy(priv->txreq_cache); + priv->txreq_cache = NULL; + return -ENOMEM; +} + +static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) +{ + struct sdma_txreq *txreq; + struct sdma_txreq *txreq_tmp; + atomic64_t *complete_txreqs = &txq->complete_txreqs; + + list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) { + struct ipoib_txreq *tx = + container_of(txreq, struct ipoib_txreq, txreq); + + list_del(&txreq->list); + sdma_txclean(txq->priv->dd, &tx->txreq); + dev_kfree_skb_any(tx->skb); + kmem_cache_free(txq->priv->txreq_cache, tx); + atomic64_inc(complete_txreqs); + } + + if (hfi1_ipoib_txreqs(txq->sent_txreqs, atomic64_read(complete_txreqs))) + dd_dev_warn(txq->priv->dd, + "txq %d not empty found %llu requests\n", + txq->q_idx, + hfi1_ipoib_txreqs(txq->sent_txreqs, + atomic64_read(complete_txreqs))); +} + +void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) +{ + int i; + + for (i = 0; i < priv->netdev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + iowait_cancel_work(&txq->wait); + iowait_sdma_drain(&txq->wait); + hfi1_ipoib_drain_tx_list(txq); + netif_napi_del(txq->napi); + (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + vfree(txq->tx_ring.items); + } + + kfree(priv->txqs); + priv->txqs = NULL; + + kfree(priv->tx_napis); + priv->tx_napis = NULL; + + kmem_cache_destroy(priv->txreq_cache); + priv->txreq_cache = NULL; +} + +void hfi1_ipoib_napi_tx_enable(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + napi_enable(txq->napi); + } +} + +void hfi1_ipoib_napi_tx_disable(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + napi_disable(txq->napi); + (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + } +} diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 9a3d236bcc88..c8a9988d972d 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -47,6 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" #include "exp_rcv.h" +#include "ipoib.h" static u8 __get_ib_hdr_len(struct ib_header *hdr) { @@ -126,6 +127,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2) #define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" #define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" #define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" +#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x" #define IETH_PRN "ieth rkey:0x%.8x" #define ATOMICACKETH_PRN "origdata:%llx" #define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" @@ -444,6 +446,12 @@ const char *parse_everbs_hdrs( break; /* deth */ case OP(UD, SEND_ONLY): + trace_seq_printf(p, DETH_ENTROPY_PRN, + be32_to_cpu(eh->ud.deth[0]), + be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK, + be32_to_cpu(eh->ud.deth[1]) >> + HFI1_IPOIB_ENTROPY_SHIFT); + break; case OP(UD, SEND_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, DETH_PRN, be32_to_cpu(eh->ud.deth[0]), diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index db58f11552f1..029541a8faeb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2205,6 +2205,7 @@ struct rdma_netdev { void *clnt_priv; struct ib_device *hca; u8 port_num; + int mtu; /* * cleanup function must be specified. From 438d7dda9841ec42ef7d9024dc45347f9526016a Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:05:54 -0400 Subject: [PATCH 430/562] IB/hfi1: Add the transmit side of a datagram ipoib RDMA netdev This implements the transmit side of the multiple transmit queue RDMA netdev used to accelerate ipoib. The receive side remains the ipoib internal implementation. The init/unint/open/stop netdev operations are saved off and called by the versions within the hfi1 netdev in order to initialize the connected mode resources present in ipoib thus allowing us to switch modes between datagram and connected. The datagram queue pair instantiated by the ipoib ulp is used by this implementation for its queue pair number and to register with multicast. The above queue pair is not used on transmit other than its qpn as the verbs layer is skipped and packets are directly submitted to the sdma engines. Link: https://lore.kernel.org/r/20200511160554.173205.1369.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/Makefile | 1 + drivers/infiniband/hw/hfi1/ipoib.h | 5 + drivers/infiniband/hw/hfi1/ipoib_main.c | 283 ++++++++++++++++++++++++ 3 files changed, 289 insertions(+) create mode 100644 drivers/infiniband/hw/hfi1/ipoib_main.c diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 09ef0b8b8ac7..0b2571306267 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -22,6 +22,7 @@ hfi1-y := \ init.o \ intr.o \ iowait.o \ + ipoib_main.o \ ipoib_tx.o \ mad.o \ mmu_rb.o \ diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 2b541abde266..c2e63ca57896 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -142,4 +142,9 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv); void hfi1_ipoib_napi_tx_enable(struct net_device *dev); void hfi1_ipoib_napi_tx_disable(struct net_device *dev); +int hfi1_ipoib_rn_get_params(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params); + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c new file mode 100644 index 000000000000..304a5ac86f77 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for ipoib functionality + */ + +#include "ipoib.h" +#include "hfi.h" + +static u32 qpn_from_mac(u8 *mac_arr) +{ + return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3]; +} + +static int hfi1_ipoib_dev_init(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + + return priv->netdev_ops->ndo_init(dev); +} + +static void hfi1_ipoib_dev_uninit(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + priv->netdev_ops->ndo_uninit(dev); +} + +static int hfi1_ipoib_dev_open(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int ret; + + ret = priv->netdev_ops->ndo_open(dev); + if (!ret) { + struct hfi1_ibport *ibp = to_iport(priv->device, + priv->port_num); + struct rvt_qp *qp; + u32 qpn = qpn_from_mac(priv->netdev->dev_addr); + + rcu_read_lock(); + qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn); + if (!qp) { + rcu_read_unlock(); + priv->netdev_ops->ndo_stop(dev); + return -EINVAL; + } + rvt_get_qp(qp); + priv->qp = qp; + rcu_read_unlock(); + + hfi1_ipoib_napi_tx_enable(dev); + } + + return ret; +} + +static int hfi1_ipoib_dev_stop(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + if (!priv->qp) + return 0; + + hfi1_ipoib_napi_tx_disable(dev); + + rvt_put_qp(priv->qp); + priv->qp = NULL; + + return priv->netdev_ops->ndo_stop(dev); +} + +static void hfi1_ipoib_dev_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *storage) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + u64 rx_packets = 0ull; + u64 rx_bytes = 0ull; + u64 tx_packets = 0ull; + u64 tx_bytes = 0ull; + int i; + + netdev_stats_to_stats64(storage, &dev->stats); + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *stats; + unsigned int start; + u64 trx_packets; + u64 trx_bytes; + u64 ttx_packets; + u64 ttx_bytes; + + stats = per_cpu_ptr(priv->netstats, i); + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + trx_packets = stats->rx_packets; + trx_bytes = stats->rx_bytes; + ttx_packets = stats->tx_packets; + ttx_bytes = stats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + rx_packets += trx_packets; + rx_bytes += trx_bytes; + tx_packets += ttx_packets; + tx_bytes += ttx_bytes; + } + + storage->rx_packets += rx_packets; + storage->rx_bytes += rx_bytes; + storage->tx_packets += tx_packets; + storage->tx_bytes += tx_bytes; +} + +static const struct net_device_ops hfi1_ipoib_netdev_ops = { + .ndo_init = hfi1_ipoib_dev_init, + .ndo_uninit = hfi1_ipoib_dev_uninit, + .ndo_open = hfi1_ipoib_dev_open, + .ndo_stop = hfi1_ipoib_dev_stop, + .ndo_get_stats64 = hfi1_ipoib_dev_get_stats64, +}; + +static int hfi1_ipoib_send(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn) +{ + return hfi1_ipoib_send_dma(dev, skb, address, dqpn); +} + +static int hfi1_ipoib_mcast_attach(struct net_device *dev, + struct ib_device *device, + union ib_gid *mgid, + u16 mlid, + int set_qkey, + u32 qkey) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr); + struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num); + struct rvt_qp *qp; + int ret = -EINVAL; + + rcu_read_lock(); + + qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn); + if (qp) { + rvt_get_qp(qp); + rcu_read_unlock(); + if (set_qkey) + priv->qkey = qkey; + + /* attach QP to multicast group */ + ret = ib_attach_mcast(&qp->ibqp, mgid, mlid); + rvt_put_qp(qp); + } else { + rcu_read_unlock(); + } + + return ret; +} + +static int hfi1_ipoib_mcast_detach(struct net_device *dev, + struct ib_device *device, + union ib_gid *mgid, + u16 mlid) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr); + struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num); + struct rvt_qp *qp; + int ret = -EINVAL; + + rcu_read_lock(); + + qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn); + if (qp) { + rvt_get_qp(qp); + rcu_read_unlock(); + ret = ib_detach_mcast(&qp->ibqp, mgid, mlid); + rvt_put_qp(qp); + } else { + rcu_read_unlock(); + } + return ret; +} + +static void hfi1_ipoib_netdev_dtor(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + hfi1_ipoib_txreq_deinit(priv); + + free_percpu(priv->netstats); +} + +static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) +{ + hfi1_ipoib_netdev_dtor(dev); + free_netdev(dev); +} + +static void hfi1_ipoib_set_id(struct net_device *dev, int id) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + priv->pkey_index = (u16)id; + ib_query_pkey(priv->device, + priv->port_num, + priv->pkey_index, + &priv->pkey); +} + +static int hfi1_ipoib_setup_rn(struct ib_device *device, + u8 port_num, + struct net_device *netdev, + void *param) +{ + struct hfi1_devdata *dd = dd_from_ibdev(device); + struct rdma_netdev *rn = netdev_priv(netdev); + struct hfi1_ipoib_dev_priv *priv; + int rc; + + rn->send = hfi1_ipoib_send; + rn->attach_mcast = hfi1_ipoib_mcast_attach; + rn->detach_mcast = hfi1_ipoib_mcast_detach; + rn->set_id = hfi1_ipoib_set_id; + rn->hca = device; + rn->port_num = port_num; + rn->mtu = netdev->mtu; + + priv = hfi1_ipoib_priv(netdev); + priv->dd = dd; + priv->netdev = netdev; + priv->device = device; + priv->port_num = port_num; + priv->netdev_ops = netdev->netdev_ops; + + netdev->netdev_ops = &hfi1_ipoib_netdev_ops; + + ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey); + + rc = hfi1_ipoib_txreq_init(priv); + if (rc) { + dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc); + hfi1_ipoib_free_rdma_netdev(netdev); + return rc; + } + + netdev->priv_destructor = hfi1_ipoib_netdev_dtor; + netdev->needs_free_netdev = true; + + return 0; +} + +int hfi1_ipoib_rn_get_params(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params) +{ + struct hfi1_devdata *dd = dd_from_ibdev(device); + + if (type != RDMA_NETDEV_IPOIB) + return -EOPNOTSUPP; + + if (!HFI1_CAP_IS_KSET(AIP)) + return -EOPNOTSUPP; + + if (!port_num || port_num > dd->num_pports) + return -EINVAL; + + params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev); + params->txqs = dd->num_sdma; + params->param = NULL; + params->initialize_rdma_netdev = hfi1_ipoib_setup_rn; + + return 0; +} From 84e3b19a27f8f37c8cf98f8b7cdf3f8674bf8e97 Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:06:00 -0400 Subject: [PATCH 431/562] IB/hfi1: Remove module parameter for KDETH qpns The module parameter for KDETH qpns is being removed in favor of always using the default value of 0x80 as the qpn prefix. Defines have been added for various KDETH values including the prefix of 0x80. The reserved range now starts at the base value for KDETH qpns (0x80) and extends up to and including the last qpn for other reserved QP prefixed types. Adjust other QP prefixed define names to match KDETH defined names. Link: https://lore.kernel.org/r/20200511160600.173205.27508.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 19 +++--------------- drivers/infiniband/hw/hfi1/common.h | 7 ------- drivers/infiniband/hw/hfi1/file_ops.c | 4 ++-- drivers/infiniband/hw/hfi1/hfi.h | 3 +-- drivers/infiniband/hw/hfi1/tid_rdma.c | 4 ++-- drivers/infiniband/hw/hfi1/verbs.c | 7 +++---- include/rdma/rdmavt_qp.h | 29 ++++++++++++++++++++++++++- 7 files changed, 39 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e0b1238d31df..c08bf813d6fa 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -67,10 +67,6 @@ #include "debugfs.h" #include "fault.h" -uint kdeth_qp; -module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); -MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); - uint num_vls = HFI1_MAX_VLS_SUPPORTED; module_param(num_vls, uint, S_IRUGO); MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)"); @@ -14119,21 +14115,12 @@ static void init_early_variables(struct hfi1_devdata *dd) static void init_kdeth_qp(struct hfi1_devdata *dd) { - /* user changed the KDETH_QP */ - if (kdeth_qp != 0 && kdeth_qp >= 0xff) { - /* out of range or illegal value */ - dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring"); - kdeth_qp = 0; - } - if (kdeth_qp == 0) /* not set, or failed range check */ - kdeth_qp = DEFAULT_KDETH_QP; - write_csr(dd, SEND_BTH_QP, - (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) << + (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) << SEND_BTH_QP_KDETH_QP_SHIFT); write_csr(dd, RCV_BTH_QP, - (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) << + (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) << RCV_BTH_QP_KDETH_QP_SHIFT); } diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 1f7107e35a43..606254513640 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -72,13 +72,6 @@ * compilation unit */ -/* - * If a packet's QP[23:16] bits match this value, then it is - * a PSM packet and the hardware will expect a KDETH header - * following the BTH. - */ -#define DEFAULT_KDETH_QP 0x80 - /* driver/hw feature set bitmask */ #define HFI1_CAP_USER_SHIFT 24 #define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e7fdd70c6e78..8ca51e43cf53 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1264,7 +1264,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; binfo.sw_version = HFI1_KERN_SWVERSION; - binfo.bthqp = kdeth_qp; + binfo.bthqp = RVT_KDETH_QP_PREFIX; binfo.jkey = uctxt->jkey; /* * If more than 64 contexts are enabled the allocated credit diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b06c2594105a..ed13051d38da 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015-2018 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -2250,7 +2250,6 @@ extern int num_user_contexts; extern unsigned long n_krcvqs; extern uint krcvqs[]; extern int krcvqsset; -extern uint kdeth_qp; extern uint loopback; extern uint quick_linkup; extern uint rcv_intr_timeout; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 8a2e0d9351e9..243b4ba0b6f6 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * Copyright(c) 2018 Intel Corporation. + * Copyright(c) 2018 - 2020 Intel Corporation. * */ @@ -194,7 +194,7 @@ void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p) { struct hfi1_qp_priv *priv = qp->priv; - p->qp = (kdeth_qp << 16) | priv->rcd->ctxt; + p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt; p->max_len = TID_RDMA_MAX_SEGMENT_SIZE; p->jkey = priv->rcd->jkey; p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2f6323ad9c59..c1c6fa986cd1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1863,9 +1863,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.qpn_start = 0; dd->verbs_dev.rdi.dparms.qpn_inc = 1; dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; - dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; - dd->verbs_dev.rdi.dparms.qpn_res_end = - dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; + dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE; + dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX; dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 982bf2340840..c4369a6c2951 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2016 - 2019 Intel Corporation. + * Copyright(c) 2016 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -68,6 +68,33 @@ #define RVT_R_RSP_SEND 0x08 #define RVT_R_COMM_EST 0x10 +/* + * If a packet's QP[23:16] bits match this value, then it is + * a PSM packet and the hardware will expect a KDETH header + * following the BTH. + */ +#define RVT_KDETH_QP_PREFIX 0x80 +#define RVT_KDETH_QP_SUFFIX 0xffff +#define RVT_KDETH_QP_PREFIX_MASK 0x00ff0000 +#define RVT_KDETH_QP_PREFIX_SHIFT 16 +#define RVT_KDETH_QP_BASE (u32)(RVT_KDETH_QP_PREFIX << \ + RVT_KDETH_QP_PREFIX_SHIFT) +#define RVT_KDETH_QP_MAX (u32)(RVT_KDETH_QP_BASE + RVT_KDETH_QP_SUFFIX) + +/* + * If a packet's LNH == BTH and DEST QPN[23:16] in the BTH match this + * prefix value, then it is an AIP packet with a DETH containing the entropy + * value in byte 4 following the BTH. + */ +#define RVT_AIP_QP_PREFIX 0x81 +#define RVT_AIP_QP_SUFFIX 0xffff +#define RVT_AIP_QP_PREFIX_MASK 0x00ff0000 +#define RVT_AIP_QP_PREFIX_SHIFT 16 +#define RVT_AIP_QP_BASE (u32)(RVT_AIP_QP_PREFIX << \ + RVT_AIP_QP_PREFIX_SHIFT) +#define RVT_AIP_QPN_MAX BIT(RVT_AIP_QP_PREFIX_SHIFT) +#define RVT_AIP_QP_MAX (u32)(RVT_AIP_QP_BASE + RVT_AIP_QPN_MAX - 1) + /* * Bit definitions for s_flags. * From 7f90a5a069f8dff9c76505b9853f95667d117c15 Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:06:07 -0400 Subject: [PATCH 432/562] IB/{rdmavt, hfi1}: Implement creation of accelerated UD QPs Adds capability to create a qpn to be recognized as an accelerated UD QP for ipoib. This is accomplished by reserving 0x81 in byte[0] of the qpn as the prefix for these qp types and reserving qpns between 0x810000 and 0x81ffff. The hfi1 capability mask already contained a flag for the VNIC netdev. This has been renamed and extended to include both VNIC and ipoib. The rvt code to allocate qps now recognizes this flag and sets 0x81 into byte[0] of the qpn. The code to allocate qpns is modified to reset the qpn numbering when it is detected that a value is located in byte[0] for a UD QP and it is a qpn being requested for net dev use. If it is a regular UD QP then it is allowable to have bits set in byte[0] of the qpn and provide the previously normal behavior. The code to free the qpn now checks for the AIP prefix value of 0x81 and removes it from the qpn before being freed so that the lower 16 bit number can be reused. This patch requires minor changes in the IB core and ipoib to facilitate the creation of accelerated UP QPs. Link: https://lore.kernel.org/r/20200511160607.173205.11757.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/verbs.c | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 24 +++++++++++++++++----- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3 +++ include/rdma/ib_verbs.h | 4 ++-- include/rdma/opa_vnic.h | 4 ++-- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c1c6fa986cd1..c61b2916d420 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1342,7 +1342,7 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | - IB_DEVICE_RDMA_NETDEV_OPA_VNIC; + IB_DEVICE_RDMA_NETDEV_OPA; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0e1b291d2cec..91ad6c571080 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 - 2019 Intel Corporation. + * Copyright(c) 2016 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -525,15 +525,18 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * @rdi: rvt device info structure * @qpt: queue pair number table pointer * @port_num: IB port number, 1 based, comes from core + * @exclude_prefix: prefix of special queue pair number being allocated * * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num) + enum ib_qp_type type, u8 port_num, u8 exclude_prefix) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; + u32 max_qpn = exclude_prefix == RVT_AIP_QP_PREFIX ? + RVT_AIP_QPN_MAX : RVT_QPN_MAX; if (rdi->driver_f.alloc_qpn) return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num); @@ -553,7 +556,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, } qpn = qpt->last + qpt->incr; - if (qpn >= RVT_QPN_MAX) + if (qpn >= max_qpn) qpn = qpt->incr | ((qpt->last & 1) ^ 1); /* offset carries bit 0 */ offset = qpn & RVT_BITS_PER_PAGE_MASK; @@ -987,6 +990,9 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; + if ((qpn & RVT_AIP_QP_PREFIX_MASK) == RVT_AIP_QP_BASE) + qpn &= RVT_AIP_QP_SUFFIX; + map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE; if (map->page) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); @@ -1074,13 +1080,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; size_t sqsize; + u8 exclude_prefix = 0; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags) + (init_attr->create_flags && + init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ @@ -1199,14 +1207,20 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_driver_priv; } + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) + exclude_prefix = RVT_AIP_QP_PREFIX; + err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, + exclude_prefix); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; } qp->ibqp.qp_num = err; + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) + qp->ibqp.qp_num |= RVT_AIP_QP_BASE; qp->port_num = init_attr->port_num; rvt_init_qp(rdi, qp, init_attr->qp_type); if (rdi->driver_f.qp_priv_init) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index b69304d28f06..587252fd6f57 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -206,6 +206,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; + if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA) + init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE; + priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { pr_warn("%s: failed to create QP\n", ca->name); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 029541a8faeb..6278e4e040fc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -305,7 +305,7 @@ enum ib_device_cap_flags { IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), - IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), + IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35), /* The device supports padding incoming writes to cacheline. */ IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), @@ -1117,7 +1117,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_INTEGRITY_EN = 1 << 6, - /* FREE = 1 << 7, */ + IB_QP_CREATE_NETDEV_USE = 1 << 7, IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, IB_QP_CREATE_SOURCE_QPN = 1 << 10, diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index e90b149fe92a..6f244e759b4f 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -1,7 +1,7 @@ #ifndef _OPA_VNIC_H #define _OPA_VNIC_H /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -132,7 +132,7 @@ struct opa_vnic_stats { static inline bool rdma_cap_opa_vnic(struct ib_device *device) { return !!(device->attrs.device_cap_flags & - IB_DEVICE_RDMA_NETDEV_OPA_VNIC); + IB_DEVICE_RDMA_NETDEV_OPA); } #endif /* _OPA_VNIC_H */ From 19d8b90a509f7fd9a3224cca6df160a413a4d521 Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Mon, 11 May 2020 12:06:12 -0400 Subject: [PATCH 433/562] IB/hfi1: RSM rules for AIP This is implementation of RSM rule for AIP packets. AIP rule will use rule RSM2 and will match standard Infiniband packet containg BTH (LNH==BTH) and having Dest QPN prefixed with value 0x81. Spread between receive contexts will be done using source QPN bits. VNIC and AIP will share receive contexts, so their rules will point to the same RMT entries and their shared code is moved to separate functions. If any of the rules is active RMT mapping will be skipped for latter. Changed function hfi1_vnic_is_rsm_full to be more general and moved it from main header to chip.c. Changed the order of RSM rules because AIP rule as more specific one is needed to be placed before more general QOS rule. Rules are occupying two last RSM registers. Link: https://lore.kernel.org/r/20200511160612.173205.73002.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Grzegorz Andrejczuk Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 169 ++++++++++++++++++++++-------- drivers/infiniband/hw/hfi1/chip.h | 4 +- drivers/infiniband/hw/hfi1/hfi.h | 8 +- drivers/infiniband/hw/hfi1/init.c | 3 +- 4 files changed, 135 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c08bf813d6fa..be1fb29a1f85 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -124,13 +124,15 @@ struct flag_table { /* * RSM instance allocation - * 0 - Verbs - * 1 - User Fecn Handling - * 2 - Vnic + * 0 - User Fecn Handling + * 1 - Vnic + * 2 - AIP + * 3 - Verbs */ -#define RSM_INS_VERBS 0 -#define RSM_INS_FECN 1 -#define RSM_INS_VNIC 2 +#define RSM_INS_FECN 0 +#define RSM_INS_VNIC 1 +#define RSM_INS_AIP 2 +#define RSM_INS_VERBS 3 /* Bit offset into the GUID which carries HFI id information */ #define GUID_HFI_INDEX_SHIFT 39 @@ -171,6 +173,25 @@ struct flag_table { /* QPN[m+n:1] QW 1, OFFSET 1 */ #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull)) +/* RSM fields for AIP */ +/* LRH.BTH above is reused for this rule */ + +/* BTH.DESTQP: QW 1, OFFSET 16 for match */ +#define BTH_DESTQP_QW 1ull +#define BTH_DESTQP_BIT_OFFSET 16ull +#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off)) +#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET) +#define BTH_DESTQP_MASK 0xFFull +#define BTH_DESTQP_VALUE 0x81ull + +/* DETH.SQPN: QW 1 Offset 56 for select */ +/* We use 8 most significant Soure QPN bits as entropy fpr AIP */ +#define DETH_AIP_SQPN_QW 3ull +#define DETH_AIP_SQPN_BIT_OFFSET 56ull +#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off)) +#define DETH_AIP_SQPN_SELECT_OFFSET \ + DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET) + /* RSM fields for Vnic */ /* L2_TYPE: QW 0, OFFSET 61 - for match */ #define L2_TYPE_QW 0ull @@ -14236,6 +14257,12 @@ static void complete_rsm_map_table(struct hfi1_devdata *dd, } } +/* Is a receive side mapping rule */ +static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) +{ + return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0; +} + /* * Add a receive side mapping rule. */ @@ -14472,39 +14499,49 @@ static void init_fecn_handling(struct hfi1_devdata *dd, rmt->used += total_cnt; } -/* Initialize RSM for VNIC */ -void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) +static inline bool hfi1_is_rmt_full(int start, int spare) +{ + return (start + spare) > NUM_MAP_ENTRIES; +} + +static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd) { u8 i, j; u8 ctx_id = 0; u64 reg; u32 regoff; - struct rsm_rule_data rrd; + int rmt_start = dd->vnic.rmt_start; - if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) { - dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n", - dd->vnic.rmt_start); - return; + /* We already have contexts mapped in RMT */ + if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) { + dd_dev_info(dd, "Contexts are already mapped in RMT\n"); + return true; } - dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n", - dd->vnic.rmt_start, - dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES); + if (hfi1_is_rmt_full(rmt_start, NUM_VNIC_MAP_ENTRIES)) { + dd_dev_err(dd, "Not enought RMT entries used = %d\n", + rmt_start); + return false; + } + + dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n", + rmt_start, + rmt_start + NUM_VNIC_MAP_ENTRIES); /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */ - regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8; + regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8; reg = read_csr(dd, regoff); for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) { - /* Update map register with vnic context */ - j = (dd->vnic.rmt_start + i) % 8; + /* Update map register with netdev context */ + j = (rmt_start + i) % 8; reg &= ~(0xffllu << (j * 8)); reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8); - /* Wrap up vnic ctx index */ + /* Wrap up netdev ctx index */ ctx_id %= dd->vnic.num_ctxt; /* Write back map register */ if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) { dev_dbg(&(dd)->pcidev->dev, - "Vnic rsm map reg[%d] =0x%llx\n", + "RMT[%d] =0x%llx\n", regoff - RCV_RSM_MAP_TABLE, reg); write_csr(dd, regoff, reg); @@ -14514,35 +14551,83 @@ void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) } } - /* Add rule for vnic */ - rrd.offset = dd->vnic.rmt_start; - rrd.pkt_type = 4; - /* Match 16B packets */ - rrd.field1_off = L2_TYPE_MATCH_OFFSET; - rrd.mask1 = L2_TYPE_MASK; - rrd.value1 = L2_16B_VALUE; - /* Match ETH L4 packets */ - rrd.field2_off = L4_TYPE_MATCH_OFFSET; - rrd.mask2 = L4_16B_TYPE_MASK; - rrd.value2 = L4_16B_ETH_VALUE; - /* Calc context from veswid and entropy */ - rrd.index1_off = L4_16B_HDR_VESWID_OFFSET; - rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES); - rrd.index2_off = L2_16B_ENTROPY_OFFSET; - rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES); - add_rsm_rule(dd, RSM_INS_VNIC, &rrd); + return true; +} - /* Enable RSM if not already enabled */ +static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd, + int rule, struct rsm_rule_data *rrd) +{ + if (!hfi1_netdev_update_rmt(dd)) { + dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule); + return; + } + + add_rsm_rule(dd, rule, rrd); add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); } +void hfi1_init_aip_rsm(struct hfi1_devdata *dd) +{ + /* + * go through with the initialisation only if this rule actually doesn't + * exist yet + */ + if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) { + struct rsm_rule_data rrd = { + .offset = dd->vnic.rmt_start, + .pkt_type = IB_PACKET_TYPE, + .field1_off = LRH_BTH_MATCH_OFFSET, + .mask1 = LRH_BTH_MASK, + .value1 = LRH_BTH_VALUE, + .field2_off = BTH_DESTQP_MATCH_OFFSET, + .mask2 = BTH_DESTQP_MASK, + .value2 = BTH_DESTQP_VALUE, + .index1_off = DETH_AIP_SQPN_SELECT_OFFSET + + ilog2(NUM_VNIC_MAP_ENTRIES), + .index1_width = ilog2(NUM_VNIC_MAP_ENTRIES), + .index2_off = DETH_AIP_SQPN_SELECT_OFFSET, + .index2_width = ilog2(NUM_VNIC_MAP_ENTRIES) + }; + + hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd); + } +} + +/* Initialize RSM for VNIC */ +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) +{ + struct rsm_rule_data rrd = { + /* Add rule for vnic */ + .offset = dd->vnic.rmt_start, + .pkt_type = 4, + /* Match 16B packets */ + .field1_off = L2_TYPE_MATCH_OFFSET, + .mask1 = L2_TYPE_MASK, + .value1 = L2_16B_VALUE, + /* Match ETH L4 packets */ + .field2_off = L4_TYPE_MATCH_OFFSET, + .mask2 = L4_16B_TYPE_MASK, + .value2 = L4_16B_ETH_VALUE, + /* Calc context from veswid and entropy */ + .index1_off = L4_16B_HDR_VESWID_OFFSET, + .index1_width = ilog2(NUM_VNIC_MAP_ENTRIES), + .index2_off = L2_16B_ENTROPY_OFFSET, + .index2_width = ilog2(NUM_VNIC_MAP_ENTRIES) + }; + + hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd); +} + void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) { clear_rsm_rule(dd, RSM_INS_VNIC); +} - /* Disable RSM if used only by vnic */ - if (dd->vnic.rmt_start == 0) - clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd) +{ + /* only actually clear the rule if it's the last user asking to do so */ + if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1) + clear_rsm_rule(dd, RSM_INS_AIP); } static int init_rxe(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 725509261016..b10e0bf6d8c0 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1,7 +1,7 @@ #ifndef _CHIP_H #define _CHIP_H /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1455,6 +1455,8 @@ void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); void reset_interrupts(struct hfi1_devdata *dd); u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx); +void hfi1_init_aip_rsm(struct hfi1_devdata *dd); +void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd); /* * Interrupt source table. diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ed13051d38da..c61e56a34cb8 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1417,12 +1417,10 @@ struct hfi1_devdata { struct hfi1_vnic_data vnic; /* Lock to protect IRQ SRC register access */ spinlock_t irq_src_lock; -}; -static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) -{ - return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES; -} + /* Keeps track of IPoIB RSM rule users */ + atomic_t ipoib_rsm_usr_num; +}; /* 8051 firmware version helper */ #define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 3759d9233a1c..8c6b96a660a6 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1316,6 +1316,7 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, goto bail; } + atomic_set(&dd->ipoib_rsm_usr_num, 0); return dd; bail: From 6d72344cf6c47010cc2055a832e16c7fcdd16f82 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 11 May 2020 12:06:18 -0400 Subject: [PATCH 434/562] IB/ipoib: Increase ipoib Datagram mode MTU's upper limit Currently the ipoib UD mtu is restricted to 4K bytes. Remove this limitation so that the IPOIB module can potentially use an MTU (in UD mode) that is bounded by the MTU of the underlying device. A field is added to the ib_port_attr structure to indicate the maximum physical MTU the underlying device supports. Link: https://lore.kernel.org/r/20200511160618.173205.23053.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Sadanand Warrier Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 18 +---- drivers/infiniband/hw/hfi1/verbs.c | 2 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- .../infiniband/ulp/ipoib/ipoib_multicast.c | 11 ++- include/rdma/ib_verbs.h | 77 +++++++++++++++++++ include/rdma/opa_port_info.h | 10 +-- 6 files changed, 88 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index f8e733aa3bb8..0c2ae9f7b3e8 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2019 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -186,15 +186,6 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -static inline int opa_mtu_enum_to_int(int mtu) -{ - switch (mtu) { - case OPA_MTU_8192: return 8192; - case OPA_MTU_10240: return 10240; - default: return -1; - } -} - /** * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs @@ -202,15 +193,10 @@ static inline int opa_mtu_enum_to_int(int mtu) */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { - int val; - /* Constraining 10KB packets to 8KB packets */ if (mtu == (enum ib_mtu)OPA_MTU_10240) mtu = OPA_MTU_8192; - val = opa_mtu_enum_to_int((int)mtu); - if (val > 0) - return val; - return ib_mtu_enum_to_int(mtu); + return opa_mtu_enum_to_int((enum opa_mtu)mtu); } int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c61b2916d420..19d5d0061b01 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1439,6 +1439,8 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); + props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu : + ib_mtu_enum_to_int(props->max_mtu); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d4c6a97ce4c0..22216f181b24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1855,7 +1855,7 @@ static int ipoib_parent_init(struct net_device *ndev) priv->port); return result; } - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr); result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey); if (result) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b9e9562f5034..7166ee9b7a25 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -218,6 +218,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, struct rdma_ah_attr av; int ret; int set_qkey = 0; + int mtu; mcast->mcmember = *mcmember; @@ -240,13 +241,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, priv->broadcast->mcmember.flow_label = mcmember->flow_label; priv->broadcast->mcmember.hop_limit = mcmember->hop_limit; /* assume if the admin and the mcast are the same both can be changed */ + mtu = rdma_mtu_enum_to_int(priv->ca, priv->port, + priv->broadcast->mcmember.mtu); if (priv->mcast_mtu == priv->admin_mtu) - priv->admin_mtu = - priv->mcast_mtu = - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); - else - priv->mcast_mtu = - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); + priv->admin_mtu = IPOIB_UD_MTU(mtu); + priv->mcast_mtu = IPOIB_UD_MTU(mtu); priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6278e4e040fc..641f4751b062 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -462,6 +462,11 @@ enum ib_mtu { IB_MTU_4096 = 5 }; +enum opa_mtu { + OPA_MTU_8192 = 6, + OPA_MTU_10240 = 7 +}; + static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) { switch (mtu) { @@ -488,6 +493,28 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int mtu) return IB_MTU_256; } +static inline int opa_mtu_enum_to_int(enum opa_mtu mtu) +{ + switch (mtu) { + case OPA_MTU_8192: + return 8192; + case OPA_MTU_10240: + return 10240; + default: + return(ib_mtu_enum_to_int((enum ib_mtu)mtu)); + } +} + +static inline enum opa_mtu opa_mtu_int_to_enum(int mtu) +{ + if (mtu >= 10240) + return OPA_MTU_10240; + else if (mtu >= 8192) + return OPA_MTU_8192; + else + return ((enum opa_mtu)ib_mtu_int_to_enum(mtu)); +} + enum ib_port_state { IB_PORT_NOP = 0, IB_PORT_DOWN = 1, @@ -651,6 +678,7 @@ struct ib_port_attr { enum ib_port_state state; enum ib_mtu max_mtu; enum ib_mtu active_mtu; + u32 phys_mtu; int gid_tbl_len; unsigned int ip_gids:1; /* This is the value from PortInfo CapabilityMask, defined by IBA */ @@ -3364,6 +3392,55 @@ static inline unsigned int rdma_find_pg_bit(unsigned long addr, return __fls(pgsz); } +/** + * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not. + * @device: Device + * @port_num: 1 based Port number + * + * Return true if port is an Intel OPA port , false if not + */ +static inline bool rdma_core_cap_opa_port(struct ib_device *device, + u32 port_num) +{ + return (device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA; +} + +/** + * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value. + * @device: Device + * @port_num: Port number + * @mtu: enum value of MTU + * + * Return the MTU size supported by the port as an integer value. Will return + * -1 if enum value of mtu is not supported. + */ +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, + int mtu) +{ + if (rdma_core_cap_opa_port(device, port)) + return opa_mtu_enum_to_int((enum opa_mtu)mtu); + else + return ib_mtu_enum_to_int((enum ib_mtu)mtu); +} + +/** + * rdma_mtu_from_attr - Return the mtu of the port from the port attribute. + * @device: Device + * @port_num: Port number + * @attr: port attribute + * + * Return the MTU size supported by the port as an integer value. + */ +static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, + struct ib_port_attr *attr) +{ + if (rdma_core_cap_opa_port(device, port)) + return attr->phys_mtu; + else + return ib_mtu_enum_to_int(attr->max_mtu); +} + int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index bdbfe25d3854..0d9e6d74c385 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2017 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -139,14 +139,6 @@ #define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1) #define OPA_CAP_MASK3_IsVLrSupported (1 << 0) -/** - * new MTU values - */ -enum { - OPA_MTU_8192 = 6, - OPA_MTU_10240 = 7, -}; - enum { OPA_PORT_PHYS_CONF_DISCONNECTED = 0, OPA_PORT_PHYS_CONF_STANDARD = 1, From 89dcaa366bffb9fcef39b97d08cc26d0a115ee35 Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Mon, 11 May 2020 12:06:25 -0400 Subject: [PATCH 435/562] IB/hfi1: Rename num_vnic_contexts as num_netdev_contexts Rename num_vnic_contexts as num_ndetdev_contexts since VNIC and ipoib will share the same set of receive contexts. Link: https://lore.kernel.org/r/20200511160625.173205.53306.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Sadanand Warrier Signed-off-by: Grzegorz Andrejczuk Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 24 ++++++++++++------------ drivers/infiniband/hw/hfi1/hfi.h | 4 ++-- drivers/infiniband/hw/hfi1/msix.c | 4 ++-- drivers/infiniband/hw/hfi1/vnic_main.c | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index be1fb29a1f85..07eec3544304 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13347,12 +13347,12 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * in array of contexts * freectxts - number of free user contexts * num_send_contexts - number of PIO send contexts being used - * num_vnic_contexts - number of contexts reserved for VNIC + * num_netdev_contexts - number of contexts reserved for netdev */ static int set_up_context_variables(struct hfi1_devdata *dd) { unsigned long num_kernel_contexts; - u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT; + u16 num_netdev_contexts = HFI1_NUM_VNIC_CTXT; int total_contexts; int ret; unsigned ngroups; @@ -13391,11 +13391,11 @@ static int set_up_context_variables(struct hfi1_devdata *dd) } /* Accommodate VNIC contexts if possible */ - if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) { + if ((num_kernel_contexts + num_netdev_contexts) > rcv_contexts) { dd_dev_err(dd, "No receive contexts available for VNIC\n"); - num_vnic_contexts = 0; + num_netdev_contexts = 0; } - total_contexts = num_kernel_contexts + num_vnic_contexts; + total_contexts = num_kernel_contexts + num_netdev_contexts; /* * User contexts: @@ -13422,15 +13422,15 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * The RMT entries are currently allocated as shown below: * 1. QOS (0 to 128 entries); * 2. FECN (num_kernel_context - 1 + num_user_contexts + - * num_vnic_contexts); - * 3. VNIC (num_vnic_contexts). - * It should be noted that FECN oversubscribe num_vnic_contexts - * entries of RMT because both VNIC and PSM could allocate any receive + * num_netdev_contexts); + * 3. netdev (num_netdev_contexts). + * It should be noted that FECN oversubscribe num_netdev_contexts + * entries of RMT because both netdev and PSM could allocate any receive * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, * and PSM FECN must reserve an RMT entry for each possible PSM receive * context. */ - rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2); if (HFI1_CAP_IS_KSET(TID_RDMA)) rmt_count += num_kernel_contexts - 1; if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { @@ -13449,7 +13449,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; dd->first_dyn_alloc_ctxt = num_kernel_contexts; - dd->num_vnic_contexts = num_vnic_contexts; + dd->num_netdev_contexts = num_netdev_contexts; dd->num_user_contexts = n_usr_ctxts; dd->freectxts = n_usr_ctxts; dd_dev_info(dd, @@ -13457,7 +13457,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, - dd->num_vnic_contexts, + dd->num_netdev_contexts, dd->num_user_contexts); /* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index c61e56a34cb8..5a9276c4c188 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1167,8 +1167,8 @@ struct hfi1_devdata { u64 z_send_schedule; u64 __percpu *send_schedule; - /* number of reserved contexts for VNIC usage */ - u16 num_vnic_contexts; + /* number of reserved contexts for netdev usage */ + u16 num_netdev_contexts; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; /* number of pio send contexts in use by the driver */ diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index db82db497b2c..7574f2bc9718 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * Copyright(c) 2018 Intel Corporation. + * Copyright(c) 2018 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -69,7 +69,7 @@ int msix_initialize(struct hfi1_devdata *dd) * one for each VNIC context * ...any new IRQs should be added here. */ - total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; + total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_netdev_contexts; if (total >= CCE_NUM_MSIX_VECTORS) return -EINVAL; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 6b14581b9965..db7624cacee1 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2017 - 2018 Intel Corporation. + * Copyright(c) 2017 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -804,7 +804,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, struct rdma_netdev *rn; int i, size, rc; - if (!dd->num_vnic_contexts) + if (!dd->num_netdev_contexts) return ERR_PTR(-ENOMEM); if (!port_num || (port_num > dd->num_pports)) @@ -815,7 +815,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->num_sdma, dd->num_vnic_contexts); + dd->num_sdma, dd->num_netdev_contexts); if (!netdev) return ERR_PTR(-ENOMEM); @@ -823,7 +823,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; vinfo->num_tx_q = dd->num_sdma; - vinfo->num_rx_q = dd->num_vnic_contexts; + vinfo->num_rx_q = dd->num_netdev_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; rn->set_id = hfi1_vnic_set_vesw_id; From 6991abcb993cf6c0711237b9d393d4f0a2008f1f Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 11 May 2020 12:06:31 -0400 Subject: [PATCH 436/562] IB/hfi1: Add functions to receive accelerated ipoib packets Ipoib netdev will share receive contexts with existing VNIC netdev. To achieve that, a dummy netdev is allocated with hfi1_devdata to own the receive contexts, and ipoib and VNIC netdevs will be put on top of it. Each receive context is associated with a single NAPI object. This patch adds the functions to receive incoming packets for accelerated ipoib. Link: https://lore.kernel.org/r/20200511160631.173205.54184.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Sadanand Warrier Signed-off-by: Grzegorz Andrejczuk Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/Makefile | 2 + drivers/infiniband/hw/hfi1/driver.c | 92 +++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/hfi.h | 5 +- drivers/infiniband/hw/hfi1/ipoib.h | 18 +++++ drivers/infiniband/hw/hfi1/ipoib_rx.c | 71 ++++++++++++++++++++ drivers/infiniband/hw/hfi1/netdev.h | 90 +++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/netdev_rx.c | 79 ++++++++++++++++++++++ 7 files changed, 355 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/ipoib_rx.c create mode 100644 drivers/infiniband/hw/hfi1/netdev.h create mode 100644 drivers/infiniband/hw/hfi1/netdev_rx.c diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 0b2571306267..2e89ec10efed 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -23,10 +23,12 @@ hfi1-y := \ intr.o \ iowait.o \ ipoib_main.o \ + ipoib_rx.o \ ipoib_tx.o \ mad.o \ mmu_rb.o \ msix.o \ + netdev_rx.o \ opfn.o \ pcie.o \ pio.o \ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 049d15befe58..c5ed6ed30100 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2018 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -54,6 +54,7 @@ #include #include #include +#include #include "hfi.h" #include "trace.h" @@ -63,6 +64,9 @@ #include "vnic.h" #include "fault.h" +#include "ipoib.h" +#include "netdev.h" + #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -1550,6 +1554,81 @@ void handle_eflags(struct hfi1_packet *packet) show_eflags_errs(packet); } +static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ibport *ibp; + struct net_device *netdev; + struct hfi1_ctxtdata *rcd = packet->rcd; + struct napi_struct *napi = rcd->napi; + struct sk_buff *skb; + struct hfi1_netdev_rxq *rxq = container_of(napi, + struct hfi1_netdev_rxq, napi); + u32 extra_bytes; + u32 tlen, qpnum; + bool do_work, do_cnp; + struct hfi1_ipoib_dev_priv *priv; + + trace_hfi1_rcvhdr(packet); + + hfi1_setup_ib_header(packet); + + packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth; + packet->grh = NULL; + + if (unlikely(rhf_err_flags(packet->rhf))) { + handle_eflags(packet); + return; + } + + qpnum = ib_bth_get_qpn(packet->ohdr); + netdev = hfi1_netdev_get_data(rcd->dd, qpnum); + if (!netdev) + goto drop_no_nd; + + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + + /* handle congestion notifications */ + do_work = hfi1_may_ecn(packet); + if (unlikely(do_work)) { + do_cnp = (packet->opcode != IB_OPCODE_CNP); + (void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp, + packet, do_cnp); + } + + /* + * We have split point after last byte of DETH + * lets strip padding and CRC and ICRC. + * tlen is whole packet len so we need to + * subtract header size as well. + */ + tlen = packet->tlen; + extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) + + packet->hlen; + if (unlikely(tlen < extra_bytes)) + goto drop; + + tlen -= extra_bytes; + + skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf); + if (unlikely(!skb)) + goto drop; + + priv = hfi1_ipoib_priv(netdev); + hfi1_ipoib_update_rx_netstats(priv, 1, skb->len); + + skb->dev = netdev; + skb->pkt_type = PACKET_HOST; + netif_receive_skb(skb); + + return; + +drop: + ++netdev->stats.rx_dropped; +drop_no_nd: + ibp = rcd_to_iport(packet->rcd); + ++ibp->rvp.n_pkt_drops; +} + /* * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. @@ -1757,3 +1836,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = { [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, }; + +const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = { + [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid, + [RHF_RCV_TYPE_EAGER] = process_receive_invalid, + [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv, + [RHF_RCV_TYPE_ERROR] = process_receive_error, + [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv, + [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, +}; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 5a9276c4c188..c7d0aad41f41 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -233,6 +233,8 @@ struct hfi1_ctxtdata { intr_handler fast_handler; /** slow handler */ intr_handler slow_handler; + /* napi pointer assiociated with netdev */ + struct napi_struct *napi; /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; /* clear interrupt mask */ @@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct rvt_swqe *wqe); extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[]; - +extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[]; /* return values for the RHF receive functions */ #define RHF_RCV_CONTINUE 0 /* keep going */ @@ -1417,6 +1419,7 @@ struct hfi1_devdata { struct hfi1_vnic_data vnic; /* Lock to protect IRQ SRC register access */ spinlock_t irq_src_lock; + struct net_device *dummy_netdev; /* Keeps track of IPoIB RSM rule users */ atomic_t ipoib_rsm_usr_num; diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index c2e63ca57896..ca00f6c6a90d 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -22,6 +22,7 @@ #include "hfi.h" #include "iowait.h" +#include "netdev.h" #include @@ -29,6 +30,7 @@ #define HFI1_IPOIB_TXREQ_NAME_LEN 32 +#define HFI1_IPOIB_PSEUDO_LEN 20 #define HFI1_IPOIB_ENCAP_LEN 4 struct hfi1_ipoib_dev_priv; @@ -118,6 +120,19 @@ hfi1_ipoib_priv(const struct net_device *dev) return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv; } +static inline void +hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv, + u64 packets, + u64 bytes) +{ + struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); + + u64_stats_update_begin(&netstats->syncp); + netstats->rx_packets += packets; + netstats->rx_bytes += bytes; + u64_stats_update_end(&netstats->syncp); +} + static inline void hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv, u64 packets, @@ -142,6 +157,9 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv); void hfi1_ipoib_napi_tx_enable(struct net_device *dev); void hfi1_ipoib_napi_tx_disable(struct net_device *dev); +struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq, + int size, void *data); + int hfi1_ipoib_rn_get_params(struct ib_device *device, u8 port_num, enum rdma_netdev_t type, diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c new file mode 100644 index 000000000000..2485663032c7 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +#include "netdev.h" +#include "ipoib.h" + +#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN)) + +static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size) +{ + void *dst_data; + + skb_checksum_none_assert(skb); + skb->protocol = *((__be16 *)data); + + dst_data = skb_put(skb, size); + memcpy(dst_data, data, size); + skb->mac_header = HFI1_IPOIB_PSEUDO_LEN; + skb_pull(skb, HFI1_IPOIB_ENCAP_LEN); +} + +static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size) +{ + struct sk_buff *skb; + int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD); + void *frag; + + skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + skb_size = SKB_DATA_ALIGN(skb_size); + frag = napi_alloc_frag(skb_size); + + if (unlikely(!frag)) + return napi_alloc_skb(napi, size); + + skb = build_skb(frag, skb_size); + + if (unlikely(!skb)) { + skb_free_frag(frag); + return NULL; + } + + skb_reserve(skb, HFI1_IPOIB_SKB_PAD); + return skb; +} + +struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq, + int size, void *data) +{ + struct napi_struct *napi = &rxq->napi; + int skb_size = size + HFI1_IPOIB_ENCAP_LEN; + struct sk_buff *skb; + + /* + * For smaller(4k + skb overhead) allocations we will go using + * napi cache. Otherwise we will try to use napi frag cache. + */ + if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE)) + skb = napi_alloc_skb(napi, skb_size); + else + skb = prepare_frag_skb(napi, skb_size); + + if (unlikely(!skb)) + return NULL; + + copy_ipoib_buf(skb, data, size); + + return skb; +} diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h new file mode 100644 index 000000000000..8992dfe11e3e --- /dev/null +++ b/drivers/infiniband/hw/hfi1/netdev.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +#ifndef HFI1_NETDEV_H +#define HFI1_NETDEV_H + +#include "hfi.h" + +#include +#include + +/** + * struct hfi1_netdev_rxq - Receive Queue for HFI + * dummy netdev. Both IPoIB and VNIC netdevices will be working on + * top of this device. + * @napi: napi object + * @priv: ptr to netdev_priv + * @rcd: ptr to receive context data + */ +struct hfi1_netdev_rxq { + struct napi_struct napi; + struct hfi1_netdev_priv *priv; + struct hfi1_ctxtdata *rcd; +}; + +/* + * Number of netdev contexts used. Ensure it is less than or equal to + * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). + */ +#define HFI1_MAX_NETDEV_CTXTS 8 + +/* Number of NETDEV RSM entries */ +#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS + +/** + * struct hfi1_netdev_priv: data required to setup and run HFI netdev. + * @dd: hfi1_devdata + * @rxq: pointer to dummy netdev receive queues. + * @num_rx_q: number of receive queues + * @rmt_index: first free index in RMT Array + * @msix_start: first free MSI-X interrupt vector. + * @dev_tbl: netdev table for unique identifier VNIC and IPoIb VLANs. + * @enabled: atomic counter of netdevs enabling receive queues. + * When 0 NAPI will be disabled. + * @netdevs: atomic counter of netdevs using dummy netdev. + * When 0 receive queues will be freed. + */ +struct hfi1_netdev_priv { + struct hfi1_devdata *dd; + struct hfi1_netdev_rxq *rxq; + int num_rx_q; + int rmt_start; + struct xarray dev_tbl; + /* count of enabled napi polls */ + atomic_t enabled; + /* count of netdevs on top */ + atomic_t netdevs; +}; + +static inline +struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev) +{ + return (struct hfi1_netdev_priv *)&dev[1]; +} + +static inline +int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return priv->num_rx_q; +} + +static inline +struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return priv->rxq[ctxt].rcd; +} + +int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data); +void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id); +void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id); +void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id); + +#endif /* HFI1_NETDEV_H */ diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c new file mode 100644 index 000000000000..3e286cbaa8c6 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for netdev RX functionality + */ + +#include "sdma.h" +#include "verbs.h" +#include "netdev.h" +#include "hfi.h" + +#include +#include +#include + +/** + * hfi1_netdev_add_data - Registers data with unique identifier + * to be requested later this is needed for VNIC and IPoIB VLANs + * implementations. + * This call is protected by mutex idr_lock. + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + * @data: data to be associated with index + */ +int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT); +} + +/** + * hfi1_netdev_remove_data - Removes data with previously given id. + * Returns the reference to removed entry. + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + */ +void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return xa_erase(&priv->dev_tbl, id); +} + +/** + * hfi1_netdev_get_data - Gets data with given id + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + */ +void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return xa_load(&priv->dev_tbl, id); +} + +/** + * hfi1_netdev_get_first_dat - Gets first entry with greater or equal id. + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + */ +void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + unsigned long index = *start_id; + void *ret; + + ret = xa_find(&priv->dev_tbl, &index, UINT_MAX, XA_PRESENT); + *start_id = (int)index; + return ret; +} From 0bae02d56bba6cc3836a9d8dfbbe53787af19a58 Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Mon, 11 May 2020 12:06:37 -0400 Subject: [PATCH 437/562] IB/hfi1: Add interrupt handler functions for accelerated ipoib This patch adds the interrupt handler function, the NAPI poll function, and its associated helper functions for receiving accelerated ipoib packets. While we are here, fix the formats of two error printouts. Link: https://lore.kernel.org/r/20200511160637.173205.64890.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Sadanand Warrier Signed-off-by: Grzegorz Andrejczuk Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/affinity.c | 12 ++- drivers/infiniband/hw/hfi1/affinity.h | 3 +- drivers/infiniband/hw/hfi1/chip.c | 44 ++++++++++ drivers/infiniband/hw/hfi1/chip.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 120 ++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/hfi.h | 4 +- drivers/infiniband/hw/hfi1/init.c | 1 + drivers/infiniband/hw/hfi1/msix.c | 20 ++++- drivers/infiniband/hw/hfi1/msix.h | 5 +- drivers/infiniband/hw/hfi1/netdev.h | 3 + 10 files changed, 206 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 1aeea5d65c01..2a91b8d95e12 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -64,6 +64,7 @@ struct hfi1_affinity_node_list node_affinity = { static const char * const irq_type_names[] = { "SDMA", "RCVCTXT", + "NETDEVCTXT", "GENERAL", "OTHER", }; @@ -915,6 +916,11 @@ static int get_irq_affinity(struct hfi1_devdata *dd, set = &entry->rcv_intr; scnprintf(extra, 64, "ctxt %u", rcd->ctxt); break; + case IRQ_NETDEVCTXT: + rcd = (struct hfi1_ctxtdata *)msix->arg; + set = &entry->def_intr; + scnprintf(extra, 64, "ctxt %u", rcd->ctxt); + break; default: dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type); return -EINVAL; @@ -987,6 +993,10 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, if (rcd->ctxt != HFI1_CTRL_CTXT) set = &entry->rcv_intr; break; + case IRQ_NETDEVCTXT: + rcd = (struct hfi1_ctxtdata *)msix->arg; + set = &entry->def_intr; + break; default: mutex_unlock(&node_affinity.lock); return; diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 6a7e6ea4e426..f94ed5d7c7a3 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -52,6 +52,7 @@ enum irq_type { IRQ_SDMA, IRQ_RCVCTXT, + IRQ_NETDEVCTXT, IRQ_GENERAL, IRQ_OTHER }; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 07eec3544304..2117612c61b2 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -66,6 +66,7 @@ #include "affinity.h" #include "debugfs.h" #include "fault.h" +#include "netdev.h" uint num_vls = HFI1_MAX_VLS_SUPPORTED; module_param(num_vls, uint, S_IRUGO); @@ -8480,6 +8481,49 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) local_irq_restore(flags); } +/** + * hfi1_netdev_rx_napi - napi poll function to move eoi inline + * @napi - pointer to napi object + * @budget - netdev budget + */ +int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget) +{ + struct hfi1_netdev_rxq *rxq = container_of(napi, + struct hfi1_netdev_rxq, napi); + struct hfi1_ctxtdata *rcd = rxq->rcd; + int work_done = 0; + + work_done = rcd->do_interrupt(rcd, budget); + + if (work_done < budget) { + napi_complete_done(napi, work_done); + hfi1_rcd_eoi_intr(rcd); + } + + return work_done; +} + +/* Receive packet napi handler for netdevs VNIC and AIP */ +irqreturn_t receive_context_interrupt_napi(int irq, void *data) +{ + struct hfi1_ctxtdata *rcd = data; + + receive_interrupt_common(rcd); + + if (likely(rcd->napi)) { + if (likely(napi_schedule_prep(rcd->napi))) + __napi_schedule_irqoff(rcd->napi); + else + __hfi1_rcd_eoi_intr(rcd); + } else { + WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n", + rcd->ctxt); + __hfi1_rcd_eoi_intr(rcd); + } + + return IRQ_HANDLED; +} + /* * Receive packet IRQ handler. This routine expects to be on its own IRQ. * This routine will try to handle packets immediately (latency), but if diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b10e0bf6d8c0..2c6f2de74d4d 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1447,6 +1447,7 @@ irqreturn_t general_interrupt(int irq, void *data); irqreturn_t sdma_interrupt(int irq, void *data); irqreturn_t receive_context_interrupt(int irq, void *data); irqreturn_t receive_context_thread(int irq, void *data); +irqreturn_t receive_context_interrupt_napi(int irq, void *data); int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set); void init_qsfp_int(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c5ed6ed30100..d89fc8fdff6a 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -752,6 +752,39 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) return ret; } +static void process_rcv_packet_napi(struct hfi1_packet *packet) +{ + packet->etype = rhf_rcv_type(packet->rhf); + + /* total length */ + packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ + /* retrieve eager buffer details */ + packet->etail = rhf_egr_index(packet->rhf); + packet->ebuf = get_egrbuf(packet->rcd, packet->rhf, + &packet->updegr); + /* + * Prefetch the contents of the eager buffer. It is + * OK to send a negative length to prefetch_range(). + * The +2 is the size of the RHF. + */ + prefetch_range(packet->ebuf, + packet->tlen - ((packet->rcd->rcvhdrqentsize - + (rhf_hdrq_offset(packet->rhf) + + 2)) * 4)); + + packet->rcd->rhf_rcv_function_map[packet->etype](packet); + packet->numpkt++; + + /* Set up for the next packet */ + packet->rhqoff += packet->rsize; + if (packet->rhqoff >= packet->maxcnt) + packet->rhqoff = 0; + + packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + + packet->rcd->rhf_offset; + packet->rhf = rhf_to_cpu(packet->rhf_addr); +} + static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; @@ -830,6 +863,36 @@ static inline void finish_packet(struct hfi1_packet *packet) packet->etail, rcv_intr_dynamic, packet->numpkt); } +/* + * handle_receive_interrupt_napi_fp - receive a packet + * @rcd: the context + * @budget: polling budget + * + * Called from interrupt handler for receive interrupt. + * This is the fast path interrupt handler + * when executing napi soft irq environment. + */ +int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget) +{ + struct hfi1_packet packet; + + init_packet(rcd, &packet); + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) + goto bail; + + while (packet.numpkt < budget) { + process_rcv_packet_napi(&packet); + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) + break; + + process_rcv_update(0, &packet); + } + hfi1_set_rcd_head(rcd, packet.rhqoff); +bail: + finish_packet(&packet); + return packet.numpkt; +} + /* * Handle receive interrupts when using the no dma rtail option. */ @@ -1077,6 +1140,63 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) return last; } +/* + * handle_receive_interrupt_napi_sp - receive a packet + * @rcd: the context + * @budget: polling budget + * + * Called from interrupt handler for errors or receive interrupt. + * This is the slow path interrupt handler + * when executing napi soft irq environment. + */ +int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget) +{ + struct hfi1_devdata *dd = rcd->dd; + int last = RCV_PKT_OK; + bool needset = true; + struct hfi1_packet packet; + + init_packet(rcd, &packet); + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) + goto bail; + + while (last != RCV_PKT_DONE && packet.numpkt < budget) { + if (hfi1_need_drop(dd)) { + /* On to the next packet */ + packet.rhqoff += packet.rsize; + packet.rhf_addr = (__le32 *)rcd->rcvhdrq + + packet.rhqoff + + rcd->rhf_offset; + packet.rhf = rhf_to_cpu(packet.rhf_addr); + + } else { + if (set_armed_to_active(&packet)) + goto bail; + process_rcv_packet_napi(&packet); + } + + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) + last = RCV_PKT_DONE; + + if (needset) { + needset = false; + set_all_fastpath(dd, rcd); + } + + process_rcv_update(last, &packet); + } + + hfi1_set_rcd_head(rcd, packet.rhqoff); + +bail: + /* + * Always write head at end, and setup rcv interrupt, even + * if no packets were processed. + */ + finish_packet(&packet); + return packet.numpkt; +} + /* * We may discover in the interrupt that the hardware link state has * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet), diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index c7d0aad41f41..986d8c3dc430 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -385,11 +385,11 @@ struct hfi1_packet { u32 rhqoff; u32 dlid; u32 slid; + int numpkt; u16 tlen; s16 etail; u16 pkey; u8 hlen; - u8 numpkt; u8 rsize; u8 updegr; u8 etype; @@ -1501,6 +1501,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget); +int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget); void set_all_slowpath(struct hfi1_devdata *dd); extern const struct pci_device_id hfi1_pci_tbl[]; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 8c6b96a660a6..64279d04370d 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -374,6 +374,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; + rcd->msix_intr = CCE_NUM_MSIX_VECTORS; mutex_init(&rcd->exp_mutex); spin_lock_init(&rcd->exp_lock); diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index 7574f2bc9718..7559875e0322 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -49,6 +49,7 @@ #include "hfi.h" #include "affinity.h" #include "sdma.h" +#include "netdev.h" /** * msix_initialize() - Calculate, request and configure MSIx IRQs @@ -140,7 +141,7 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name); if (ret) { dd_dev_err(dd, - "%s: request for IRQ %d failed, MSIx %lu, err %d\n", + "%s: request for IRQ %d failed, MSIx %lx, err %d\n", name, irq, nr, ret); spin_lock(&dd->msix_info.msix_lock); __clear_bit(nr, dd->msix_info.in_use_msix); @@ -160,7 +161,7 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, /* This is a request, so a failure is not fatal */ ret = hfi1_get_irq_affinity(dd, me); if (ret) - dd_dev_err(dd, "unable to pin IRQ %d\n", ret); + dd_dev_err(dd, "%s: unable to pin IRQ %d\n", name, ret); return nr; } @@ -203,6 +204,21 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) receive_context_thread, name); } +/** + * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs + * for netdev context + * @rcd: valid netdev contexti + */ +int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd) +{ + char name[MAX_NAME_SIZE]; + + snprintf(name, sizeof(name), DRIVER_NAME "_%d nd kctxt%d", + rcd->dd->unit, rcd->ctxt); + return msix_request_rcd_irq_common(rcd, receive_context_interrupt_napi, + NULL, name); +} + /** * msix_request_smda_ira() - Helper for getting SDMA IRQ resources * @sde: valid sdma engine diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h index 1a02ab7971c8..42fab9475499 100644 --- a/drivers/infiniband/hw/hfi1/msix.h +++ b/drivers/infiniband/hw/hfi1/msix.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* - * Copyright(c) 2018 Intel Corporation. + * Copyright(c) 2018 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -59,7 +59,8 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd); int msix_request_sdma_irq(struct sdma_engine *sde); void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr); -/* VNIC interface */ +/* Netdev interface */ void msix_vnic_synchronize_irq(struct hfi1_devdata *dd); +int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd); #endif diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h index 8992dfe11e3e..6740ec34224a 100644 --- a/drivers/infiniband/hw/hfi1/netdev.h +++ b/drivers/infiniband/hw/hfi1/netdev.h @@ -87,4 +87,7 @@ void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id); void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id); void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id); +/* chip.c */ +int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget); + #endif /* HFI1_NETDEV_H */ From 370caa5b5880cd988645735c2d5d1d597c258e39 Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Mon, 11 May 2020 12:06:43 -0400 Subject: [PATCH 438/562] IB/hfi1: Add rx functions for dummy netdev This patch adds the rx functions for the dummy netdev: - Functions to allocate/free the dummy netdev. - Functions to allocate/free receiving contexts for the netdev. - Functions to initialize/de-initialize the receive queue. - Functions to enable/disable the receive queue. Link: https://lore.kernel.org/r/20200511160643.173205.75087.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Sadanand Warrier Signed-off-by: Grzegorz Andrejczuk Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib.h | 3 + drivers/infiniband/hw/hfi1/ipoib_main.c | 30 +- drivers/infiniband/hw/hfi1/ipoib_rx.c | 16 ++ drivers/infiniband/hw/hfi1/netdev.h | 6 + drivers/infiniband/hw/hfi1/netdev_rx.c | 361 ++++++++++++++++++++++++ 5 files changed, 414 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index ca00f6c6a90d..185c9b02c974 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -154,6 +154,9 @@ int hfi1_ipoib_send_dma(struct net_device *dev, int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv); void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv); +int hfi1_ipoib_rxq_init(struct net_device *dev); +void hfi1_ipoib_rxq_deinit(struct net_device *dev); + void hfi1_ipoib_napi_tx_enable(struct net_device *dev); void hfi1_ipoib_napi_tx_disable(struct net_device *dev); diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index 304a5ac86f77..014351ebbefa 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -19,16 +19,31 @@ static u32 qpn_from_mac(u8 *mac_arr) static int hfi1_ipoib_dev_init(struct net_device *dev) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int ret; priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - return priv->netdev_ops->ndo_init(dev); + ret = priv->netdev_ops->ndo_init(dev); + if (ret) + return ret; + + ret = hfi1_netdev_add_data(priv->dd, + qpn_from_mac(priv->netdev->dev_addr), + dev); + if (ret < 0) { + priv->netdev_ops->ndo_uninit(dev); + return ret; + } + + return 0; } static void hfi1_ipoib_dev_uninit(struct net_device *dev) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr)); + priv->netdev_ops->ndo_uninit(dev); } @@ -55,6 +70,7 @@ static int hfi1_ipoib_dev_open(struct net_device *dev) priv->qp = qp; rcu_read_unlock(); + hfi1_netdev_enable_queues(priv->dd); hfi1_ipoib_napi_tx_enable(dev); } @@ -69,6 +85,7 @@ static int hfi1_ipoib_dev_stop(struct net_device *dev) return 0; hfi1_ipoib_napi_tx_disable(dev); + hfi1_netdev_disable_queues(priv->dd); rvt_put_qp(priv->qp); priv->qp = NULL; @@ -195,6 +212,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev) struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); hfi1_ipoib_txreq_deinit(priv); + hfi1_ipoib_rxq_deinit(priv->netdev); free_percpu(priv->netstats); } @@ -252,6 +270,13 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device, return rc; } + rc = hfi1_ipoib_rxq_init(netdev); + if (rc) { + dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc); + hfi1_ipoib_free_rdma_netdev(netdev); + return rc; + } + netdev->priv_destructor = hfi1_ipoib_netdev_dtor; netdev->needs_free_netdev = true; @@ -268,7 +293,7 @@ int hfi1_ipoib_rn_get_params(struct ib_device *device, if (type != RDMA_NETDEV_IPOIB) return -EOPNOTSUPP; - if (!HFI1_CAP_IS_KSET(AIP)) + if (!HFI1_CAP_IS_KSET(AIP) || !dd->num_netdev_contexts) return -EOPNOTSUPP; if (!port_num || port_num > dd->num_pports) @@ -276,6 +301,7 @@ int hfi1_ipoib_rn_get_params(struct ib_device *device, params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev); params->txqs = dd->num_sdma; + params->rxqs = dd->num_netdev_contexts; params->param = NULL; params->initialize_rdma_netdev = hfi1_ipoib_setup_rn; diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c index 2485663032c7..606ac69eeea5 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_rx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c @@ -69,3 +69,19 @@ struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq, return skb; } + +int hfi1_ipoib_rxq_init(struct net_device *netdev) +{ + struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev); + struct hfi1_devdata *dd = ipoib_priv->dd; + + return hfi1_netdev_rx_init(dd); +} + +void hfi1_ipoib_rxq_deinit(struct net_device *netdev) +{ + struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev); + struct hfi1_devdata *dd = ipoib_priv->dd; + + hfi1_netdev_rx_destroy(dd); +} diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h index 6740ec34224a..edb936f013c1 100644 --- a/drivers/infiniband/hw/hfi1/netdev.h +++ b/drivers/infiniband/hw/hfi1/netdev.h @@ -82,6 +82,12 @@ struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt) return priv->rxq[ctxt].rcd; } +void hfi1_netdev_enable_queues(struct hfi1_devdata *dd); +void hfi1_netdev_disable_queues(struct hfi1_devdata *dd); +int hfi1_netdev_rx_init(struct hfi1_devdata *dd); +int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd); +int hfi1_netdev_alloc(struct hfi1_devdata *dd); +void hfi1_netdev_free(struct hfi1_devdata *dd); int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data); void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id); void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id); diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 3e286cbaa8c6..124e4e8695b0 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -17,6 +17,367 @@ #include #include +static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_priv *priv, + struct hfi1_ctxtdata *uctxt) +{ + unsigned int rcvctrl_ops; + struct hfi1_devdata *dd = priv->dd; + int ret; + + uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions; + uctxt->do_interrupt = &handle_receive_interrupt_napi_sp; + + /* Now allocate the RcvHdr queue and eager buffers. */ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + goto done; + + ret = hfi1_setup_eagerbufs(uctxt); + if (ret) + goto done; + + clear_rcvhdrtail(uctxt); + + rcvctrl_ops = HFI1_RCVCTRL_CTXT_DIS; + rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_DIS; + + if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; + + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); +done: + return ret; +} + +static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **ctxt) +{ + struct hfi1_ctxtdata *uctxt; + int ret; + + if (dd->flags & HFI1_FROZEN) + return -EIO; + + ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); + if (ret < 0) { + dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); + return -ENOMEM; + } + + uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | + HFI1_CAP_KGET(NODROP_RHQ_FULL) | + HFI1_CAP_KGET(NODROP_EGR_FULL) | + HFI1_CAP_KGET(DMA_RTAIL); + /* Netdev contexts are always NO_RDMA_RTAIL */ + uctxt->fast_handler = handle_receive_interrupt_napi_fp; + uctxt->slow_handler = handle_receive_interrupt_napi_sp; + hfi1_set_seq_cnt(uctxt, 1); + uctxt->is_vnic = true; + + hfi1_stats.sps_ctxts++; + + dd_dev_info(dd, "created netdev context %d\n", uctxt->ctxt); + *ctxt = uctxt; + + return 0; +} + +static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata *uctxt) +{ + flush_wc(); + + /* + * Disable receive context and interrupt available, reset all + * RcvCtxtCtrl bits to default values. + */ + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_TIDFLOW_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS | + HFI1_RCVCTRL_NO_RHQ_DROP_DIS | + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); + + if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS) + msix_free_irq(dd, uctxt->msix_intr); + + uctxt->msix_intr = CCE_NUM_MSIX_VECTORS; + uctxt->event_flags = 0; + + hfi1_clear_tids(uctxt); + hfi1_clear_ctxt_pkey(dd, uctxt); + + hfi1_stats.sps_ctxts--; + + hfi1_free_ctxt(uctxt); +} + +static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv, + struct hfi1_ctxtdata **ctxt) +{ + int rc; + struct hfi1_devdata *dd = priv->dd; + + rc = hfi1_netdev_allocate_ctxt(dd, ctxt); + if (rc) { + dd_dev_err(dd, "netdev ctxt alloc failed %d\n", rc); + return rc; + } + + rc = hfi1_netdev_setup_ctxt(priv, *ctxt); + if (rc) { + dd_dev_err(dd, "netdev ctxt setup failed %d\n", rc); + hfi1_netdev_deallocate_ctxt(dd, *ctxt); + *ctxt = NULL; + } + + return rc; +} + +static int hfi1_netdev_rxq_init(struct net_device *dev) +{ + int i; + int rc; + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev); + struct hfi1_devdata *dd = priv->dd; + + priv->num_rx_q = dd->num_netdev_contexts; + priv->rxq = kcalloc_node(priv->num_rx_q, sizeof(struct hfi1_netdev_rxq), + GFP_KERNEL, dd->node); + + if (!priv->rxq) { + dd_dev_err(dd, "Unable to allocate netdev queue data\n"); + return (-ENOMEM); + } + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + rc = hfi1_netdev_allot_ctxt(priv, &rxq->rcd); + if (rc) + goto bail_context_irq_failure; + + hfi1_rcd_get(rxq->rcd); + rxq->priv = priv; + rxq->rcd->napi = &rxq->napi; + dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n", + i, rxq->rcd->ctxt); + /* + * Disable BUSY_POLL on this NAPI as this is not supported + * right now. + */ + set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state); + netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64); + rc = msix_netdev_request_rcd_irq(rxq->rcd); + if (rc) + goto bail_context_irq_failure; + } + + return 0; + +bail_context_irq_failure: + dd_dev_err(dd, "Unable to allot receive context\n"); + for (; i >= 0; i--) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + if (rxq->rcd) { + hfi1_netdev_deallocate_ctxt(dd, rxq->rcd); + hfi1_rcd_put(rxq->rcd); + rxq->rcd = NULL; + } + } + kfree(priv->rxq); + priv->rxq = NULL; + + return rc; +} + +static void hfi1_netdev_rxq_deinit(struct net_device *dev) +{ + int i; + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev); + struct hfi1_devdata *dd = priv->dd; + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + netif_napi_del(&rxq->napi); + hfi1_netdev_deallocate_ctxt(dd, rxq->rcd); + hfi1_rcd_put(rxq->rcd); + rxq->rcd = NULL; + } + + kfree(priv->rxq); + priv->rxq = NULL; + priv->num_rx_q = 0; +} + +static void enable_queues(struct hfi1_netdev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + dd_dev_info(priv->dd, "enabling queue %d on context %d\n", i, + rxq->rcd->ctxt); + napi_enable(&rxq->napi); + hfi1_rcvctrl(priv->dd, + HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB, + rxq->rcd); + } +} + +static void disable_queues(struct hfi1_netdev_priv *priv) +{ + int i; + + msix_vnic_synchronize_irq(priv->dd); + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + dd_dev_info(priv->dd, "disabling queue %d on context %d\n", i, + rxq->rcd->ctxt); + + /* wait for napi if it was scheduled */ + hfi1_rcvctrl(priv->dd, + HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS, + rxq->rcd); + napi_synchronize(&rxq->napi); + napi_disable(&rxq->napi); + } +} + +/** + * hfi1_netdev_rx_init - Incrememnts netdevs counter. When called first time, + * it allocates receive queue data and calls netif_napi_add + * for each queue. + * + * @dd: hfi1 dev data + */ +int hfi1_netdev_rx_init(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + int res; + + if (atomic_fetch_inc(&priv->netdevs)) + return 0; + + mutex_lock(&hfi1_mutex); + init_dummy_netdev(dd->dummy_netdev); + res = hfi1_netdev_rxq_init(dd->dummy_netdev); + mutex_unlock(&hfi1_mutex); + return res; +} + +/** + * hfi1_netdev_rx_destroy - Decrements netdevs counter, when it reaches 0 + * napi is deleted and receive queses memory is freed. + * + * @dd: hfi1 dev data + */ +int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + /* destroy the RX queues only if it is the last netdev going away */ + if (atomic_fetch_add_unless(&priv->netdevs, -1, 0) == 1) { + mutex_lock(&hfi1_mutex); + hfi1_netdev_rxq_deinit(dd->dummy_netdev); + mutex_unlock(&hfi1_mutex); + } + + return 0; +} + +/** + * hfi1_netdev_alloc - Allocates netdev and private data. It is required + * because RMT index and MSI-X interrupt can be set only + * during driver initialization. + * + * @dd: hfi1 dev data + */ +int hfi1_netdev_alloc(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv; + const int netdev_size = sizeof(*dd->dummy_netdev) + + sizeof(struct hfi1_netdev_priv); + + dd_dev_info(dd, "allocating netdev size %d\n", netdev_size); + dd->dummy_netdev = kcalloc_node(1, netdev_size, GFP_KERNEL, dd->node); + + if (!dd->dummy_netdev) + return -ENOMEM; + + priv = hfi1_netdev_priv(dd->dummy_netdev); + priv->dd = dd; + xa_init(&priv->dev_tbl); + atomic_set(&priv->enabled, 0); + atomic_set(&priv->netdevs, 0); + + return 0; +} + +void hfi1_netdev_free(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv; + + if (dd->dummy_netdev) { + priv = hfi1_netdev_priv(dd->dummy_netdev); + dd_dev_info(dd, "hfi1 netdev freed\n"); + kfree(dd->dummy_netdev); + dd->dummy_netdev = NULL; + } +} + +/** + * hfi1_netdev_enable_queues - This is napi enable function. + * It enables napi objects associated with queues. + * When at least one device has called it it increments atomic counter. + * Disable function decrements counter and when it is 0, + * calls napi_disable for every queue. + * + * @dd: hfi1 dev data + */ +void hfi1_netdev_enable_queues(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv; + + if (!dd->dummy_netdev) + return; + + priv = hfi1_netdev_priv(dd->dummy_netdev); + if (atomic_fetch_inc(&priv->enabled)) + return; + + mutex_lock(&hfi1_mutex); + enable_queues(priv); + mutex_unlock(&hfi1_mutex); +} + +void hfi1_netdev_disable_queues(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv; + + if (!dd->dummy_netdev) + return; + + priv = hfi1_netdev_priv(dd->dummy_netdev); + if (atomic_dec_if_positive(&priv->enabled)) + return; + + mutex_lock(&hfi1_mutex); + disable_queues(priv); + mutex_unlock(&hfi1_mutex); +} + /** * hfi1_netdev_add_data - Registers data with unique identifier * to be requested later this is needed for VNIC and IPoIB VLANs From 4730f4a6c6b2065589c0822af00aa45e639bbc36 Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Mon, 11 May 2020 12:06:49 -0400 Subject: [PATCH 439/562] IB/hfi1: Activate the dummy netdev As described in earlier patches, ipoib netdev will share receive contexts with existing VNIC netdev through a dummy netdev. The following changes are made to achieve that: - Set up netdev receive contexts after user contexts. A function is added to count the available netdev receive contexts. - Add functions to set/get receive map table free index. - Rename NUM_VNIC_MAP_ENTRIES as NUM_NETDEV_MAP_ENTRIES. - Let the dummy netdev own the receive contexts instead of VNIC. - Allocate the dummy netdev when the hfi1 device is added and free it when the device is removed. - Initialize AIP RSM rules when the IpoIb rxq is initialized and remove the rules when it is de-initialized. - Convert VNIC to use the dummy netdev. Link: https://lore.kernel.org/r/20200511160649.173205.4626.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Sadanand Warrier Signed-off-by: Grzegorz Andrejczuk Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 73 +++--- drivers/infiniband/hw/hfi1/driver.c | 18 -- drivers/infiniband/hw/hfi1/hfi.h | 14 +- drivers/infiniband/hw/hfi1/init.c | 9 +- drivers/infiniband/hw/hfi1/ipoib_rx.c | 10 +- drivers/infiniband/hw/hfi1/msix.c | 12 +- drivers/infiniband/hw/hfi1/msix.h | 2 +- drivers/infiniband/hw/hfi1/netdev.h | 19 ++ drivers/infiniband/hw/hfi1/netdev_rx.c | 46 +++- drivers/infiniband/hw/hfi1/vnic.h | 5 +- drivers/infiniband/hw/hfi1/vnic_main.c | 320 +++++-------------------- 11 files changed, 182 insertions(+), 346 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2117612c61b2..7f35b9ea158b 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13396,8 +13396,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) static int set_up_context_variables(struct hfi1_devdata *dd) { unsigned long num_kernel_contexts; - u16 num_netdev_contexts = HFI1_NUM_VNIC_CTXT; - int total_contexts; + u16 num_netdev_contexts; int ret; unsigned ngroups; int rmt_count; @@ -13434,13 +13433,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd) num_kernel_contexts = send_contexts - num_vls - 1; } - /* Accommodate VNIC contexts if possible */ - if ((num_kernel_contexts + num_netdev_contexts) > rcv_contexts) { - dd_dev_err(dd, "No receive contexts available for VNIC\n"); - num_netdev_contexts = 0; - } - total_contexts = num_kernel_contexts + num_netdev_contexts; - /* * User contexts: * - default to 1 user context per real (non-HT) CPU core if @@ -13453,15 +13445,19 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Adjust the counts given a global max. */ - if (total_contexts + n_usr_ctxts > rcv_contexts) { + if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) { dd_dev_err(dd, - "Reducing # user receive contexts to: %d, from %u\n", - rcv_contexts - total_contexts, + "Reducing # user receive contexts to: %u, from %u\n", + (u32)(rcv_contexts - num_kernel_contexts), n_usr_ctxts); /* recalculate */ - n_usr_ctxts = rcv_contexts - total_contexts; + n_usr_ctxts = rcv_contexts - num_kernel_contexts; } + num_netdev_contexts = + hfi1_num_netdev_contexts(dd, rcv_contexts - + (num_kernel_contexts + n_usr_ctxts), + &node_affinity.real_cpu_mask); /* * The RMT entries are currently allocated as shown below: * 1. QOS (0 to 128 entries); @@ -13487,17 +13483,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd) n_usr_ctxts = user_rmt_reduced; } - total_contexts += n_usr_ctxts; - - /* the first N are kernel contexts, the rest are user/vnic contexts */ - dd->num_rcv_contexts = total_contexts; + /* the first N are kernel contexts, the rest are user/netdev contexts */ + dd->num_rcv_contexts = + num_kernel_contexts + n_usr_ctxts + num_netdev_contexts; dd->n_krcv_queues = num_kernel_contexts; dd->first_dyn_alloc_ctxt = num_kernel_contexts; dd->num_netdev_contexts = num_netdev_contexts; dd->num_user_contexts = n_usr_ctxts; dd->freectxts = n_usr_ctxts; dd_dev_info(dd, - "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", + "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n", rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, @@ -14554,7 +14549,8 @@ static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd) u8 ctx_id = 0; u64 reg; u32 regoff; - int rmt_start = dd->vnic.rmt_start; + int rmt_start = hfi1_netdev_get_free_rmt_idx(dd); + int ctxt_count = hfi1_netdev_ctxt_count(dd); /* We already have contexts mapped in RMT */ if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) { @@ -14562,7 +14558,7 @@ static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd) return true; } - if (hfi1_is_rmt_full(rmt_start, NUM_VNIC_MAP_ENTRIES)) { + if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) { dd_dev_err(dd, "Not enought RMT entries used = %d\n", rmt_start); return false; @@ -14570,27 +14566,27 @@ static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd) dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n", rmt_start, - rmt_start + NUM_VNIC_MAP_ENTRIES); + rmt_start + NUM_NETDEV_MAP_ENTRIES); /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */ regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8; reg = read_csr(dd, regoff); - for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) { + for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) { /* Update map register with netdev context */ j = (rmt_start + i) % 8; reg &= ~(0xffllu << (j * 8)); - reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8); + reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8); /* Wrap up netdev ctx index */ - ctx_id %= dd->vnic.num_ctxt; + ctx_id %= ctxt_count; /* Write back map register */ - if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) { + if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) { dev_dbg(&(dd)->pcidev->dev, "RMT[%d] =0x%llx\n", regoff - RCV_RSM_MAP_TABLE, reg); write_csr(dd, regoff, reg); regoff += 8; - if (i < (NUM_VNIC_MAP_ENTRIES - 1)) + if (i < (NUM_NETDEV_MAP_ENTRIES - 1)) reg = read_csr(dd, regoff); } } @@ -14617,8 +14613,9 @@ void hfi1_init_aip_rsm(struct hfi1_devdata *dd) * exist yet */ if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) { + int rmt_start = hfi1_netdev_get_free_rmt_idx(dd); struct rsm_rule_data rrd = { - .offset = dd->vnic.rmt_start, + .offset = rmt_start, .pkt_type = IB_PACKET_TYPE, .field1_off = LRH_BTH_MATCH_OFFSET, .mask1 = LRH_BTH_MASK, @@ -14627,10 +14624,10 @@ void hfi1_init_aip_rsm(struct hfi1_devdata *dd) .mask2 = BTH_DESTQP_MASK, .value2 = BTH_DESTQP_VALUE, .index1_off = DETH_AIP_SQPN_SELECT_OFFSET + - ilog2(NUM_VNIC_MAP_ENTRIES), - .index1_width = ilog2(NUM_VNIC_MAP_ENTRIES), + ilog2(NUM_NETDEV_MAP_ENTRIES), + .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES), .index2_off = DETH_AIP_SQPN_SELECT_OFFSET, - .index2_width = ilog2(NUM_VNIC_MAP_ENTRIES) + .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES) }; hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd); @@ -14640,9 +14637,10 @@ void hfi1_init_aip_rsm(struct hfi1_devdata *dd) /* Initialize RSM for VNIC */ void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) { + int rmt_start = hfi1_netdev_get_free_rmt_idx(dd); struct rsm_rule_data rrd = { /* Add rule for vnic */ - .offset = dd->vnic.rmt_start, + .offset = rmt_start, .pkt_type = 4, /* Match 16B packets */ .field1_off = L2_TYPE_MATCH_OFFSET, @@ -14654,9 +14652,9 @@ void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) .value2 = L4_16B_ETH_VALUE, /* Calc context from veswid and entropy */ .index1_off = L4_16B_HDR_VESWID_OFFSET, - .index1_width = ilog2(NUM_VNIC_MAP_ENTRIES), + .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES), .index2_off = L2_16B_ENTROPY_OFFSET, - .index2_width = ilog2(NUM_VNIC_MAP_ENTRIES) + .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES) }; hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd); @@ -14690,8 +14688,8 @@ static int init_rxe(struct hfi1_devdata *dd) init_qos(dd, rmt); init_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); - /* record number of used rsm map entries for vnic */ - dd->vnic.rmt_start = rmt->used; + /* record number of used rsm map entries for netdev */ + hfi1_netdev_set_free_rmt_idx(dd, rmt->used); kfree(rmt); /* @@ -15245,6 +15243,10 @@ int hfi1_init_dd(struct hfi1_devdata *dd) (dd->revision >> CCE_REVISION_SW_SHIFT) & CCE_REVISION_SW_MASK); + /* alloc netdev data */ + if (hfi1_netdev_alloc(dd)) + goto bail_cleanup; + ret = set_up_context_variables(dd); if (ret) goto bail_cleanup; @@ -15345,6 +15347,7 @@ int hfi1_init_dd(struct hfi1_devdata *dd) hfi1_comp_vectors_clean_up(dd); msix_clean_up_interrupts(dd); bail_cleanup: + hfi1_netdev_free(dd); hfi1_pcie_ddcleanup(dd); bail_free: hfi1_free_devdata(dd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index d89fc8fdff6a..60ff6de8cf98 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1771,28 +1771,10 @@ static void process_receive_ib(struct hfi1_packet *packet) hfi1_ib_rcv(packet); } -static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) -{ - /* Packet received in VNIC context via RSM */ - if (packet->rcd->is_vnic) - return true; - - if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) && - (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR)) - return true; - - return false; -} - static void process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; - if (hfi1_is_vnic_packet(packet)) { - hfi1_vnic_bypass_rcv(packet); - return; - } - if (hfi1_setup_bypass_packet(packet)) return; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 986d8c3dc430..b4c6bff60a4e 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1047,23 +1047,10 @@ struct hfi1_asic_data { #define NUM_MAP_ENTRIES 256 #define NUM_MAP_REGS 32 -/* - * Number of VNIC contexts used. Ensure it is less than or equal to - * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). - */ -#define HFI1_NUM_VNIC_CTXT 8 - -/* Number of VNIC RSM entries */ -#define NUM_VNIC_MAP_ENTRIES 8 - /* Virtual NIC information */ struct hfi1_vnic_data { - struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; struct kmem_cache *txreq_cache; - struct xarray vesws; u8 num_vports; - u8 rmt_start; - u8 num_ctxt; }; struct hfi1_vnic_vport_info; @@ -1419,6 +1406,7 @@ struct hfi1_devdata { struct hfi1_vnic_data vnic; /* Lock to protect IRQ SRC register access */ spinlock_t irq_src_lock; + int vnic_num_vports; struct net_device *dummy_netdev; /* Keeps track of IPoIB RSM rule users */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 64279d04370d..5eed4360695f 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -69,6 +69,7 @@ #include "affinity.h" #include "vnic.h" #include "exp_rcv.h" +#include "netdev.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -1665,9 +1666,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* do the generic initialization */ initfail = hfi1_init(dd, 0); - /* setup vnic */ - hfi1_vnic_setup(dd); - ret = hfi1_register_ib_device(dd); /* @@ -1706,7 +1704,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) hfi1_device_remove(dd); if (!ret) hfi1_unregister_ib_device(dd); - hfi1_vnic_cleanup(dd); postinit_cleanup(dd); if (initfail) ret = initfail; @@ -1751,8 +1748,8 @@ static void remove_one(struct pci_dev *pdev) /* unregister from IB core */ hfi1_unregister_ib_device(dd); - /* cleanup vnic */ - hfi1_vnic_cleanup(dd); + /* free netdev data */ + hfi1_netdev_free(dd); /* * Disable the IB link, disable interrupts on the device, diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c index 606ac69eeea5..3afa7545242c 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_rx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c @@ -74,8 +74,15 @@ int hfi1_ipoib_rxq_init(struct net_device *netdev) { struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev); struct hfi1_devdata *dd = ipoib_priv->dd; + int ret; - return hfi1_netdev_rx_init(dd); + ret = hfi1_netdev_rx_init(dd); + if (ret) + return ret; + + hfi1_init_aip_rsm(dd); + + return ret; } void hfi1_ipoib_rxq_deinit(struct net_device *netdev) @@ -83,5 +90,6 @@ void hfi1_ipoib_rxq_deinit(struct net_device *netdev) struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev); struct hfi1_devdata *dd = ipoib_priv->dd; + hfi1_deinit_aip_rsm(dd); hfi1_netdev_rx_destroy(dd); } diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index 7559875e0322..d61ee853d215 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -172,7 +172,8 @@ static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd, const char *name) { int nr = msix_request_irq(rcd->dd, rcd, handler, thread, - IRQ_RCVCTXT, name); + rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT, + name); if (nr < 0) return nr; @@ -371,15 +372,16 @@ void msix_clean_up_interrupts(struct hfi1_devdata *dd) } /** - * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize + * msix_netdev_syncrhonize_irq() - netdev IRQ synchronize * @dd: valid devdata */ -void msix_vnic_synchronize_irq(struct hfi1_devdata *dd) +void msix_netdev_synchronize_irq(struct hfi1_devdata *dd) { int i; + int ctxt_count = hfi1_netdev_ctxt_count(dd); - for (i = 0; i < dd->vnic.num_ctxt; i++) { - struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; + for (i = 0; i < ctxt_count; i++) { + struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i); struct hfi1_msix_entry *me; me = &dd->msix_info.msix_entries[rcd->msix_intr]; diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h index 42fab9475499..e63e944bf0fc 100644 --- a/drivers/infiniband/hw/hfi1/msix.h +++ b/drivers/infiniband/hw/hfi1/msix.h @@ -60,7 +60,7 @@ int msix_request_sdma_irq(struct sdma_engine *sde); void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr); /* Netdev interface */ -void msix_vnic_synchronize_irq(struct hfi1_devdata *dd); +void msix_netdev_synchronize_irq(struct hfi1_devdata *dd); int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd); #endif diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h index edb936f013c1..947543a3e0c4 100644 --- a/drivers/infiniband/hw/hfi1/netdev.h +++ b/drivers/infiniband/hw/hfi1/netdev.h @@ -82,6 +82,25 @@ struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt) return priv->rxq[ctxt].rcd; } +static inline +int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return priv->rmt_start; +} + +static inline +void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + priv->rmt_start = rmt_idx; +} + +u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts, + struct cpumask *cpu_mask); + void hfi1_netdev_enable_queues(struct hfi1_devdata *dd); void hfi1_netdev_disable_queues(struct hfi1_devdata *dd); int hfi1_netdev_rx_init(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 124e4e8695b0..58af6a454761 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -140,6 +140,50 @@ static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv, return rc; } +/** + * hfi1_num_netdev_contexts - Count of netdev recv contexts to use. + * @dd: device on which to allocate netdev contexts + * @available_contexts: count of available receive contexts + * @cpu_mask: mask of possible cpus to include for contexts + * + * Return: count of physical cores on a node or the remaining available recv + * contexts for netdev recv context usage up to the maximum of + * HFI1_MAX_NETDEV_CTXTS. + * A value of 0 can be returned when acceleration is explicitly turned off, + * a memory allocation error occurs or when there are no available contexts. + * + */ +u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts, + struct cpumask *cpu_mask) +{ + cpumask_var_t node_cpu_mask; + unsigned int available_cpus; + + if (!HFI1_CAP_IS_KSET(AIP)) + return 0; + + /* Always give user contexts priority over netdev contexts */ + if (available_contexts == 0) { + dd_dev_info(dd, "No receive contexts available for netdevs.\n"); + return 0; + } + + if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) { + dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n"); + return 0; + } + + cpumask_and(node_cpu_mask, cpu_mask, + cpumask_of_node(pcibus_to_node(dd->pcidev->bus))); + + available_cpus = cpumask_weight(node_cpu_mask); + + free_cpumask_var(node_cpu_mask); + + return min3(available_cpus, available_contexts, + (u32)HFI1_MAX_NETDEV_CTXTS); +} + static int hfi1_netdev_rxq_init(struct net_device *dev) { int i; @@ -238,7 +282,7 @@ static void disable_queues(struct hfi1_netdev_priv *priv) { int i; - msix_vnic_synchronize_irq(priv->dd); + msix_netdev_synchronize_irq(priv->dd); for (i = 0; i < priv->num_rx_q; i++) { struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index 5ae781514e32..66150a13f374 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -1,7 +1,7 @@ #ifndef _HFI1_VNIC_H #define _HFI1_VNIC_H /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -69,6 +69,7 @@ #define HFI1_VNIC_SC_SHIFT 4 #define HFI1_VNIC_MAX_QUEUE 16 +#define HFI1_NUM_VNIC_CTXT 8 /** * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information @@ -104,7 +105,6 @@ struct hfi1_vnic_rx_queue { struct hfi1_vnic_vport_info *vinfo; struct net_device *netdev; struct napi_struct napi; - struct sk_buff_head skbq; }; /** @@ -146,7 +146,6 @@ struct hfi1_vnic_vport_info { /* vnic hfi1 internal functions */ void hfi1_vnic_setup(struct hfi1_devdata *dd); -void hfi1_vnic_cleanup(struct hfi1_devdata *dd); int hfi1_vnic_txreq_init(struct hfi1_devdata *dd); void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index db7624cacee1..b183c56b7b6a 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -53,6 +53,7 @@ #include #include "vnic.h" +#include "netdev.h" #define HFI_TX_TIMEOUT_MS 1000 @@ -62,114 +63,6 @@ static DEFINE_SPINLOCK(vport_cntr_lock); -static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) -{ - unsigned int rcvctrl_ops = 0; - int ret; - - uctxt->do_interrupt = &handle_receive_interrupt; - - /* Now allocate the RcvHdr queue and eager buffers. */ - ret = hfi1_create_rcvhdrq(dd, uctxt); - if (ret) - goto done; - - ret = hfi1_setup_eagerbufs(uctxt); - if (ret) - goto done; - - if (hfi1_rcvhdrtail_kvaddr(uctxt)) - clear_rcvhdrtail(uctxt); - - rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; - rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; - - if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) - rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) - rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) - rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) - rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; - - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); -done: - return ret; -} - -static int allocate_vnic_ctxt(struct hfi1_devdata *dd, - struct hfi1_ctxtdata **vnic_ctxt) -{ - struct hfi1_ctxtdata *uctxt; - int ret; - - if (dd->flags & HFI1_FROZEN) - return -EIO; - - ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); - if (ret < 0) { - dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); - return -ENOMEM; - } - - uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | - HFI1_CAP_KGET(NODROP_RHQ_FULL) | - HFI1_CAP_KGET(NODROP_EGR_FULL) | - HFI1_CAP_KGET(DMA_RTAIL); - uctxt->seq_cnt = 1; - uctxt->is_vnic = true; - - msix_request_rcd_irq(uctxt); - - hfi1_stats.sps_ctxts++; - dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); - *vnic_ctxt = uctxt; - - return 0; -} - -static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, - struct hfi1_ctxtdata *uctxt) -{ - dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); - flush_wc(); - - /* - * Disable receive context and interrupt available, reset all - * RcvCtxtCtrl bits to default values. - */ - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_TIDFLOW_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_ONE_PKT_EGR_DIS | - HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); - - /* msix_intr will always be > 0, only clean up if this is true */ - if (uctxt->msix_intr) - msix_free_irq(dd, uctxt->msix_intr); - - uctxt->event_flags = 0; - - hfi1_clear_tids(uctxt); - hfi1_clear_ctxt_pkey(dd, uctxt); - - hfi1_stats.sps_ctxts--; - - hfi1_free_ctxt(uctxt); -} - -void hfi1_vnic_setup(struct hfi1_devdata *dd) -{ - xa_init(&dd->vnic.vesws); -} - -void hfi1_vnic_cleanup(struct hfi1_devdata *dd) -{ - WARN_ON(!xa_empty(&dd->vnic.vesws)); -} - #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ u64 *src64, *dst64; \ for (src64 = &qstats->x_grp.unicast, \ @@ -179,6 +72,9 @@ void hfi1_vnic_cleanup(struct hfi1_devdata *dd) } \ } while (0) +#define VNIC_MASK (0xFF) +#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK)) + /* hfi1_vnic_update_stats - update statistics */ static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, struct opa_vnic_stats *stats) @@ -454,71 +350,25 @@ static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, return rc; } -static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) +static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd, + int vesw_id) { - unsigned char *pad_info; - struct sk_buff *skb; + int vnic_id = VNIC_ID(vesw_id); - skb = skb_dequeue(&rxq->skbq); - if (unlikely(!skb)) + return hfi1_netdev_get_data(dd, vnic_id); +} + +static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd) +{ + struct hfi1_vnic_vport_info *vinfo; + int next_id = VNIC_ID(0); + + vinfo = hfi1_netdev_get_first_data(dd, &next_id); + + if (next_id > VNIC_ID(VNIC_MASK)) return NULL; - /* remove tail padding and icrc */ - pad_info = skb->data + skb->len - 1; - skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - - ((*pad_info) & 0x7))); - - return skb; -} - -/* hfi1_vnic_handle_rx - handle skb receive */ -static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, - int *work_done, int work_to_do) -{ - struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; - struct sk_buff *skb; - int rc; - - while (1) { - if (*work_done >= work_to_do) - break; - - skb = hfi1_vnic_get_skb(rxq); - if (unlikely(!skb)) - break; - - rc = hfi1_vnic_decap_skb(rxq, skb); - /* update rx counters */ - hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); - if (unlikely(rc)) { - dev_kfree_skb_any(skb); - continue; - } - - skb_checksum_none_assert(skb); - skb->protocol = eth_type_trans(skb, rxq->netdev); - - napi_gro_receive(&rxq->napi, skb); - (*work_done)++; - } -} - -/* hfi1_vnic_napi - napi receive polling callback function */ -static int hfi1_vnic_napi(struct napi_struct *napi, int budget) -{ - struct hfi1_vnic_rx_queue *rxq = container_of(napi, - struct hfi1_vnic_rx_queue, napi); - struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; - int work_done = 0; - - v_dbg("napi %d budget %d\n", rxq->idx, budget); - hfi1_vnic_handle_rx(rxq, &work_done, budget); - - v_dbg("napi %d work_done %d\n", rxq->idx, work_done); - if (work_done < budget) - napi_complete(napi); - - return work_done; + return vinfo; } void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) @@ -527,13 +377,14 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) struct hfi1_vnic_vport_info *vinfo = NULL; struct hfi1_vnic_rx_queue *rxq; struct sk_buff *skb; - int l4_type, vesw_id = -1; + int l4_type, vesw_id = -1, rc; u8 q_idx; + unsigned char *pad_info; l4_type = hfi1_16B_get_l4(packet->ebuf); if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); - vinfo = xa_load(&dd->vnic.vesws, vesw_id); + vinfo = get_vnic_port(dd, vesw_id); /* * In case of invalid vesw id, count the error on @@ -541,10 +392,8 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) */ if (unlikely(!vinfo)) { struct hfi1_vnic_vport_info *vinfo_tmp; - unsigned long index = 0; - vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX, - XA_PRESENT); + vinfo_tmp = get_first_vnic_port(dd); if (vinfo_tmp) { spin_lock(&vport_cntr_lock); vinfo_tmp->stats[0].netstats.rx_nohandler++; @@ -563,12 +412,6 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) rxq = &vinfo->rxq[q_idx]; if (unlikely(!netif_oper_up(vinfo->netdev))) { vinfo->stats[q_idx].rx_drop_state++; - skb_queue_purge(&rxq->skbq); - return; - } - - if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { - vinfo->stats[q_idx].netstats.rx_fifo_errors++; return; } @@ -580,34 +423,41 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) memcpy(skb->data, packet->ebuf, packet->tlen); skb_put(skb, packet->tlen); - skb_queue_tail(&rxq->skbq, skb); - if (napi_schedule_prep(&rxq->napi)) { - v_dbg("napi %d scheduling\n", q_idx); - __napi_schedule(&rxq->napi); + pad_info = skb->data + skb->len - 1; + skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - + ((*pad_info) & 0x7))); + + rc = hfi1_vnic_decap_skb(rxq, skb); + + /* update rx counters */ + hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); + if (unlikely(rc)) { + dev_kfree_skb_any(skb); + return; } + + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, rxq->netdev); + + napi_gro_receive(&rxq->napi, skb); } static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; struct net_device *netdev = vinfo->netdev; - int i, rc; + int rc; /* ensure virtual eth switch id is valid */ if (!vinfo->vesw_id) return -EINVAL; - rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL); + rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo); if (rc < 0) return rc; - for (i = 0; i < vinfo->num_rx_q; i++) { - struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; - - skb_queue_head_init(&rxq->skbq); - napi_enable(&rxq->napi); - } + hfi1_netdev_rx_init(dd); netif_carrier_on(netdev); netif_tx_start_all_queues(netdev); @@ -619,23 +469,13 @@ static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; - u8 i; clear_bit(HFI1_VNIC_UP, &vinfo->flags); netif_carrier_off(vinfo->netdev); netif_tx_disable(vinfo->netdev); - xa_erase(&dd->vnic.vesws, vinfo->vesw_id); + hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id)); - /* ensure irqs see the change */ - msix_vnic_synchronize_irq(dd); - - /* remove unread skbs */ - for (i = 0; i < vinfo->num_rx_q; i++) { - struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; - - napi_disable(&rxq->napi); - skb_queue_purge(&rxq->skbq); - } + hfi1_netdev_rx_destroy(dd); } static int hfi1_netdev_open(struct net_device *netdev) @@ -660,70 +500,30 @@ static int hfi1_netdev_close(struct net_device *netdev) return 0; } -static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, - struct hfi1_ctxtdata **vnic_ctxt) -{ - int rc; - - rc = allocate_vnic_ctxt(dd, vnic_ctxt); - if (rc) { - dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); - return rc; - } - - rc = setup_vnic_ctxt(dd, *vnic_ctxt); - if (rc) { - dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); - deallocate_vnic_ctxt(dd, *vnic_ctxt); - *vnic_ctxt = NULL; - } - - return rc; -} - static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; - int i, rc = 0; + int rc = 0; mutex_lock(&hfi1_mutex); - if (!dd->vnic.num_vports) { + if (!dd->vnic_num_vports) { rc = hfi1_vnic_txreq_init(dd); if (rc) goto txreq_fail; } - for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { - rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); - if (rc) - break; - hfi1_rcd_get(dd->vnic.ctxt[i]); - dd->vnic.ctxt[i]->vnic_q_idx = i; - } - - if (i < vinfo->num_rx_q) { - /* - * If required amount of contexts is not - * allocated successfully then remaining contexts - * are released. - */ - while (i-- > dd->vnic.num_ctxt) { - deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); - hfi1_rcd_put(dd->vnic.ctxt[i]); - dd->vnic.ctxt[i] = NULL; - } + if (hfi1_netdev_rx_init(dd)) { + dd_dev_err(dd, "Unable to initialize netdev contexts\n"); goto alloc_fail; } - if (dd->vnic.num_ctxt != i) { - dd->vnic.num_ctxt = i; - hfi1_init_vnic_rsm(dd); - } + hfi1_init_vnic_rsm(dd); - dd->vnic.num_vports++; + dd->vnic_num_vports++; hfi1_vnic_sdma_init(vinfo); + alloc_fail: - if (!dd->vnic.num_vports) + if (!dd->vnic_num_vports) hfi1_vnic_txreq_deinit(dd); txreq_fail: mutex_unlock(&hfi1_mutex); @@ -733,20 +533,14 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; - int i; mutex_lock(&hfi1_mutex); - if (--dd->vnic.num_vports == 0) { - for (i = 0; i < dd->vnic.num_ctxt; i++) { - deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); - hfi1_rcd_put(dd->vnic.ctxt[i]); - dd->vnic.ctxt[i] = NULL; - } + if (--dd->vnic_num_vports == 0) { hfi1_deinit_vnic_rsm(dd); - dd->vnic.num_ctxt = 0; hfi1_vnic_txreq_deinit(dd); } mutex_unlock(&hfi1_mutex); + hfi1_netdev_rx_destroy(dd); } static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) @@ -815,14 +609,15 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->num_sdma, dd->num_netdev_contexts); + chip_sdma_engines(dd), + dd->num_netdev_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = dd->num_sdma; + vinfo->num_tx_q = chip_sdma_engines(dd); vinfo->num_rx_q = dd->num_netdev_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; @@ -841,7 +636,6 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, rxq->idx = i; rxq->vinfo = vinfo; rxq->netdev = netdev; - netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); } rc = hfi1_vnic_init(vinfo); From b7e159eb008eb8b0bb83c09990b648bd2c4081df Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:06:55 -0400 Subject: [PATCH 440/562] IB/{hfi1, ipoib, rdma}: Broadcast ping sent packets which exceeded mtu size When in connected mode ipoib sent broadcast pings which exceeded the mtu size for broadcast addresses. Add an mtu attribute to the rdma_netdev structure which ipoib sets to its mcast mtu size. The RDMA netdev uses this value to determine if the skb length is too long for the mtu specified and if it is, drops the packet and logs an error about the errant packet. Link: https://lore.kernel.org/r/20200511160655.173205.14546.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 1 + drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 3 +++ 3 files changed, 6 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 22216f181b24..a6c4322b409b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1896,6 +1896,7 @@ static int ipoib_ndo_init(struct net_device *ndev) { struct ipoib_dev_priv *priv = ipoib_priv(ndev); int rc; + struct rdma_netdev *rn = netdev_priv(ndev); if (priv->parent) { ipoib_child_init(ndev); @@ -1908,6 +1909,7 @@ static int ipoib_ndo_init(struct net_device *ndev) /* MTU will be reset when mcast join happens */ ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = ndev->mtu; + rn->mtu = priv->mcast_mtu; ndev->max_mtu = IPOIB_CM_MTU; ndev->neigh_priv_len = sizeof(struct ipoib_neigh); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 7166ee9b7a25..3d5f6b848c9e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -246,6 +246,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (priv->mcast_mtu == priv->admin_mtu) priv->admin_mtu = IPOIB_UD_MTU(mtu); priv->mcast_mtu = IPOIB_UD_MTU(mtu); + rn->mtu = priv->mcast_mtu; priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 8ac8e18fbe0c..30865605e098 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -97,6 +97,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, { struct net_device *ndev = priv->dev; int result; + struct rdma_netdev *rn = netdev_priv(ndev); ASSERT_RTNL(); @@ -117,6 +118,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, goto out_early; } + rn->mtu = priv->mcast_mtu; + priv->parent = ppriv->dev; priv->pkey = pkey; priv->child_type = type; From 7638c0e965f48d773d8684d38e0967e4d0ee238c Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Mon, 11 May 2020 12:07:01 -0400 Subject: [PATCH 441/562] IB/hfi1: Add packet histogram trace event Add a simple trace event taking context number and building simple histogram to print packets distribution between contexts. Link: https://lore.kernel.org/r/20200511160700.173205.84270.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Grzegorz Andrejczuk Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 1 + drivers/infiniband/hw/hfi1/trace.c | 32 ++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/trace_ctxts.h | 11 +++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 60ff6de8cf98..a40701a6e1b6 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1706,6 +1706,7 @@ static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) goto drop_no_nd; trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + trace_ctxt_rsm_hist(rcd->ctxt); /* handle congestion notifications */ do_work = hfi1_may_ecn(packet); diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index c8a9988d972d..b219ea90fd6f 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -520,6 +520,38 @@ u16 hfi1_trace_get_tid_idx(u32 ent) return EXP_TID_GET(ent, IDX); } +struct hfi1_ctxt_hist { + atomic_t count; + atomic_t data[255]; +}; + +struct hfi1_ctxt_hist hist = { + .count = ATOMIC_INIT(0) +}; + +const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt) +{ + int i, len = ARRAY_SIZE(hist.data); + const char *ret = trace_seq_buffer_ptr(p); + unsigned long packet_count = atomic_fetch_inc(&hist.count); + + trace_seq_printf(p, "packet[%lu]", packet_count); + for (i = 0; i < len; ++i) { + unsigned long val; + atomic_t *count = &hist.data[i]; + + if (ctxt == i) + val = atomic_fetch_inc(count); + else + val = atomic_read(count); + + if (val) + trace_seq_printf(p, "(%d:%lu)", i, val); + } + trace_seq_putc(p, 0); + return ret; +} + __hfi1_trace_fn(AFFINITY); __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index b5fc5c6cd52f..d8c168dc3ea8 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -138,6 +138,15 @@ TRACE_EVENT(hfi1_ctxt_info, ) ); +const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt); +TRACE_EVENT(ctxt_rsm_hist, + TP_PROTO(unsigned int ctxt), + TP_ARGS(ctxt), + TP_STRUCT__entry(__field(unsigned int, ctxt)), + TP_fast_assign(__entry->ctxt = ctxt;), + TP_printk("%s", hfi1_trace_print_rsm_hist(p, __entry->ctxt)) +); + #endif /* __HFI1_TRACE_CTXTS_H */ #undef TRACE_INCLUDE_PATH From 8f149b684764662bca3e08f340202b7bd67736fc Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:07:06 -0400 Subject: [PATCH 442/562] IB/ipoib: Add capability to switch between datagram and connected mode This is the prerequisite modification to the ipoib ulp to allow a rdma netdev to obtain the default ndo ops for init/uninit/open/close. This is accomplished by setting the netdev ops field within the callback function passed to the netdev allocation routine which in turn was passed into the rdma netdev allocation routine. This allows the rdma netdev to call back into the ulp to create the resources required for connected mode operation. Additionally as the ulp is not re-entrant, when switching modes, the number of real tx queues is set to 1 for the connected mode. For datagram mode the number of real tx queues is set to the actual number of tx queues specified at the netdev's allocation. For the internal ulp netdev the number of tx queues defaults to 1. It is up to the rdma netdev to specify the actual number it can support. When the driver does not support a rdma netdev for acceleration, (-ENOTSUPPORTED return code or the verbs function for allocation is NULL) the ipoib ulp functions are unaffected by using the internal netdev allocated by the ipoib ulp. Link: https://lore.kernel.org/r/20200511160706.173205.19086.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a6c4322b409b..d12e5c9c38af 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -526,6 +526,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) "will cause multicast packet drops\n"); netdev_update_features(dev); dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); + netif_set_real_num_tx_queues(dev, 1); rtnl_unlock(); priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; @@ -537,6 +538,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); netdev_update_features(dev); dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); + netif_set_real_num_tx_queues(dev, dev->num_tx_queues); rtnl_unlock(); ipoib_flush_paths(dev); return (!rtnl_trylock()) ? -EBUSY : 0; @@ -2071,9 +2073,17 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_do_ioctl = ipoib_ioctl, }; +static const struct net_device_ops ipoib_netdev_default_pf = { + .ndo_init = ipoib_dev_init_default, + .ndo_uninit = ipoib_dev_uninit_default, + .ndo_open = ipoib_ib_dev_open_default, + .ndo_stop = ipoib_ib_dev_stop_default, +}; + void ipoib_setup_common(struct net_device *dev) { dev->header_ops = &ipoib_header_ops; + dev->netdev_ops = &ipoib_netdev_default_pf; ipoib_set_ethtool_ops(dev); @@ -2123,13 +2133,6 @@ static void ipoib_build_priv(struct net_device *dev) INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); } -static const struct net_device_ops ipoib_netdev_default_pf = { - .ndo_init = ipoib_dev_init_default, - .ndo_uninit = ipoib_dev_uninit_default, - .ndo_open = ipoib_ib_dev_open_default, - .ndo_stop = ipoib_ib_dev_stop_default, -}; - static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port, const char *name) { @@ -2167,7 +2170,6 @@ int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name, if (rc != -EOPNOTSUPP) goto out; - dev->netdev_ops = &ipoib_netdev_default_pf; rn->send = ipoib_send; rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; From 0ad45e5fdc522b26242882abfca1b4b3c840961d Mon Sep 17 00:00:00 2001 From: Piotr Stankiewicz Date: Mon, 11 May 2020 12:07:13 -0400 Subject: [PATCH 443/562] IB/hfi1: Enable the transmit side of the datagram ipoib netdev This patch hooks the transmit side of the datagram netdev with ipoib by setting the rdma_netdev_get_params function for the hfi1 ib_device_ops structue. It also enables the receiving side by adding the AIP capability into the default capabilities. Link: https://lore.kernel.org/r/20200511160712.173205.65700.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Piotr Stankiewicz Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/verbs.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 606254513640..ff423e546b80 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -160,6 +160,7 @@ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_MULTI_PKT_EGR | \ HFI1_CAP_EXTENDED_PSN | \ + HFI1_CAP_AIP | \ ((HFI1_CAP_HDRSUPP | \ HFI1_CAP_MULTI_PKT_EGR | \ HFI1_CAP_STATIC_RATE_CTRL | \ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 19d5d0061b01..43ddced15951 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -66,6 +66,7 @@ #include "vnic.h" #include "fault.h" #include "affinity.h" +#include "ipoib.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -1795,6 +1796,7 @@ static const struct ib_device_ops hfi1_dev_ops = { .modify_device = modify_device, /* keep process mad in the driver */ .process_mad = hfi1_process_mad, + .rdma_netdev_get_params = hfi1_ipoib_rn_get_params, }; /** From 0ac8903cbbe618d947b5815d6e0f7b044ee83aa3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 May 2020 10:27:05 +0300 Subject: [PATCH 444/562] RDMA/core: Allow the ioctl layer to abort a fully created uobject While creating a uobject every create reaches a point where the uobject is fully initialized. For ioctls that go on to copy_to_user this means they need to open code the destruction of a fully created uobject - ie the RDMA_REMOVE_DESTROY sort of flow. Open coding this creates bugs, eg the CQ does not properly flush the events list when it does its error unwind. Provide a uverbs_finalize_uobj_create() function which indicates that the uobject is fully initialized and that abort should call to destroy_hw to destroy the uobj->object and related. Methods can call this function if they go on to have error cases after setting uobj->object. Once done those error cases can simply do return, without an error unwind. Link: https://lore.kernel.org/r/20200519072711.257271-2-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 25 +++++++++++++++---- drivers/infiniband/core/rdma_core.h | 4 +-- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/uverbs_ioctl.c | 22 ++++++++++++++-- drivers/infiniband/core/uverbs_std_types_cq.c | 8 ++---- drivers/infiniband/core/uverbs_std_types_mr.c | 12 +++------ drivers/infiniband/hw/mlx5/devx.c | 10 +++----- drivers/infiniband/hw/mlx5/main.c | 24 +++++------------- drivers/infiniband/hw/mlx5/qos.c | 13 ++++------ include/rdma/ib_verbs.h | 5 ++++ include/rdma/uverbs_ioctl.h | 3 +++ include/rdma/uverbs_std_types.h | 2 +- include/rdma/uverbs_types.h | 3 ++- 13 files changed, 74 insertions(+), 59 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index bf8e149d3191..de3858515275 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -130,6 +130,17 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, lockdep_assert_held(&ufile->hw_destroy_rwsem); assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); + if (reason == RDMA_REMOVE_ABORT_HWOBJ) { + reason = RDMA_REMOVE_ABORT; + ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, + attrs); + /* + * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see + * ib_is_destroy_retryable, cleanup_retryable == false here. + */ + WARN_ON(ret); + } + if (reason == RDMA_REMOVE_ABORT) { WARN_ON(!list_empty(&uobj->list)); WARN_ON(!uobj->context); @@ -647,11 +658,15 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj, * object and anything else connected to uobj before calling this. */ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs, + bool hw_obj_valid) { struct ib_uverbs_file *ufile = uobj->ufile; - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); + uverbs_destroy_uobject(uobj, + hw_obj_valid ? RDMA_REMOVE_ABORT_HWOBJ : + RDMA_REMOVE_ABORT, + attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); @@ -921,8 +936,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, } void uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs) + enum uverbs_obj_access access, bool hw_obj_valid, + bool commit, struct uverbs_attr_bundle *attrs) { /* * refcounts should be handled at the object level and not at the @@ -945,7 +960,7 @@ void uverbs_finalize_object(struct ib_uobject *uobj, if (commit) rdma_alloc_commit_uobject(uobj, attrs); else - rdma_alloc_abort_uobject(uobj, attrs); + rdma_alloc_abort_uobject(uobj, attrs, hw_obj_valid); break; default: WARN_ON(true); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 33978e0f1262..2b529233e159 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -64,8 +64,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, s64 id, struct uverbs_attr_bundle *attrs); void uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs); + enum uverbs_obj_access access, bool hw_obj_valid, + bool commit, struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d5642bcf93ee..86c97221872d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -311,7 +311,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) return 0; err_uobj: - rdma_alloc_abort_uobject(uobj, attrs); + rdma_alloc_abort_uobject(uobj, attrs, false); err_ucontext: kfree(attrs->context); attrs->context = NULL; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 538affbc517e..42c5696f03bd 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -58,6 +58,7 @@ struct bundle_priv { DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN); + DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN); /* * Must be last. bundle ends in a flex array which overlaps @@ -230,7 +231,8 @@ static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, for (i = 0; i != attr->len; i++) uverbs_finalize_object(attr->uobjects[i], - spec->u2.objs_arr.access, commit, attrs); + spec->u2.objs_arr.access, false, commit, + attrs); } static int uverbs_process_attr(struct bundle_priv *pbundle, @@ -502,7 +504,9 @@ static void bundle_destroy(struct bundle_priv *pbundle, bool commit) uverbs_finalize_object( attr->obj_attr.uobject, - attr->obj_attr.attr_elm->spec.u.obj.access, commit, + attr->obj_attr.attr_elm->spec.u.obj.access, + test_bit(i, pbundle->uobj_hw_obj_valid), + commit, &pbundle->bundle); } @@ -590,6 +594,8 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, sizeof(pbundle->bundle.attr_present)); memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); + memset(pbundle->uobj_hw_obj_valid, 0, + sizeof(pbundle->uobj_hw_obj_valid)); ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); bundle_destroy(pbundle, ret == 0); @@ -784,3 +790,15 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, } return uverbs_copy_to(bundle, idx, from, size); } + +/* Once called an abort will call through to the type's destroy_hw() */ +void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle, + u16 idx) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + + __set_bit(uapi_bkey_attr(uapi_key_attr(idx)), + pbundle->uobj_hw_obj_valid); +} +EXPORT_SYMBOL(uverbs_finalize_uobj_create); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index da4110a0eea2..73fbbeb2586c 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -129,16 +129,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( obj->uevent.uobject.object = cq; obj->uevent.uobject.user_handle = user_handle; rdma_restrack_uadd(&cq->res); + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, sizeof(cq->cqe)); - if (ret) - goto err_cq; + return ret; - return 0; -err_cq: - ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); - cq = NULL; err_free: kfree(cq); err_event_file: diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index c1286a52dc84..a2722ef8496e 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -136,21 +136,15 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( uobj->object = mr; + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey, sizeof(mr->lkey)); if (ret) - goto err_dereg; + return ret; ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY, &mr->rkey, sizeof(mr->rkey)); - if (ret) - goto err_dereg; - - return 0; - -err_dereg: - ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); - return ret; } diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index c339dd5ee694..3047e7d60a9b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2218,14 +2218,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( obj->mdev = dev->mdev; uobj->object = obj; devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id); - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id)); - if (err) - goto err_umem_destroy; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); - return 0; + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, + sizeof(obj_id)); + return err; -err_umem_destroy: - mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); err_umem_release: ib_umem_release(obj->umem); err_obj_free: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 26f0b39c7f74..623d7898ae6d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6187,26 +6187,20 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)( mmap_offset = mlx5_entry_to_mmap_offset(entry); length = entry->rdma_entry.npages * PAGE_SIZE; uobj->object = entry; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, &mmap_offset, sizeof(mmap_offset)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, &entry->page_idx, sizeof(entry->page_idx)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, &length, sizeof(length)); - if (err) - goto err; - - return 0; - -err: - rdma_user_mmap_entry_remove(&entry->rdma_entry); return err; } @@ -6320,26 +6314,20 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)( mmap_offset = mlx5_entry_to_mmap_offset(entry); length = entry->rdma_entry.npages * PAGE_SIZE; uobj->object = entry; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, &mmap_offset, sizeof(mmap_offset)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, &entry->page_idx, sizeof(entry->page_idx)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, &length, sizeof(length)); - if (err) - goto err; - - return 0; - -err: - rdma_user_mmap_entry_remove(&entry->rdma_entry); return err; } diff --git a/drivers/infiniband/hw/mlx5/qos.c b/drivers/infiniband/hw/mlx5/qos.c index cac878a70edb..dce92554142a 100644 --- a/drivers/infiniband/hw/mlx5/qos.c +++ b/drivers/infiniband/hw/mlx5/qos.c @@ -69,17 +69,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_PP_OBJ_ALLOC)( if (err) goto err; - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX, - &pp_entry->index, sizeof(pp_entry->index)); - if (err) - goto clean; - pp_entry->mdev = dev->mdev; uobj->object = pp_entry; - return 0; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE); + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX, + &pp_entry->index, sizeof(pp_entry->index)); + return err; -clean: - mlx5_rl_remove_rate_raw(dev->mdev, pp_entry->index); err: kfree(pp_entry); return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 641f4751b062..c988e9205cf9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1491,6 +1491,11 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* + * uobj has been fully created, with the uobj->object set, but is being + * cleaned up before being comitted + */ + RDMA_REMOVE_ABORT_HWOBJ, }; struct ib_rdmacg_object { diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 9f3b1e004046..5bd2b037e914 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -737,6 +737,9 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) return attr->ptr_attr.len; } +void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx); + /* * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr * attribute. diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 1b28ce1aba07..d6784be27e4b 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -107,7 +107,7 @@ static inline void uobj_put_write(struct ib_uobject *uobj) static inline void uobj_alloc_abort(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - rdma_alloc_abort_uobject(uobj, attrs); + rdma_alloc_abort_uobject(uobj, attrs, false); } static inline struct ib_uobject * diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index f1cbdae67250..c15b298aa62f 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -139,7 +139,8 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs); void rdma_alloc_abort_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs); + struct uverbs_attr_bundle *attrs, + bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); From 98a8890f73489416a1ea49a644565a244d3f729a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 May 2020 10:27:06 +0300 Subject: [PATCH 445/562] IB/uverbs: Refactor related objects to use their own asynchronous event FD Refactor related objects to use their own asynchronous event FD. The ufile event FD will be the default in case an object won't have its own event FD. Link: https://lore.kernel.org/r/20200519072711.257271-3-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 3 ++- drivers/infiniband/core/uverbs_cmd.c | 25 ++++++++++++++++++- drivers/infiniband/core/uverbs_main.c | 14 +++++------ drivers/infiniband/core/uverbs_std_types_cq.c | 6 +++++ 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3d189c7ee59e..34c155f88317 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -142,7 +142,7 @@ struct ib_uverbs_file { * ucontext_lock held */ struct ib_ucontext *ucontext; - struct ib_uverbs_async_event_file *async_file; + struct ib_uverbs_async_event_file *default_async_file; struct list_head list; /* @@ -180,6 +180,7 @@ struct ib_uverbs_mcast_entry { struct ib_uevent_object { struct ib_uobject uobject; + struct ib_uverbs_async_event_file *event_file; /* List member for ib_uverbs_async_event_file list */ struct list_head event_list; u32 events_reported; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 86c97221872d..4859ac0df17c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1051,6 +1051,10 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, goto err_free; obj->uevent.uobject.object = cq; + obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); + if (obj->uevent.event_file) + uverbs_uobject_get(&obj->uevent.event_file->uobj); + memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uevent.uobject.id; resp.base.cqe = cq->cqe; @@ -1067,6 +1071,8 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, return obj; err_cb: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); cq = NULL; err_free: @@ -1460,6 +1466,9 @@ static int create_qp(struct uverbs_attr_bundle *attrs, } obj->uevent.uobject.object = qp; + obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); + if (obj->uevent.event_file) + uverbs_uobject_get(&obj->uevent.event_file->uobj); memset(&resp, 0, sizeof resp); resp.base.qpn = qp->qp_num; @@ -1473,7 +1482,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) - goto err_cb; + goto err_uevent; if (xrcd) { obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, @@ -1498,6 +1507,9 @@ static int create_qp(struct uverbs_attr_bundle *attrs, rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return 0; +err_uevent: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); err_cb: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -2975,6 +2987,9 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); + obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); + if (obj->uevent.event_file) + uverbs_uobject_get(&obj->uevent.event_file->uobj); memset(&resp, 0, sizeof(resp)); resp.wq_handle = obj->uevent.uobject.id; @@ -2993,6 +3008,8 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) return 0; err_copy: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, @@ -3453,6 +3470,10 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, } obj->uevent.uobject.object = srq; + obj->uevent.uobject.user_handle = cmd->user_handle; + obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); + if (obj->uevent.event_file) + uverbs_uobject_get(&obj->uevent.event_file->uobj); memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; @@ -3477,6 +3498,8 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, return 0; err_copy: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); err_put_pd: uobj_put_obj_read(pd); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6948f8cd1885..47794c85e9af 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -146,8 +146,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, void ib_uverbs_release_uevent(struct ib_uevent_object *uobj) { - struct ib_uverbs_async_event_file *async_file = - READ_ONCE(uobj->uobject.ufile->async_file); + struct ib_uverbs_async_event_file *async_file = uobj->event_file; struct ib_uverbs_event *evt, *tmp; if (!async_file) @@ -159,6 +158,7 @@ void ib_uverbs_release_uevent(struct ib_uevent_object *uobj) kfree(evt); } spin_unlock_irq(&async_file->ev_queue.lock); + uverbs_uobject_put(&async_file->uobj); } void ib_uverbs_detach_umcast(struct ib_qp *qp, @@ -197,8 +197,8 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); - if (file->async_file) - uverbs_uobject_put(&file->async_file->uobj); + if (file->default_async_file) + uverbs_uobject_put(&file->default_async_file->uobj); put_device(&file->device->dev); if (file->disassociate_page) @@ -427,7 +427,7 @@ void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file, static void uverbs_uobj_event(struct ib_uevent_object *eobj, struct ib_event *event) { - ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file), + ib_uverbs_async_handler(eobj->event_file, eobj->uobject.user_handle, event->event, &eobj->event_list, &eobj->events_reported); } @@ -484,10 +484,10 @@ void ib_uverbs_init_async_event_file( /* The first async_event_file becomes the default one for the file. */ mutex_lock(&uverbs_file->ucontext_lock); - if (!uverbs_file->async_file) { + if (!uverbs_file->default_async_file) { /* Pairs with the put in ib_uverbs_release_file */ uverbs_uobject_get(&async_file->uobj); - smp_store_release(&uverbs_file->async_file, async_file); + smp_store_release(&uverbs_file->default_async_file, async_file); } mutex_unlock(&uverbs_file->ucontext_lock); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 73fbbeb2586c..be534b0af4f8 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -100,6 +100,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( uverbs_uobject_get(ev_file_uobj); } + obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); + if (obj->uevent.event_file) + uverbs_uobject_get(&obj->uevent.event_file->uobj); + if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) { ret = -EINVAL; goto err_event_file; @@ -138,6 +142,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( err_free: kfree(cq); err_event_file: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); if (ev_file) uverbs_uobject_put(ev_file_uobj); return ret; From cda9ee494248b890973f5d31cf7851c0d21755b9 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 May 2020 10:27:07 +0300 Subject: [PATCH 446/562] IB/uverbs: Extend CQ to get its own asynchronous event FD Extend CQ to get its own asynchronous event FD. The event FD is an optional attribute, in case wasn't given the ufile event FD will be used. Link: https://lore.kernel.org/r/20200519072711.257271-4-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 18 ++++++++++++++++++ drivers/infiniband/core/uverbs_std_types_cq.c | 9 ++++++--- include/uapi/rdma/ib_user_ioctl_cmds.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 34c155f88317..53a10479958b 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -297,6 +297,24 @@ static inline u32 make_port_cap_flags(const struct ib_port_attr *attr) return res; } +static inline struct ib_uverbs_async_event_file * +ib_uverbs_get_async_event(struct uverbs_attr_bundle *attrs, + u16 id) +{ + struct ib_uobject *async_ev_file_uobj; + struct ib_uverbs_async_event_file *async_ev_file; + + async_ev_file_uobj = uverbs_attr_get_uobject(attrs, id); + if (IS_ERR(async_ev_file_uobj)) + async_ev_file = READ_ONCE(attrs->ufile->default_async_file); + else + async_ev_file = container_of(async_ev_file_uobj, + struct ib_uverbs_async_event_file, + uobj); + if (async_ev_file) + uverbs_uobject_get(&async_ev_file->uobj); + return async_ev_file; +} void copy_port_attr_to_resp(struct ib_port_attr *attr, struct ib_uverbs_query_port_resp *resp, diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index be534b0af4f8..5dce2c7cc323 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -100,9 +100,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( uverbs_uobject_get(ev_file_uobj); } - obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); - if (obj->uevent.event_file) - uverbs_uobject_get(&obj->uevent.event_file->uobj); + obj->uevent.event_file = ib_uverbs_get_async_event( + attrs, UVERBS_ATTR_CREATE_CQ_EVENT_FD); if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) { ret = -EINVAL; @@ -173,6 +172,10 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_EVENT_FD, + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_ACCESS_READ, + UA_OPTIONAL), UVERBS_ATTR_UHW()); static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index d4ddbe4e696c..286fdc1929e0 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -95,6 +95,7 @@ enum uverbs_attrs_create_cq_cmd_attr_ids { UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_CREATE_CQ_RESP_CQE, + UVERBS_ATTR_CREATE_CQ_EVENT_FD, }; enum uverbs_attrs_destroy_cq_cmd_attr_ids { From 175ba58d62c84e1216cdf8b4f49f79e55e1ed04b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 May 2020 10:27:08 +0300 Subject: [PATCH 447/562] IB/uverbs: Move QP, SRQ, WQ type and flags to UAPI These constants are going to be used in the ioctl interface in coming patches so they are part of the UAPI, place them in the correct header for clarity. Link: https://lore.kernel.org/r/20200519072711.257271-5-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 43 ++++++++++++++----------- include/uapi/rdma/ib_user_ioctl_verbs.h | 34 +++++++++++++++++++ 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c988e9205cf9..94533ae16697 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1040,9 +1040,9 @@ enum ib_cq_notify_flags { }; enum ib_srq_type { - IB_SRQT_BASIC, - IB_SRQT_XRC, - IB_SRQT_TM, + IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC, + IB_SRQT_XRC = IB_UVERBS_SRQT_XRC, + IB_SRQT_TM = IB_UVERBS_SRQT_TM, }; static inline bool ib_srq_has_cq(enum ib_srq_type srq_type) @@ -1111,16 +1111,16 @@ enum ib_qp_type { IB_QPT_SMI, IB_QPT_GSI, - IB_QPT_RC, - IB_QPT_UC, - IB_QPT_UD, + IB_QPT_RC = IB_UVERBS_QPT_RC, + IB_QPT_UC = IB_UVERBS_QPT_UC, + IB_QPT_UD = IB_UVERBS_QPT_UD, IB_QPT_RAW_IPV6, IB_QPT_RAW_ETHERTYPE, - IB_QPT_RAW_PACKET = 8, - IB_QPT_XRC_INI = 9, - IB_QPT_XRC_TGT, + IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET, + IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI, + IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT, IB_QPT_MAX, - IB_QPT_DRIVER = 0xFF, + IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER, /* Reserve a range for qp types internal to the low level driver. * These qp types will not be visible at the IB core layer, so the * IB_QPT_MAX usages should not be affected in the core layer @@ -1139,17 +1139,21 @@ enum ib_qp_type { enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = + IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, IB_QP_CREATE_CROSS_CHANNEL = 1 << 2, IB_QP_CREATE_MANAGED_SEND = 1 << 3, IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_INTEGRITY_EN = 1 << 6, IB_QP_CREATE_NETDEV_USE = 1 << 7, - IB_QP_CREATE_SCATTER_FCS = 1 << 8, - IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, + IB_QP_CREATE_SCATTER_FCS = + IB_UVERBS_QP_CREATE_SCATTER_FCS, + IB_QP_CREATE_CVLAN_STRIPPING = + IB_UVERBS_QP_CREATE_CVLAN_STRIPPING, IB_QP_CREATE_SOURCE_QPN = 1 << 10, - IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, + IB_QP_CREATE_PCI_WRITE_END_PADDING = + IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -1654,7 +1658,7 @@ enum ib_raw_packet_caps { }; enum ib_wq_type { - IB_WQT_RQ + IB_WQT_RQ = IB_UVERBS_WQT_RQ, }; enum ib_wq_state { @@ -1677,10 +1681,11 @@ struct ib_wq { }; enum ib_wq_flags { - IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, - IB_WQ_FLAGS_SCATTER_FCS = 1 << 1, - IB_WQ_FLAGS_DELAY_DROP = 1 << 2, - IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, + IB_WQ_FLAGS_CVLAN_STRIPPING = IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING, + IB_WQ_FLAGS_SCATTER_FCS = IB_UVERBS_WQ_FLAGS_SCATTER_FCS, + IB_WQ_FLAGS_DELAY_DROP = IB_UVERBS_WQ_FLAGS_DELAY_DROP, + IB_WQ_FLAGS_PCI_WRITE_END_PADDING = + IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING, }; struct ib_wq_init_attr { diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index a640bb814be0..b1662dfe86a6 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -64,6 +64,40 @@ enum ib_uverbs_access_flags { ~(IB_UVERBS_ACCESS_OPTIONAL_FIRST - 1) }; +enum ib_uverbs_srq_type { + IB_UVERBS_SRQT_BASIC, + IB_UVERBS_SRQT_XRC, + IB_UVERBS_SRQT_TM, +}; + +enum ib_uverbs_wq_type { + IB_UVERBS_WQT_RQ, +}; + +enum ib_uverbs_wq_flags { + IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, + IB_UVERBS_WQ_FLAGS_SCATTER_FCS = 1 << 1, + IB_UVERBS_WQ_FLAGS_DELAY_DROP = 1 << 2, + IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, +}; + +enum ib_uverbs_qp_type { + IB_UVERBS_QPT_RC = 2, + IB_UVERBS_QPT_UC, + IB_UVERBS_QPT_UD, + IB_UVERBS_QPT_RAW_PACKET = 8, + IB_UVERBS_QPT_XRC_INI, + IB_UVERBS_QPT_XRC_TGT, + IB_UVERBS_QPT_DRIVER = 0xFF, +}; + +enum ib_uverbs_qp_create_flags { + IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_UVERBS_QP_CREATE_SCATTER_FCS = 1 << 8, + IB_UVERBS_QP_CREATE_CVLAN_STRIPPING = 1 << 9, + IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, +}; + enum ib_uverbs_query_port_cap_flags { IB_UVERBS_PCF_SM = 1 << 1, IB_UVERBS_PCF_NOTICE_SUP = 1 << 2, From c3eab946aba443f0b44a08f446735c74495610a9 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 May 2020 10:27:09 +0300 Subject: [PATCH 448/562] IB/uverbs: Introduce create/destroy SRQ commands over ioctl Introduce create/destroy SRQ commands over the ioctl interface to let it be extended to get an asynchronous event FD. Link: https://lore.kernel.org/r/20200519072711.257271-6-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 3 +- drivers/infiniband/core/rdma_core.h | 1 + drivers/infiniband/core/uverbs_std_types.c | 32 --- .../infiniband/core/uverbs_std_types_srq.c | 234 ++++++++++++++++++ drivers/infiniband/core/uverbs_uapi.c | 1 + include/uapi/rdma/ib_user_ioctl_cmds.h | 27 ++ 6 files changed, 265 insertions(+), 33 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_std_types_srq.c diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 870f0fcd54d5..d7b46a7c07fd 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -36,6 +36,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ uverbs_uapi.o uverbs_std_types_device.o \ - uverbs_std_types_async_fd.o + uverbs_std_types_async_fd.o \ + uverbs_std_types_srq.o ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 2b529233e159..d623f911b70b 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -159,6 +159,7 @@ extern const struct uapi_definition uverbs_def_obj_dm[]; extern const struct uapi_definition uverbs_def_obj_flow_action[]; extern const struct uapi_definition uverbs_def_obj_intf[]; extern const struct uapi_definition uverbs_def_obj_mr[]; +extern const struct uapi_definition uverbs_def_obj_srq[]; extern const struct uapi_definition uverbs_def_write_intf[]; static inline const struct uverbs_api_write_method * diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 3abfc63225cb..d9b6912eafa8 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -142,31 +142,6 @@ static int uverbs_free_wq(struct ib_uobject *uobject, return ret; } -static int uverbs_free_srq(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) -{ - struct ib_srq *srq = uobject->object; - struct ib_uevent_object *uevent = - container_of(uobject, struct ib_uevent_object, uobject); - enum ib_srq_type srq_type = srq->srq_type; - int ret; - - ret = ib_destroy_srq_user(srq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) - return ret; - - if (srq_type == IB_SRQT_XRC) { - struct ib_usrq_object *us = - container_of(uevent, struct ib_usrq_object, uevent); - - atomic_dec(&us->uxrcd->refcnt); - } - - ib_uverbs_release_uevent(uevent); - return ret; -} - static int uverbs_free_xrcd(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) @@ -267,11 +242,6 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw), &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY)); -DECLARE_UVERBS_NAMED_OBJECT( - UVERBS_OBJECT_SRQ, - UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), - uverbs_free_srq)); - DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_AH_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE, @@ -346,8 +316,6 @@ const struct uapi_definition uverbs_def_obj_intf[] = { UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW, UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)), - UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ, - UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW, UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ, diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c new file mode 100644 index 000000000000..c0ecbba26bf4 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_srq.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_free_srq(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_srq *srq = uobject->object; + struct ib_uevent_object *uevent = + container_of(uobject, struct ib_uevent_object, uobject); + enum ib_srq_type srq_type = srq->srq_type; + int ret; + + ret = ib_destroy_srq_user(srq, &attrs->driver_udata); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + if (srq_type == IB_SRQT_XRC) { + struct ib_usrq_object *us = + container_of(uobject, struct ib_usrq_object, + uevent.uobject); + + atomic_dec(&us->uxrcd->refcnt); + } + + ib_uverbs_release_uevent(uevent); + return ret; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_usrq_object *obj = container_of( + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE), + typeof(*obj), uevent.uobject); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_SRQ_PD_HANDLE); + struct ib_srq_init_attr attr = {}; + struct ib_uobject *xrcd_uobj; + struct ib_srq *srq; + u64 user_handle; + int ret; + + ret = uverbs_copy_from(&attr.attr.max_sge, attrs, + UVERBS_ATTR_CREATE_SRQ_MAX_SGE); + if (!ret) + ret = uverbs_copy_from(&attr.attr.max_wr, attrs, + UVERBS_ATTR_CREATE_SRQ_MAX_WR); + if (!ret) + ret = uverbs_copy_from(&attr.attr.srq_limit, attrs, + UVERBS_ATTR_CREATE_SRQ_LIMIT); + if (!ret) + ret = uverbs_copy_from(&user_handle, attrs, + UVERBS_ATTR_CREATE_SRQ_USER_HANDLE); + if (!ret) + ret = uverbs_get_const(&attr.srq_type, attrs, + UVERBS_ATTR_CREATE_SRQ_TYPE); + if (ret) + return ret; + + if (ib_srq_has_cq(attr.srq_type)) { + attr.ext.cq = uverbs_attr_get_obj(attrs, + UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE); + if (IS_ERR(attr.ext.cq)) + return PTR_ERR(attr.ext.cq); + } + + switch (attr.srq_type) { + case IB_UVERBS_SRQT_XRC: + xrcd_uobj = uverbs_attr_get_uobject(attrs, + UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE); + if (IS_ERR(xrcd_uobj)) + return PTR_ERR(xrcd_uobj); + + attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object; + if (!attr.ext.xrc.xrcd) + return -EINVAL; + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, + uobject); + atomic_inc(&obj->uxrcd->refcnt); + break; + case IB_UVERBS_SRQT_TM: + ret = uverbs_copy_from(&attr.ext.tag_matching.max_num_tags, + attrs, + UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS); + if (ret) + return ret; + break; + case IB_UVERBS_SRQT_BASIC: + break; + default: + return -EINVAL; + } + + obj->uevent.event_file = ib_uverbs_get_async_event(attrs, + UVERBS_ATTR_CREATE_SRQ_EVENT_FD); + INIT_LIST_HEAD(&obj->uevent.event_list); + attr.event_handler = ib_uverbs_srq_event_handler; + obj->uevent.uobject.user_handle = user_handle; + + srq = ib_create_srq_user(pd, &attr, obj, &attrs->driver_udata); + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); + goto err; + } + + obj->uevent.uobject.object = srq; + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR, + &attr.attr.max_wr, + sizeof(attr.attr.max_wr)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE, + &attr.attr.max_sge, + sizeof(attr.attr.max_sge)); + if (ret) + return ret; + + if (attr.srq_type == IB_SRQT_XRC) { + ret = uverbs_copy_to(attrs, + UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM, + &srq->ext.xrc.srq_num, + sizeof(srq->ext.xrc.srq_num)); + if (ret) + return ret; + } + + return 0; +err: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); + if (attr.srq_type == IB_SRQT_XRC) + atomic_dec(&obj->uxrcd->refcnt); + return ret; +}; + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_SRQ_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_HANDLE, + UVERBS_OBJECT_SRQ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_SRQ_TYPE, + enum ib_uverbs_srq_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_WR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_SGE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_LIMIT, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE, + UVERBS_OBJECT_XRCD, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_SRQ_EVENT_FD, + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL), + UVERBS_ATTR_UHW()); + +static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_DESTROY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_SRQ_HANDLE); + struct ib_usrq_object *obj = + container_of(uobj, struct ib_usrq_object, uevent.uobject); + struct ib_uverbs_destroy_srq_resp resp = { + .events_reported = obj->uevent.events_reported + }; + + return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_SRQ_RESP, &resp, + sizeof(resp)); +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_SRQ_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_SRQ_HANDLE, + UVERBS_OBJECT_SRQ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_SRQ_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_srq_resp), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_SRQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), + uverbs_free_srq), + &UVERBS_METHOD(UVERBS_METHOD_SRQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_SRQ_DESTROY) +); + +const struct uapi_definition uverbs_def_obj_srq[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 3f121ac31e0a..3f5627954fe7 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -634,6 +634,7 @@ static const struct uapi_definition uverbs_core_api[] = { UAPI_DEF_CHAIN(uverbs_def_obj_flow_action), UAPI_DEF_CHAIN(uverbs_def_obj_intf), UAPI_DEF_CHAIN(uverbs_def_obj_mr), + UAPI_DEF_CHAIN(uverbs_def_obj_srq), UAPI_DEF_CHAIN(uverbs_def_write_intf), {}, }; diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 286fdc1929e0..c07af46ff04c 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -121,6 +121,33 @@ enum uverbs_attrs_destroy_flow_action_esp { UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, }; +enum uverbs_attrs_create_srq_cmd_attr_ids { + UVERBS_ATTR_CREATE_SRQ_HANDLE, + UVERBS_ATTR_CREATE_SRQ_PD_HANDLE, + UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE, + UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE, + UVERBS_ATTR_CREATE_SRQ_USER_HANDLE, + UVERBS_ATTR_CREATE_SRQ_MAX_WR, + UVERBS_ATTR_CREATE_SRQ_MAX_SGE, + UVERBS_ATTR_CREATE_SRQ_LIMIT, + UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS, + UVERBS_ATTR_CREATE_SRQ_TYPE, + UVERBS_ATTR_CREATE_SRQ_EVENT_FD, + UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR, + UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE, + UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM, +}; + +enum uverbs_attrs_destroy_srq_cmd_attr_ids { + UVERBS_ATTR_DESTROY_SRQ_HANDLE, + UVERBS_ATTR_DESTROY_SRQ_RESP, +}; + +enum uverbs_methods_srq { + UVERBS_METHOD_SRQ_CREATE, + UVERBS_METHOD_SRQ_DESTROY, +}; + enum uverbs_methods_cq { UVERBS_METHOD_CQ_CREATE, UVERBS_METHOD_CQ_DESTROY, From ef3bc084a8ed461e3d1f82481f47dacb96596f8f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 May 2020 10:27:10 +0300 Subject: [PATCH 449/562] IB/uverbs: Introduce create/destroy WQ commands over ioctl Introduce create/destroy WQ commands over the ioctl interface to let it be extended to get an asynchronous event FD. Link: https://lore.kernel.org/r/20200519072711.257271-7-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 3 +- drivers/infiniband/core/rdma_core.h | 1 + drivers/infiniband/core/uverbs_std_types.c | 23 --- drivers/infiniband/core/uverbs_std_types_wq.c | 194 ++++++++++++++++++ drivers/infiniband/core/uverbs_uapi.c | 1 + include/uapi/rdma/ib_user_ioctl_cmds.h | 25 +++ 6 files changed, 223 insertions(+), 24 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_std_types_wq.c diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d7b46a7c07fd..96c0a4b5af18 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -37,6 +37,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ uverbs_uapi.o uverbs_std_types_device.o \ uverbs_std_types_async_fd.o \ - uverbs_std_types_srq.o + uverbs_std_types_srq.o \ + uverbs_std_types_wq.o ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index d623f911b70b..9e9f2fa04fb9 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -160,6 +160,7 @@ extern const struct uapi_definition uverbs_def_obj_flow_action[]; extern const struct uapi_definition uverbs_def_obj_intf[]; extern const struct uapi_definition uverbs_def_obj_mr[]; extern const struct uapi_definition uverbs_def_obj_srq[]; +extern const struct uapi_definition uverbs_def_obj_wq[]; extern const struct uapi_definition uverbs_def_write_intf[]; static inline const struct uverbs_api_write_method * diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index d9b6912eafa8..c328d5194076 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -125,23 +125,6 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, return ret; } -static int uverbs_free_wq(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) -{ - struct ib_wq *wq = uobject->object; - struct ib_uwq_object *uwq = - container_of(uobject, struct ib_uwq_object, uevent.uobject); - int ret; - - ret = ib_destroy_wq(wq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) - return ret; - - ib_uverbs_release_uevent(&uwq->uevent); - return ret; -} - static int uverbs_free_xrcd(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) @@ -266,10 +249,6 @@ DECLARE_UVERBS_NAMED_OBJECT( uverbs_free_flow), &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY)); -DECLARE_UVERBS_NAMED_OBJECT( - UVERBS_OBJECT_WQ, - UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); - DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_RWQ_IND_TBL_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE, @@ -318,8 +297,6 @@ const struct uapi_definition uverbs_def_obj_intf[] = { UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW, UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)), - UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ, - UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED( UVERBS_OBJECT_RWQ_IND_TBL, UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)), diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c new file mode 100644 index 000000000000..cad842ede077 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_wq.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_free_wq(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_wq *wq = uobject->object; + struct ib_uwq_object *uwq = + container_of(uobject, struct ib_uwq_object, uevent.uobject); + int ret; + + ret = ib_destroy_wq(wq, &attrs->driver_udata); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_uevent(&uwq->uevent); + return ret; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uwq_object *obj = container_of( + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE), + typeof(*obj), uevent.uobject); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_PD_HANDLE); + struct ib_cq *cq = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_CQ_HANDLE); + struct ib_wq_init_attr wq_init_attr = {}; + struct ib_wq *wq; + u64 user_handle; + int ret; + + ret = uverbs_get_flags32(&wq_init_attr.create_flags, attrs, + UVERBS_ATTR_CREATE_WQ_FLAGS, + IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING | + IB_UVERBS_WQ_FLAGS_SCATTER_FCS | + IB_UVERBS_WQ_FLAGS_DELAY_DROP | + IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING); + if (!ret) + ret = uverbs_copy_from(&wq_init_attr.max_sge, attrs, + UVERBS_ATTR_CREATE_WQ_MAX_SGE); + if (!ret) + ret = uverbs_copy_from(&wq_init_attr.max_wr, attrs, + UVERBS_ATTR_CREATE_WQ_MAX_WR); + if (!ret) + ret = uverbs_copy_from(&user_handle, attrs, + UVERBS_ATTR_CREATE_WQ_USER_HANDLE); + if (!ret) + ret = uverbs_get_const(&wq_init_attr.wq_type, attrs, + UVERBS_ATTR_CREATE_WQ_TYPE); + if (ret) + return ret; + + if (wq_init_attr.wq_type != IB_WQT_RQ) + return -EINVAL; + + obj->uevent.event_file = ib_uverbs_get_async_event(attrs, + UVERBS_ATTR_CREATE_WQ_EVENT_FD); + obj->uevent.uobject.user_handle = user_handle; + INIT_LIST_HEAD(&obj->uevent.event_list); + wq_init_attr.event_handler = ib_uverbs_wq_event_handler; + wq_init_attr.wq_context = attrs->ufile; + wq_init_attr.cq = cq; + + wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata); + if (IS_ERR(wq)) { + ret = PTR_ERR(wq); + goto err; + } + + obj->uevent.uobject.object = wq; + wq->wq_type = wq_init_attr.wq_type; + wq->cq = cq; + wq->pd = pd; + wq->device = pd->device; + wq->wq_context = wq_init_attr.wq_context; + atomic_set(&wq->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&cq->usecnt); + wq->uobject = obj; + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR, + &wq_init_attr.max_wr, + sizeof(wq_init_attr.max_wr)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE, + &wq_init_attr.max_sge, + sizeof(wq_init_attr.max_sge)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM, + &wq->wq_num, + sizeof(wq->wq_num)); + return ret; + +err: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); + return ret; +}; + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_WQ_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_HANDLE, + UVERBS_OBJECT_WQ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_WQ_TYPE, + enum ib_wq_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_WR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_SGE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_WQ_FLAGS, + enum ib_uverbs_wq_flags, + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_WQ_EVENT_FD, + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL), + UVERBS_ATTR_UHW()); + +static int UVERBS_HANDLER(UVERBS_METHOD_WQ_DESTROY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_WQ_HANDLE); + struct ib_uwq_object *obj = + container_of(uobj, struct ib_uwq_object, uevent.uobject); + + return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_WQ_RESP, + &obj->uevent.events_reported, + sizeof(obj->uevent.events_reported)); +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_WQ_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_WQ_HANDLE, + UVERBS_OBJECT_WQ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_WQ_RESP, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_WQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq), + &UVERBS_METHOD(UVERBS_METHOD_WQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_WQ_DESTROY) +); + +const struct uapi_definition uverbs_def_obj_wq[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 3f5627954fe7..0ec8cf86ecfa 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -635,6 +635,7 @@ static const struct uapi_definition uverbs_core_api[] = { UAPI_DEF_CHAIN(uverbs_def_obj_intf), UAPI_DEF_CHAIN(uverbs_def_obj_mr), UAPI_DEF_CHAIN(uverbs_def_obj_srq), + UAPI_DEF_CHAIN(uverbs_def_obj_wq), UAPI_DEF_CHAIN(uverbs_def_write_intf), {}, }; diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index c07af46ff04c..381b17889d20 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -153,6 +153,31 @@ enum uverbs_methods_cq { UVERBS_METHOD_CQ_DESTROY, }; +enum uverbs_attrs_create_wq_cmd_attr_ids { + UVERBS_ATTR_CREATE_WQ_HANDLE, + UVERBS_ATTR_CREATE_WQ_PD_HANDLE, + UVERBS_ATTR_CREATE_WQ_CQ_HANDLE, + UVERBS_ATTR_CREATE_WQ_USER_HANDLE, + UVERBS_ATTR_CREATE_WQ_TYPE, + UVERBS_ATTR_CREATE_WQ_EVENT_FD, + UVERBS_ATTR_CREATE_WQ_MAX_WR, + UVERBS_ATTR_CREATE_WQ_MAX_SGE, + UVERBS_ATTR_CREATE_WQ_FLAGS, + UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR, + UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE, + UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM, +}; + +enum uverbs_attrs_destroy_wq_cmd_attr_ids { + UVERBS_ATTR_DESTROY_WQ_HANDLE, + UVERBS_ATTR_DESTROY_WQ_RESP, +}; + +enum uverbs_methods_wq { + UVERBS_METHOD_WQ_CREATE, + UVERBS_METHOD_WQ_DESTROY, +}; + enum uverbs_methods_actions_flow_action_ops { UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, UVERBS_METHOD_FLOW_ACTION_DESTROY, From 6d1e7ba241e990b5c6ba7fdaa03d466f852f3c9e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 May 2020 10:27:11 +0300 Subject: [PATCH 450/562] IB/uverbs: Introduce create/destroy QP commands over ioctl Introduce create/destroy QP commands over the ioctl interface to let it be extended to get an asynchronous event FD. Link: https://lore.kernel.org/r/20200519072711.257271-8-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 3 +- drivers/infiniband/core/rdma_core.h | 1 + drivers/infiniband/core/uverbs_std_types.c | 40 -- drivers/infiniband/core/uverbs_std_types_qp.c | 401 ++++++++++++++++++ drivers/infiniband/core/uverbs_uapi.c | 1 + include/uapi/rdma/ib_user_ioctl_cmds.h | 28 ++ include/uapi/rdma/ib_user_ioctl_verbs.h | 9 + 7 files changed, 442 insertions(+), 41 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_std_types_qp.c diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 96c0a4b5af18..63c1591223ac 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -38,6 +38,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_uapi.o uverbs_std_types_device.o \ uverbs_std_types_async_fd.o \ uverbs_std_types_srq.o \ - uverbs_std_types_wq.o + uverbs_std_types_wq.o \ + uverbs_std_types_qp.o ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 9e9f2fa04fb9..33706dad6c0f 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -159,6 +159,7 @@ extern const struct uapi_definition uverbs_def_obj_dm[]; extern const struct uapi_definition uverbs_def_obj_flow_action[]; extern const struct uapi_definition uverbs_def_obj_intf[]; extern const struct uapi_definition uverbs_def_obj_mr[]; +extern const struct uapi_definition uverbs_def_obj_qp[]; extern const struct uapi_definition uverbs_def_obj_srq[]; extern const struct uapi_definition uverbs_def_obj_wq[]; extern const struct uapi_definition uverbs_def_write_intf[]; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index c328d5194076..08c39cfb1bd9 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -75,40 +75,6 @@ static int uverbs_free_mw(struct ib_uobject *uobject, return uverbs_dealloc_mw((struct ib_mw *)uobject->object); } -static int uverbs_free_qp(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) -{ - struct ib_qp *qp = uobject->object; - struct ib_uqp_object *uqp = - container_of(uobject, struct ib_uqp_object, uevent.uobject); - int ret; - - /* - * If this is a user triggered destroy then do not allow destruction - * until the user cleans up all the mcast bindings. Unlike in other - * places we forcibly clean up the mcast attachments for !DESTROY - * because the mcast attaches are not ubojects and will not be - * destroyed by anything else during cleanup processing. - */ - if (why == RDMA_REMOVE_DESTROY) { - if (!list_empty(&uqp->mcast_list)) - return -EBUSY; - } else if (qp == qp->real_qp) { - ib_uverbs_detach_umcast(qp, uqp); - } - - ret = ib_destroy_qp_user(qp, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) - return ret; - - if (uqp->uxrcd) - atomic_dec(&uqp->uxrcd->refcnt); - - ib_uverbs_release_uevent(&uqp->uevent); - return ret; -} - static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) @@ -210,10 +176,6 @@ DECLARE_UVERBS_NAMED_OBJECT( "[infinibandevent]", O_RDONLY)); -DECLARE_UVERBS_NAMED_OBJECT( - UVERBS_OBJECT_QP, - UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); - DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_MW_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE, @@ -289,8 +251,6 @@ const struct uapi_definition uverbs_def_obj_intf[] = { UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL, UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), - UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP, - UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH, UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW, diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c new file mode 100644 index 000000000000..3bf8dcdfe7eb --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include "rdma_core.h" +#include "uverbs.h" +#include "core_priv.h" + +static int uverbs_free_qp(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_qp *qp = uobject->object; + struct ib_uqp_object *uqp = + container_of(uobject, struct ib_uqp_object, uevent.uobject); + int ret; + + /* + * If this is a user triggered destroy then do not allow destruction + * until the user cleans up all the mcast bindings. Unlike in other + * places we forcibly clean up the mcast attachments for !DESTROY + * because the mcast attaches are not ubojects and will not be + * destroyed by anything else during cleanup processing. + */ + if (why == RDMA_REMOVE_DESTROY) { + if (!list_empty(&uqp->mcast_list)) + return -EBUSY; + } else if (qp == qp->real_qp) { + ib_uverbs_detach_umcast(qp, uqp); + } + + ret = ib_destroy_qp_user(qp, &attrs->driver_udata); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + if (uqp->uxrcd) + atomic_dec(&uqp->uxrcd->refcnt); + + ib_uverbs_release_uevent(&uqp->uevent); + return ret; +} + +static int check_creation_flags(enum ib_qp_type qp_type, + u32 create_flags) +{ + create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL; + + if (!create_flags || qp_type == IB_QPT_DRIVER) + return 0; + + if (qp_type != IB_QPT_RAW_PACKET && qp_type != IB_QPT_UD) + return -EINVAL; + + if ((create_flags & IB_UVERBS_QP_CREATE_SCATTER_FCS || + create_flags & IB_UVERBS_QP_CREATE_CVLAN_STRIPPING) && + qp_type != IB_QPT_RAW_PACKET) + return -EINVAL; + + return 0; +} + +static void set_caps(struct ib_qp_init_attr *attr, + struct ib_uverbs_qp_cap *cap, bool req) +{ + if (req) { + attr->cap.max_send_wr = cap->max_send_wr; + attr->cap.max_recv_wr = cap->max_recv_wr; + attr->cap.max_send_sge = cap->max_send_sge; + attr->cap.max_recv_sge = cap->max_recv_sge; + attr->cap.max_inline_data = cap->max_inline_data; + } else { + cap->max_send_wr = attr->cap.max_send_wr; + cap->max_recv_wr = attr->cap.max_recv_wr; + cap->max_send_sge = attr->cap.max_send_sge; + cap->max_recv_sge = attr->cap.max_recv_sge; + cap->max_inline_data = attr->cap.max_inline_data; + } +} + +static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uqp_object *obj = container_of( + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_QP_HANDLE), + typeof(*obj), uevent.uobject); + struct ib_qp_init_attr attr = {}; + struct ib_uverbs_qp_cap cap = {}; + struct ib_rwq_ind_table *rwq_ind_tbl = NULL; + struct ib_qp *qp; + struct ib_pd *pd = NULL; + struct ib_srq *srq = NULL; + struct ib_cq *recv_cq = NULL; + struct ib_cq *send_cq = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *xrcd_uobj = NULL; + struct ib_device *device; + u64 user_handle; + int ret; + + ret = uverbs_copy_from_or_zero(&cap, attrs, + UVERBS_ATTR_CREATE_QP_CAP); + if (!ret) + ret = uverbs_copy_from(&user_handle, attrs, + UVERBS_ATTR_CREATE_QP_USER_HANDLE); + if (!ret) + ret = uverbs_get_const(&attr.qp_type, attrs, + UVERBS_ATTR_CREATE_QP_TYPE); + if (ret) + return ret; + + switch (attr.qp_type) { + case IB_QPT_XRC_TGT: + if (uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) || + uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE) || + uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_PD_HANDLE) || + uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE)) + return -EINVAL; + + xrcd_uobj = uverbs_attr_get_uobject(attrs, + UVERBS_ATTR_CREATE_QP_XRCD_HANDLE); + if (IS_ERR(xrcd_uobj)) + return PTR_ERR(xrcd_uobj); + + xrcd = (struct ib_xrcd *)xrcd_uobj->object; + if (!xrcd) + return -EINVAL; + device = xrcd->device; + break; + case IB_UVERBS_QPT_RAW_PACKET: + if (!capable(CAP_NET_RAW)) + return -EPERM; + fallthrough; + case IB_UVERBS_QPT_RC: + case IB_UVERBS_QPT_UC: + case IB_UVERBS_QPT_UD: + case IB_UVERBS_QPT_XRC_INI: + case IB_UVERBS_QPT_DRIVER: + if (uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_XRCD_HANDLE) || + (uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_SRQ_HANDLE) && + attr.qp_type == IB_QPT_XRC_INI)) + return -EINVAL; + + pd = uverbs_attr_get_obj(attrs, + UVERBS_ATTR_CREATE_QP_PD_HANDLE); + if (IS_ERR(pd)) + return PTR_ERR(pd); + + rwq_ind_tbl = uverbs_attr_get_obj(attrs, + UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE); + if (!IS_ERR(rwq_ind_tbl)) { + if (cap.max_recv_wr || cap.max_recv_sge || + uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) || + uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_SRQ_HANDLE)) + return -EINVAL; + + /* send_cq is optinal */ + if (cap.max_send_wr) { + send_cq = uverbs_attr_get_obj(attrs, + UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE); + if (IS_ERR(send_cq)) + return PTR_ERR(send_cq); + } + attr.rwq_ind_tbl = rwq_ind_tbl; + } else { + send_cq = uverbs_attr_get_obj(attrs, + UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE); + if (IS_ERR(send_cq)) + return PTR_ERR(send_cq); + + if (attr.qp_type != IB_QPT_XRC_INI) { + recv_cq = uverbs_attr_get_obj(attrs, + UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE); + if (IS_ERR(recv_cq)) + return PTR_ERR(recv_cq); + } + } + + device = pd->device; + break; + default: + return -EINVAL; + } + + ret = uverbs_get_flags32(&attr.create_flags, attrs, + UVERBS_ATTR_CREATE_QP_FLAGS, + IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_UVERBS_QP_CREATE_SCATTER_FCS | + IB_UVERBS_QP_CREATE_CVLAN_STRIPPING | + IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING | + IB_UVERBS_QP_CREATE_SQ_SIG_ALL); + if (ret) + return ret; + + ret = check_creation_flags(attr.qp_type, attr.create_flags); + if (ret) + return ret; + + if (uverbs_attr_is_valid(attrs, + UVERBS_ATTR_CREATE_QP_SOURCE_QPN)) { + ret = uverbs_copy_from(&attr.source_qpn, attrs, + UVERBS_ATTR_CREATE_QP_SOURCE_QPN); + if (ret) + return ret; + attr.create_flags |= IB_QP_CREATE_SOURCE_QPN; + } + + srq = uverbs_attr_get_obj(attrs, + UVERBS_ATTR_CREATE_QP_SRQ_HANDLE); + if (!IS_ERR(srq)) { + if ((srq->srq_type == IB_SRQT_XRC && + attr.qp_type != IB_QPT_XRC_TGT) || + (srq->srq_type != IB_SRQT_XRC && + attr.qp_type == IB_QPT_XRC_TGT)) + return -EINVAL; + attr.srq = srq; + } + + obj->uevent.event_file = ib_uverbs_get_async_event(attrs, + UVERBS_ATTR_CREATE_QP_EVENT_FD); + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); + obj->uevent.uobject.user_handle = user_handle; + attr.event_handler = ib_uverbs_qp_event_handler; + attr.send_cq = send_cq; + attr.recv_cq = recv_cq; + attr.xrcd = xrcd; + if (attr.create_flags & IB_UVERBS_QP_CREATE_SQ_SIG_ALL) { + /* This creation bit is uverbs one, need to mask before + * calling drivers. It was added to prevent an extra user attr + * only for that when using ioctl. + */ + attr.create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL; + attr.sq_sig_type = IB_SIGNAL_ALL_WR; + } else { + attr.sq_sig_type = IB_SIGNAL_REQ_WR; + } + + set_caps(&attr, &cap, true); + mutex_init(&obj->mcast_lock); + + if (attr.qp_type == IB_QPT_XRC_TGT) + qp = ib_create_qp(pd, &attr); + else + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, + obj); + + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } + + if (attr.qp_type != IB_QPT_XRC_TGT) { + atomic_inc(&pd->usecnt); + if (attr.send_cq) + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + if (attr.rwq_ind_tbl) + atomic_inc(&attr.rwq_ind_tbl->usecnt); + } else { + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, + uobject); + atomic_inc(&obj->uxrcd->refcnt); + /* It is done in _ib_create_qp for other QP types */ + qp->uobject = obj; + } + + obj->uevent.uobject.object = qp; + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_QP_HANDLE); + + if (attr.qp_type != IB_QPT_XRC_TGT) { + ret = ib_create_qp_security(qp, device); + if (ret) + return ret; + } + + set_caps(&attr, &cap, false); + ret = uverbs_copy_to_struct_or_zero(attrs, + UVERBS_ATTR_CREATE_QP_RESP_CAP, &cap, + sizeof(cap)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_QP_RESP_QP_NUM, + &qp->qp_num, + sizeof(qp->qp_num)); + + return ret; +err_put: + if (obj->uevent.event_file) + uverbs_uobject_put(&obj->uevent.event_file->uobj); + return ret; +}; + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QP_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_HANDLE, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_XRCD_HANDLE, + UVERBS_OBJECT_XRCD, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SRQ_HANDLE, + UVERBS_OBJECT_SRQ, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_CAP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap, + max_inline_data), + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_QP_TYPE, + enum ib_uverbs_qp_type, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_QP_FLAGS, + enum ib_uverbs_qp_create_flags, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_SOURCE_QPN, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_QP_EVENT_FD, + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_CAP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap, + max_inline_data), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_QP_NUM, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_UHW()); + +static int UVERBS_HANDLER(UVERBS_METHOD_QP_DESTROY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_QP_HANDLE); + struct ib_uqp_object *obj = + container_of(uobj, struct ib_uqp_object, uevent.uobject); + struct ib_uverbs_destroy_qp_resp resp = { + .events_reported = obj->uevent.events_reported + }; + + return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_QP_RESP, &resp, + sizeof(resp)); +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QP_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_QP_HANDLE, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_QP_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_qp_resp), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_QP, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp), + &UVERBS_METHOD(UVERBS_METHOD_QP_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY)); + +const struct uapi_definition uverbs_def_obj_qp[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP, + UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 0ec8cf86ecfa..5addc8fae3f3 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -634,6 +634,7 @@ static const struct uapi_definition uverbs_core_api[] = { UAPI_DEF_CHAIN(uverbs_def_obj_flow_action), UAPI_DEF_CHAIN(uverbs_def_obj_intf), UAPI_DEF_CHAIN(uverbs_def_obj_mr), + UAPI_DEF_CHAIN(uverbs_def_obj_qp), UAPI_DEF_CHAIN(uverbs_def_obj_srq), UAPI_DEF_CHAIN(uverbs_def_obj_wq), UAPI_DEF_CHAIN(uverbs_def_write_intf), diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 381b17889d20..4961d5e858eb 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -121,6 +121,34 @@ enum uverbs_attrs_destroy_flow_action_esp { UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, }; +enum uverbs_attrs_create_qp_cmd_attr_ids { + UVERBS_ATTR_CREATE_QP_HANDLE, + UVERBS_ATTR_CREATE_QP_XRCD_HANDLE, + UVERBS_ATTR_CREATE_QP_PD_HANDLE, + UVERBS_ATTR_CREATE_QP_SRQ_HANDLE, + UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE, + UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE, + UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE, + UVERBS_ATTR_CREATE_QP_USER_HANDLE, + UVERBS_ATTR_CREATE_QP_CAP, + UVERBS_ATTR_CREATE_QP_TYPE, + UVERBS_ATTR_CREATE_QP_FLAGS, + UVERBS_ATTR_CREATE_QP_SOURCE_QPN, + UVERBS_ATTR_CREATE_QP_EVENT_FD, + UVERBS_ATTR_CREATE_QP_RESP_CAP, + UVERBS_ATTR_CREATE_QP_RESP_QP_NUM, +}; + +enum uverbs_attrs_destroy_qp_cmd_attr_ids { + UVERBS_ATTR_DESTROY_QP_HANDLE, + UVERBS_ATTR_DESTROY_QP_RESP, +}; + +enum uverbs_methods_qp { + UVERBS_METHOD_QP_CREATE, + UVERBS_METHOD_QP_DESTROY, +}; + enum uverbs_attrs_create_srq_cmd_attr_ids { UVERBS_ATTR_CREATE_SRQ_HANDLE, UVERBS_ATTR_CREATE_SRQ_PD_HANDLE, diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index b1662dfe86a6..5debab45ebcb 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -96,6 +96,7 @@ enum ib_uverbs_qp_create_flags { IB_UVERBS_QP_CREATE_SCATTER_FCS = 1 << 8, IB_UVERBS_QP_CREATE_CVLAN_STRIPPING = 1 << 9, IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, + IB_UVERBS_QP_CREATE_SQ_SIG_ALL = 1 << 12, }; enum ib_uverbs_query_port_cap_flags { @@ -219,6 +220,14 @@ struct ib_uverbs_query_port_resp_ex { __u8 reserved[6]; }; +struct ib_uverbs_qp_cap { + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; +}; + enum rdma_driver_id { RDMA_DRIVER_UNKNOWN, RDMA_DRIVER_MLX5, From 47393fb57ba7b914c869f70010326c8b8940c3a0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 May 2020 15:03:47 +0300 Subject: [PATCH 451/562] block/rnbd: Fix an IS_ERR() vs NULL check in find_or_create_sess() The alloc_sess() function returns error pointers, it never returns NULL. Fixes: f7a7a5c228d4 ("block/rnbd: client: main functionality") Link: https://lore.kernel.org/r/20200519120347.GD42765@mwanda Signed-off-by: Dan Carpenter Reviewed-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-clt.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 450a571e6a1e..cc6a4e2587ae 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -923,13 +923,12 @@ rnbd_clt_session *find_or_create_sess(const char *sessname, bool *first) sess = __find_and_get_sess(sessname); if (!sess) { sess = alloc_sess(sessname); - if (sess) { - list_add(&sess->list, &sess_list); - *first = true; - } else { + if (IS_ERR(sess)) { mutex_unlock(&sess_lock); - return ERR_PTR(-ENOMEM); + return sess; } + list_add(&sess->list, &sess_list); + *first = true; } else *first = false; mutex_unlock(&sess_lock); From 47a3734b03904bbccbba0c8f78cf2b0f0aa157e7 Mon Sep 17 00:00:00 2001 From: Rodrigo Alencar <455.rodrigo.alencar@gmail.com> Date: Thu, 14 May 2020 10:00:12 -0300 Subject: [PATCH 452/562] gpio: max730x: bring gpiochip_add_data after port config gpiochip_add_data being called before might cause premature calls of the gpiochip operations before the port_config values are initialized, which would possibily write zeros to port configuration registers, an operation not allowed. For example, if there are gpio-hog nodes in a device-tree, the sequence of function calls are performed gpiochip_add_data of_gpiochip_add of_gpiochip_scan_gpios of_gpiochip_add_hog gpiod_hog gpiochip_request_own_desc gpiod_configure_flags gpiod_direction_output/gpiod_direction_input which would call later the gpiochip operation direction_output or direction_input prior the port_config[] initialization. Moreover, gpiochip_get_data is replaced by the container_of macro inside the gpiochip operations, which would allow the calling of max7301_direction_input prior to gpiochip_add_data Signed-off-by: Rodrigo Alencar <455.rodrigo.alencar@gmail.com> [Bartosz: tweaked the commit message] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-max730x.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c index 1e1935c51096..b8c1fe20f49a 100644 --- a/drivers/gpio/gpio-max730x.c +++ b/drivers/gpio/gpio-max730x.c @@ -47,7 +47,7 @@ static int max7301_direction_input(struct gpio_chip *chip, unsigned offset) { - struct max7301 *ts = gpiochip_get_data(chip); + struct max7301 *ts = container_of(chip, struct max7301, chip); u8 *config; u8 offset_bits, pin_config; int ret; @@ -89,7 +89,7 @@ static int __max7301_set(struct max7301 *ts, unsigned offset, int value) static int max7301_direction_output(struct gpio_chip *chip, unsigned offset, int value) { - struct max7301 *ts = gpiochip_get_data(chip); + struct max7301 *ts = container_of(chip, struct max7301, chip); u8 *config; u8 offset_bits; int ret; @@ -189,10 +189,6 @@ int __max730x_probe(struct max7301 *ts) ts->chip.parent = dev; ts->chip.owner = THIS_MODULE; - ret = gpiochip_add_data(&ts->chip, ts); - if (ret) - goto exit_destroy; - /* * initialize pullups according to platform data and cache the * register values for later use. @@ -214,7 +210,9 @@ int __max730x_probe(struct max7301 *ts) } } - return ret; + ret = gpiochip_add_data(&ts->chip, ts); + if (!ret) + return ret; exit_destroy: mutex_destroy(&ts->lock); From 63a3345c2d42a9b29e1ce2d3a4043689b3995cea Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 21 May 2020 10:26:50 +0300 Subject: [PATCH 453/562] IB/cma: Fix ports memory leak in cma_configfs The allocated ports structure in never freed. The free function should be called by release_cma_ports_group, but the group is never released since we don't remove its default group. Remove default groups when device group is deleted. Fixes: 045959db65c6 ("IB/cma: Add configfs for rdma_cm") Link: https://lore.kernel.org/r/20200521072650.567908-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma_configfs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index c672a4978bfd..3c1e2ca564fe 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -322,8 +322,21 @@ static struct config_group *make_cma_dev(struct config_group *group, return ERR_PTR(err); } +static void drop_cma_dev(struct config_group *cgroup, struct config_item *item) +{ + struct config_group *group = + container_of(item, struct config_group, cg_item); + struct cma_dev_group *cma_dev_group = + container_of(group, struct cma_dev_group, device_group); + + configfs_remove_default_groups(&cma_dev_group->ports_group); + configfs_remove_default_groups(&cma_dev_group->device_group); + config_item_put(item); +} + static struct configfs_group_operations cma_subsys_group_ops = { .make_group = make_cma_dev, + .drop_item = drop_cma_dev, }; static const struct config_item_type cma_subsys_type = { From cdb685cb9158fa67f6f4584ea39279ed7ae39253 Mon Sep 17 00:00:00 2001 From: Danil Kipnis Date: Thu, 21 May 2020 20:59:09 +0200 Subject: [PATCH 454/562] RDMA/rnbd: Fix compilation error when CONFIG_MODULES is disabled module_is_live function is only defined when CONFIG_MODULES is enabled. Use try_module_get instead to check whether the module is being removed. When module unload and manuall unmapping is happening in parallel, we can try removing the symlink twice: rnbd_client_exit vs. rnbd_clt_unmap_dev_store. This is probably not the best way to deal with this race in general, but for now this fixes the compilation issue when CONFIG_MODULES is disabled and has no functional impact. Regression tests passed. Fixes: 1eb54f8f5dd8 ("block/rnbd: client: sysfs interface functions") Link: https://lore.kernel.org/r/20200521185909.457245-1-danil.kipnis@cloud.ionos.com Reported-by: Randy Dunlap Suggested-by: Guoqing Jiang Signed-off-by: Danil Kipnis Acked-by: Randy Dunlap Signed-off-by: Jason Gunthorpe --- drivers/block/rnbd/rnbd-clt-sysfs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index a4508fcc7ffe..4f4474eecadb 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -428,12 +428,15 @@ static struct attribute *rnbd_dev_attrs[] = { void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) { /* - * The module_is_live() check is crucial and helps to avoid annoying - * sysfs warning raised in sysfs_remove_link(), when the whole sysfs - * path was just removed, see rnbd_close_sessions(). + * The module unload rnbd_client_exit path is racing with unmapping of + * the last single device from the sysfs manually + * i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because + * of sysfs link already was removed already. */ - if (strlen(dev->blk_symlink_name) && module_is_live(THIS_MODULE)) + if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) { sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); + module_put(THIS_MODULE); + } } static struct kobj_type rnbd_dev_ktype = { From e172037be757dc7ab6ee67932c6663a2ff8cfd27 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Fri, 22 May 2020 08:28:33 +0000 Subject: [PATCH 455/562] RDMA/rtrs: server: Use already dereferenced rtrs_sess structure The rtrs_sess structure has already been extracted above from the rtrs_srv_sess structure. Use that to avoid redundant dereferencing. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20200522082833.1480551-1-haris.phnx@gmail.com Signed-off-by: Md Haris Iqbal Acked-by: Danil Kipnis Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 1fc6ece036ff..5ef8988ee75b 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1822,13 +1822,13 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, /* * Sanity checks */ - if (con_num != sess->s.con_num || cid >= sess->s.con_num) { + if (con_num != s->con_num || cid >= s->con_num) { rtrs_err(s, "Incorrect request: %d, %d\n", cid, con_num); mutex_unlock(&srv->paths_mutex); goto reject_w_econnreset; } - if (sess->s.con[cid]) { + if (s->con[cid]) { rtrs_err(s, "Connection already exists: %d\n", cid); mutex_unlock(&srv->paths_mutex); From a94dae867c5663f36c950b82832e146a6c2f0e42 Mon Sep 17 00:00:00 2001 From: Danil Kipnis Date: Fri, 22 May 2020 07:39:24 +0200 Subject: [PATCH 456/562] RDMA/rtrs: Get rid of the do_next_path while_next_path macros The macros do_each_path/while_each_path lead to a smatch warning: drivers/infiniband/ulp/rtrs/rtrs-clt.c:1196 rtrs_clt_failover_req() warn: inconsistent indenting drivers/infiniband/ulp/rtrs/rtrs-clt.c:2890 rtrs_clt_request() warn: inconsistent indenting Also checkpatch complains: ERROR: Macros with multiple statements should be enclosed in a do - while loop The macros are used only in two places: for a normal IO path and for the failover path triggered after errors. Get rid of the macros and just use a for loop iterating over the list of paths in both places. It is easier to read and also less lines of code. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20200522053924.528980-1-danil.kipnis@cloud.ionos.com Reported-by: kbuild test robot Signed-off-by: Danil Kipnis Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 29 ++++++++++++-------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 0ab7e5e912c0..564388a85603 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -726,18 +726,6 @@ struct path_it { struct rtrs_clt_sess *(*next_path)(struct path_it *it); }; -#define do_each_path(path, clt, it) { \ - path_it_init(it, clt); \ - rcu_read_lock(); \ - for ((it)->i = 0; ((path) = ((it)->next_path)(it)) && \ - (it)->i < (it)->clt->paths_num; \ - (it)->i++) - -#define while_each_path(it) \ - path_it_deinit(it); \ - rcu_read_unlock(); \ - } - /** * list_next_or_null_rr_rcu - get next list element in round-robin fashion. * @head: the head for the list. @@ -1175,7 +1163,10 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt, int err = -ECONNABORTED; struct path_it it; - do_each_path(alive_sess, clt, &it) { + rcu_read_lock(); + for (path_it_init(&it, clt); + (alive_sess = it.next_path(&it)) && it.i < it.clt->paths_num; + it.i++) { if (unlikely(READ_ONCE(alive_sess->state) != RTRS_CLT_CONNECTED)) continue; @@ -1191,7 +1182,9 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt, /* Success path */ rtrs_clt_inc_failover_cnt(alive_sess->stats); break; - } while_each_path(&it); + } + path_it_deinit(&it); + rcu_read_unlock(); return err; } @@ -2862,7 +2855,9 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, dma_dir = DMA_TO_DEVICE; } - do_each_path(sess, clt, &it) { + rcu_read_lock(); + for (path_it_init(&it, clt); + (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) continue; @@ -2887,7 +2882,9 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, } /* Success path */ break; - } while_each_path(&it); + } + path_it_deinit(&it); + rcu_read_unlock(); return err; } From 494a94e38dcf62543a32a4424d646ff80b4b28bd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 May 2020 16:12:30 +0300 Subject: [PATCH 457/562] gpio: dwapb: Call acpi_gpiochip_free_interrupts() on GPIO chip de-registration Add missed acpi_gpiochip_free_interrupts() call when unregistering ports. While at it, drop extra check to call acpi_gpiochip_request_interrupts(). There is no need to have an additional check to call acpi_gpiochip_request_interrupts(). Even without any interrupts available the registered ACPI Event handlers can be useful for debugging purposes. Fixes: e6cb3486f5a1 ("gpio: dwapb: add gpio-signaled acpi event support") Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Acked-by: Serge Semin Link: https://lore.kernel.org/r/20200519131233.59032-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 8639c4a7f469..e5d844304f8d 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -505,26 +505,33 @@ static int dwapb_gpio_add_port(struct dwapb_gpio *gpio, dwapb_configure_irqs(gpio, port, pp); err = gpiochip_add_data(&port->gc, port); - if (err) + if (err) { dev_err(gpio->dev, "failed to register gpiochip for port%d\n", port->idx); - else - port->is_registered = true; + return err; + } /* Add GPIO-signaled ACPI event support */ - if (pp->has_irq) - acpi_gpiochip_request_interrupts(&port->gc); + acpi_gpiochip_request_interrupts(&port->gc); - return err; + port->is_registered = true; + + return 0; } static void dwapb_gpio_unregister(struct dwapb_gpio *gpio) { unsigned int m; - for (m = 0; m < gpio->nr_ports; ++m) - if (gpio->ports[m].is_registered) - gpiochip_remove(&gpio->ports[m].gc); + for (m = 0; m < gpio->nr_ports; ++m) { + struct dwapb_gpio_port *port = &gpio->ports[m]; + + if (!port->is_registered) + continue; + + acpi_gpiochip_free_interrupts(&port->gc); + gpiochip_remove(&port->gc); + } } static void dwapb_get_irq(struct device *dev, struct fwnode_handle *fwnode, From d7cc23604f367c0d0f272f0af55e2032de50a053 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 May 2020 16:12:31 +0300 Subject: [PATCH 458/562] gpio: dwapb: avoid error message for optional IRQ platform_get_irq() will generate an error message if the requested IRQ is not present. Use platform_get_irq_optional() to avoid the error message being generated. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Reviewed-by: Serge Semin Acked-by: Serge Semin Link: https://lore.kernel.org/r/20200519131233.59032-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index e5d844304f8d..944dae80d687 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -549,7 +549,7 @@ static void dwapb_get_irq(struct device *dev, struct fwnode_handle *fwnode, if (np) pp->irq[j] = of_irq_get(np, j); else if (has_acpi_companion(dev)) - pp->irq[j] = platform_get_irq(to_platform_device(dev), j); + pp->irq[j] = platform_get_irq_optional(to_platform_device(dev), j); if (pp->irq[j] >= 0) pp->has_irq = true; From aa90939d22973765cc6318c6748c272b954e202f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 May 2020 16:12:32 +0300 Subject: [PATCH 459/562] gpio: dwapb: Don't use IRQ 0 as valid Linux interrupt IRQ 0 is not valid in Linux interrupt number space. Refactor the code with this kept in mind. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Acked-by: Serge Semin Link: https://lore.kernel.org/r/20200519131233.59032-3-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 944dae80d687..d3765672c42b 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -417,7 +417,7 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, int i; for (i = 0; i < pp->ngpio; i++) { - if (pp->irq[i] >= 0) + if (pp->irq[i]) irq_set_chained_handler_and_data(pp->irq[i], dwapb_irq_handler, gpio); } @@ -538,20 +538,20 @@ static void dwapb_get_irq(struct device *dev, struct fwnode_handle *fwnode, struct dwapb_port_property *pp) { struct device_node *np = NULL; - int j; + int irq = -ENXIO, j; if (fwnode_property_read_bool(fwnode, "interrupt-controller")) np = to_of_node(fwnode); for (j = 0; j < pp->ngpio; j++) { - pp->irq[j] = -ENXIO; - if (np) - pp->irq[j] = of_irq_get(np, j); + irq = of_irq_get(np, j); else if (has_acpi_companion(dev)) - pp->irq[j] = platform_get_irq_optional(to_platform_device(dev), j); + irq = platform_get_irq_optional(to_platform_device(dev), j); + if (irq > 0) + pp->irq[j] = irq; - if (pp->irq[j] >= 0) + if (pp->irq[j]) pp->has_irq = true; } From 551cb86cbb7d85a3d16d10f6fb4fb954f13a7556 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 May 2020 16:12:33 +0300 Subject: [PATCH 460/562] gpio: dwapb: Remove unneeded has_irq member in struct dwapb_port_property has_irq member of struct dwapb_port_property is used only in one place, so, make it local test instead and remove from the structure. This local test is using memchr_inv() which is quite efficient in comparison to the original loop and possible little overhead can be neglected. Signed-off-by: Andy Shevchenko Tested-by: Serge Semin Acked-by: Lee Jones Acked-by: Serge Semin Link: https://lore.kernel.org/r/20200519131233.59032-4-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 14 +++++++------- drivers/mfd/intel_quark_i2c_gpio.c | 1 - include/linux/platform_data/gpio-dwapb.h | 1 - 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index d3765672c42b..1d8d55bd63aa 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -366,6 +366,11 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, irq_hw_number_t hwirq; int err, i; + if (memchr_inv(pp->irq, 0, sizeof(pp->irq)) == NULL) { + dev_warn(gpio->dev, "no IRQ for port%d\n", pp->idx); + return; + } + gpio->domain = irq_domain_create_linear(fwnode, ngpio, &irq_generic_chip_ops, gpio); if (!gpio->domain) @@ -501,7 +506,8 @@ static int dwapb_gpio_add_port(struct dwapb_gpio *gpio, if (pp->idx == 0) port->gc.set_config = dwapb_gpio_set_config; - if (pp->has_irq) + /* Only port A can provide interrupts in all configurations of the IP */ + if (pp->idx == 0) dwapb_configure_irqs(gpio, port, pp); err = gpiochip_add_data(&port->gc, port); @@ -550,13 +556,7 @@ static void dwapb_get_irq(struct device *dev, struct fwnode_handle *fwnode, irq = platform_get_irq_optional(to_platform_device(dev), j); if (irq > 0) pp->irq[j] = irq; - - if (pp->irq[j]) - pp->has_irq = true; } - - if (!pp->has_irq) - dev_warn(dev, "no irq for port%d\n", pp->idx); } static struct dwapb_platform_data *dwapb_gpio_get_pdata(struct device *dev) diff --git a/drivers/mfd/intel_quark_i2c_gpio.c b/drivers/mfd/intel_quark_i2c_gpio.c index 41326b48da55..84ca7902e1df 100644 --- a/drivers/mfd/intel_quark_i2c_gpio.c +++ b/drivers/mfd/intel_quark_i2c_gpio.c @@ -216,7 +216,6 @@ static int intel_quark_gpio_setup(struct pci_dev *pdev, struct mfd_cell *cell) pdata->properties->ngpio = INTEL_QUARK_MFD_NGPIO; pdata->properties->gpio_base = INTEL_QUARK_MFD_GPIO_BASE; pdata->properties->irq[0] = pdev->irq; - pdata->properties->has_irq = true; pdata->properties->irq_shared = true; cell->platform_data = pdata; diff --git a/include/linux/platform_data/gpio-dwapb.h b/include/linux/platform_data/gpio-dwapb.h index 3c606c450d05..ff1be737bad6 100644 --- a/include/linux/platform_data/gpio-dwapb.h +++ b/include/linux/platform_data/gpio-dwapb.h @@ -12,7 +12,6 @@ struct dwapb_port_property { unsigned int ngpio; unsigned int gpio_base; int irq[32]; - bool has_irq; bool irq_shared; }; From 563a6d2c37a0ca28eb66c6f9ad281798a9cc6956 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 May 2020 15:09:55 +0300 Subject: [PATCH 461/562] MAINTAINERS: Fix file name for DesignWare GPIO DT schema The commit 657a06df993c ("dt-bindings: gpio: Convert snps,dw-apb-gpio to DT schema") missed MAINTAINERS update. Fixes: 657a06df993c ("dt-bindings: gpio: Convert snps,dw-apb-gpio to DT schema") Signed-off-by: Andy Shevchenko Reviewed-by: Serge Semin Cc: Serge Semin Link: https://lore.kernel.org/r/20200520120955.68427-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 47c601338776..5761cf280869 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16237,7 +16237,7 @@ M: Hoan Tran M: Serge Semin L: linux-gpio@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt +F: Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml F: drivers/gpio/gpio-dwapb.c SYNOPSYS DESIGNWARE AXI DMAC DRIVER From c22fc62b516de5eeea7b518a68e258948e6276ce Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 19 May 2020 10:11:57 +0200 Subject: [PATCH 462/562] dt-bindings: gpio: Add renesas,em-gio bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document Device Tree bindings for the Renesas EMMA Mobile General Purpose I/O Interface. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Link: https://lore.kernel.org/r/20200519081157.29095-1-geert+renesas@glider.be Signed-off-by: Linus Walleij --- .../bindings/gpio/renesas,em-gio.yaml | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml diff --git a/Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml b/Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml new file mode 100644 index 000000000000..8bdef812c87c --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/renesas,em-gio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas EMMA Mobile General Purpose I/O Interface + +maintainers: + - Magnus Damm + +properties: + compatible: + const: renesas,em-gio + + reg: + items: + - description: First set of contiguous registers + - description: Second set of contiguous registers + + interrupts: + items: + - description: Interrupt for the first set of 16 GPIO ports + - description: Interrupt for the second set of 16 GPIO ports + + gpio-controller: true + + '#gpio-cells': + const: 2 + + gpio-ranges: + maxItems: 1 + + ngpios: + minimum: 1 + maximum: 32 + + interrupt-controller: true + + '#interrupt-cells': + const: 2 + +required: + - compatible + - reg + - interrupts + - gpio-controller + - '#gpio-cells' + - gpio-ranges + - ngpios + - interrupt-controller + - '#interrupt-cells' + +additionalProperties: false + +examples: + - | + #include + gpio0: gpio@e0050000 { + compatible = "renesas,em-gio"; + reg = <0xe0050000 0x2c>, <0xe0050040 0x20>; + interrupts = , + ; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&pfc 0 0 32>; + ngpios = <32>; + interrupt-controller; + #interrupt-cells = <2>; + }; From 22e4ebd05890cf2d9423ed7a35f18b0937c0cb3b Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 22 May 2020 12:12:20 +0800 Subject: [PATCH 463/562] gpio: pxa: Add COMPILE_TEST support Add COMPILE_TEST support to the PXA GPIO driver for better compile testing coverage. Signed-off-by: Tiezhu Yang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 8ef2179fb999..beccdc1d0467 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -439,7 +439,7 @@ config GPIO_PMIC_EIC_SPRD config GPIO_PXA bool "PXA GPIO support" - depends on ARCH_PXA || ARCH_MMP + depends on ARCH_PXA || ARCH_MMP || COMPILE_TEST help Say yes here to support the PXA GPIO device From e33a58a29c6ad6f844cdc184210aa1feb5e2fbe0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 May 2020 00:19:15 +0300 Subject: [PATCH 464/562] gpio: pca935x: Allow IRQ support for driver built as a module Perhaps by some historical reasons the IRQ support has been allowed only for built-in driver. However, there is nothing prevents us to build it as module an use as IRQ chip. Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index beccdc1d0467..78958c8aaf92 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -952,7 +952,7 @@ config GPIO_PCA953X config GPIO_PCA953X_IRQ bool "Interrupt controller support for PCA953x" - depends on GPIO_PCA953X=y + depends on GPIO_PCA953X select GPIOLIB_IRQCHIP help Say yes here to enable the pca953x to be used as an interrupt From 6f8cd246411575703d9312888b70705c396b53a9 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 22 May 2020 16:08:38 +0800 Subject: [PATCH 465/562] gpio: rcar: Fix runtime PM imbalance on error pm_runtime_get_sync() increments the runtime PM usage counter even when it returns an error code. Thus a pairing decrement is needed on the error handling path to keep the counter balanced. Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20200522080839.32612-1-dinghao.liu@zju.edu.cn Signed-off-by: Linus Walleij --- drivers/gpio/gpio-rcar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 7284473c9fe3..eac1582c70da 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -250,8 +250,10 @@ static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) int error; error = pm_runtime_get_sync(p->dev); - if (error < 0) + if (error < 0) { + pm_runtime_put(p->dev); return error; + } error = pinctrl_gpio_request(chip->base + offset); if (error) From 25966e893143f1cf9b1294bc6e33e3a6b51ed2ad Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 20 May 2020 21:53:11 +0800 Subject: [PATCH 466/562] RDMA/hns: Let software PI/CI grow naturally The hardware can truncate PI/CI when posting or polling, the driver does not need to do truncation. Therefore keep the software's PI/CI consistent with it in the hardware. Link: https://lore.kernel.org/r/1589982799-28728-2-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d2c58d395962..6229b57e3c29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -500,8 +500,7 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, - V2_DB_PARAMETER_IDX_S, - qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); + V2_DB_PARAMETER_IDX_S, qp->sq.head); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, V2_DB_PARAMETER_SL_S, qp->sl); @@ -807,7 +806,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, srq_db.byte_4 = cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | (srq->srqn & V2_DB_BYTE_4_TAG_M)); - srq_db.parameter = cpu_to_le32(srq->head); + srq_db.parameter = + cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M); hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } @@ -2940,8 +2940,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, roce_set_field(doorbell[0], V2_CQ_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_CQ_DB_NTR); roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CONS_IDX_M, - V2_CQ_DB_PARAMETER_CONS_IDX_S, - hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); + V2_CQ_DB_PARAMETER_CONS_IDX_S, hr_cq->cons_index); roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CMD_SN_M, V2_CQ_DB_PARAMETER_CMD_SN_S, hr_cq->arm_sn & 0x3); roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S, From 05e6a5a63579d4c55cc996e5148bd6da9ed48860 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 20 May 2020 21:53:12 +0800 Subject: [PATCH 467/562] RDMA/hns: Add CQ flag instead of independent enable flag It's easier to understand and maintain enable flags of cq using a single field in type of u32 than defining a field for every flags in the structure hns_roce_cq, and we can add new flags for features more conveniently in the future. Link: https://lore.kernel.org/r/1589982799-28728-3-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 10 +++++----- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 6dd8deaffec8..e87d616f7988 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -186,8 +186,8 @@ static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, &hr_cq->db); if (err) return err; - hr_cq->db_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; + hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB; + resp->cap_flags |= HNS_ROCE_CQ_FLAG_RECORD_DB; } } else { if (has_db) { @@ -196,7 +196,7 @@ static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, return err; hr_cq->set_ci_db = hr_cq->db.db_record; *hr_cq->set_ci_db = 0; - hr_cq->db_en = 1; + hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB; } hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index; @@ -210,10 +210,10 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, { struct hns_roce_ucontext *uctx; - if (!hr_cq->db_en) + if (!(hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)) return; - hr_cq->db_en = 0; + hr_cq->flags &= ~HNS_ROCE_CQ_FLAG_RECORD_DB; if (udata) { uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index bd6e295f4669..f614959f97fd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -137,8 +137,8 @@ enum { HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), }; -enum { - HNS_ROCE_SUPPORT_CQ_RECORD_DB = 1 << 0, +enum hns_roce_cq_flags { + HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), }; enum hns_roce_qp_state { @@ -458,7 +458,7 @@ struct hns_roce_cq { struct ib_cq ib_cq; struct hns_roce_mtr mtr; struct hns_roce_db db; - u8 db_en; + u32 flags; spinlock_t lock; u32 cq_depth; u32 cons_index; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 6229b57e3c29..f0021832883d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2898,9 +2898,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M, V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3))); - if (hr_cq->db_en) - roce_set_bit(cq_context->byte_44_db_record, - V2_CQC_BYTE_44_DB_RECORD_EN_S, 1); + roce_set_bit(cq_context->byte_44_db_record, + V2_CQC_BYTE_44_DB_RECORD_EN_S, + (hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) ? 1 : 0); roce_set_field(cq_context->byte_44_db_record, V2_CQC_BYTE_44_DB_RECORD_ADDR_M, From 0db6570947f43a39664ab8665f58101f112cedf3 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 20 May 2020 21:53:13 +0800 Subject: [PATCH 468/562] RDMA/hns: Optimize post and poll process Add unlikely() and likely() to optimize main I/O process code. Link: https://lore.kernel.org/r/1589982799-28728-4-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 27 +++++++++++----------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f0021832883d..af0911e522f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -187,15 +187,15 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, int i; if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { - if (le32_to_cpu(rc_sq_wqe->msg_len) > - hr_dev->caps.max_sq_inline) { + if (unlikely(le32_to_cpu(rc_sq_wqe->msg_len) > + hr_dev->caps.max_sq_inline)) { ibdev_err(ibdev, "inline len(1-%d)=%d, illegal", rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline); return -EINVAL; } - if (wr->opcode == IB_WR_RDMA_READ) { + if (unlikely(wr->opcode == IB_WR_RDMA_READ)) { ibdev_err(ibdev, "Not support inline data!\n"); return -EINVAL; } @@ -526,7 +526,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, spin_lock_irqsave(&qp->sq.lock, flags); ret = check_send_valid(hr_dev, qp); - if (ret) { + if (unlikely(ret)) { *bad_wr = wr; nreq = 0; goto out; @@ -562,7 +562,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, else if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); - if (ret) { + if (unlikely(ret)) { *bad_wr = wr; goto out; } @@ -612,15 +612,15 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, spin_lock_irqsave(&hr_qp->rq.lock, flags); ret = check_recv_valid(hr_dev, hr_qp); - if (ret) { + if (unlikely(ret)) { *bad_wr = wr; nreq = 0; goto out; } for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (hns_roce_wq_overflow(&hr_qp->rq, nreq, - hr_qp->ibqp.recv_cq)) { + if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, + hr_qp->ibqp.recv_cq))) { ret = -ENOMEM; *bad_wr = wr; goto out; @@ -766,7 +766,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); - if (wqe_idx < 0) { + if (unlikely(wqe_idx < 0)) { ret = -ENOMEM; *bad_wr = wr; break; @@ -2977,7 +2977,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, wqe_buf += size; } - if (data_len) { + if (unlikely(data_len)) { wc->status = IB_WC_LOC_LEN_ERR; return -EAGAIN; } @@ -3069,7 +3069,8 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, break; } - if (wc->status == IB_WC_SUCCESS || wc->status == IB_WC_WR_FLUSH_ERR) + if (likely(wc->status == IB_WC_SUCCESS || + wc->status == IB_WC_WR_FLUSH_ERR)) return; ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); @@ -3164,7 +3165,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, } get_cqe_status(hr_dev, *cur_qp, cqe, wc); - if (wc->status != IB_WC_SUCCESS) + if (unlikely(wc->status != IB_WC_SUCCESS)) return 0; if (is_send) { @@ -3263,7 +3264,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) && (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_RQ_INLINE_S))) { ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc); - if (ret) + if (unlikely(ret)) return -EAGAIN; } From b9c93e3aad13048c673999e65acbde0378600317 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 20 May 2020 21:53:14 +0800 Subject: [PATCH 469/562] RDMA/hns: Remove unused code about assert The codes related to assert are no longer used and need to be deleted. Link: https://lore.kernel.org/r/1589982799-28728-5-git-send-email-liweihang@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_common.h | 4 ---- drivers/infiniband/hw/hns/hns_roce_main.c | 1 - 2 files changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 8e95a1aa1b4f..f5669ff8cfeb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -33,10 +33,6 @@ #ifndef _HNS_ROCE_COMMON_H #define _HNS_ROCE_COMMON_H -#ifndef assert -#define assert(cond) -#endif - #define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) #define roce_read(dev, reg) readl((dev)->reg_base + (reg)) #define roce_raw_write(value, addr) \ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index fd3581efe9a8..50763cf4fa3d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -233,7 +233,6 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, enum ib_mtu mtu; u8 port; - assert(port_num > 0); port = port_num - 1; /* props being zeroed by the caller, avoid zeroing it here */ From e9f2cd28250cd9e77db2c0dc8efe0412f6971a76 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Wed, 20 May 2020 21:53:15 +0800 Subject: [PATCH 470/562] RDMA/hns: Rename QP buffer related function Rename the function related to QP buffer to make the code more readable. Link: https://lore.kernel.org/r/1589982799-28728-6-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index fb71755f6179..5d294209fb23 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -502,9 +502,9 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return 0; } -static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_buf_attr *buf_attr) +static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_buf_attr *buf_attr) { int buf_size; int idx = 0; @@ -676,7 +676,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->rq_inl_buf.wqe_list = NULL; } - ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr); + ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr); if (ret) { ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); goto err_inline; From 82d07a4e466fa2e3cc0ac5479beeb739abaa7438 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 20 May 2020 21:53:16 +0800 Subject: [PATCH 471/562] RDMA/hns: Change all page_shift to unsigned page_shift is used to calculate the page size, it's always non-negative, and should be in type of unsigned. Link: https://lore.kernel.org/r/1589982799-28728-7-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 2 +- drivers/infiniband/hw/hns/hns_roce_device.h | 25 +++++++++++---------- drivers/infiniband/hw/hns/hns_roce_hem.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hem.h | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 20 +++++++++-------- 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 742aee846676..a522cb2d29ea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -254,7 +254,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct ib_umem *umem, - int page_shift) + unsigned int page_shift) { struct ib_block_iter biter; int total = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f614959f97fd..7190c8a5422e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -342,7 +342,7 @@ struct hns_roce_buf_attr { int hopnum; /* multi-hop addressing hop num */ } region[HNS_ROCE_MAX_BT_REGION]; int region_count; /* valid region count */ - int page_shift; /* buffer page shift */ + unsigned int page_shift; /* buffer page shift */ bool fixed_page; /* decide page shift is fixed-size or maximum size */ int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ @@ -351,14 +351,14 @@ struct hns_roce_buf_attr { /* memory translate region */ struct hns_roce_mtr { struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ - struct ib_umem *umem; /* user space buffer */ - struct hns_roce_buf *kmem; /* kernel space buffer */ + struct ib_umem *umem; /* user space buffer */ + struct hns_roce_buf *kmem; /* kernel space buffer */ struct { - dma_addr_t root_ba; /* root BA table's address */ - bool is_direct; /* addressing without BA table */ - int ba_pg_shift; /* BA table page shift */ - int buf_pg_shift; /* buffer page shift */ - int buf_pg_count; /* buffer page count */ + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + unsigned int ba_pg_shift; /* BA table page shift */ + unsigned int buf_pg_shift; /* buffer page shift */ + int buf_pg_count; /* buffer page count */ } hem_cfg; /* config for hardware addressing */ }; @@ -423,7 +423,7 @@ struct hns_roce_buf { struct hns_roce_buf_list *page_list; u32 npages; u32 size; - int page_shift; + unsigned int page_shift; }; struct hns_roce_db_pgdir { @@ -1139,8 +1139,9 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev); int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, int page_shift, - struct ib_udata *udata, unsigned long user_addr); + struct hns_roce_buf_attr *buf_attr, + unsigned int page_shift, struct ib_udata *udata, + unsigned long user_addr); void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -1210,7 +1211,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct ib_umem *umem, - int page_shift); + unsigned int page_shift); int hns_roce_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 37d101eec181..c8db6f8ae018 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1400,7 +1400,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt, int bt_pg_shift) + int region_cnt, unsigned int bt_pg_shift) { const struct hns_roce_buf_region *r; int ofs, end; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 1fa0bdcb1989..b34c940077bb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -133,7 +133,7 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt, int bt_pg_shift); + int region_cnt, unsigned int bt_pg_shift); void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 3075e8450cda..17759de4108d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -706,7 +706,8 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) return size; } -static inline int mtr_umem_page_count(struct ib_umem *umem, int page_shift) +static inline int mtr_umem_page_count(struct ib_umem *umem, + unsigned int page_shift) { int count = ib_umem_page_count(umem); @@ -719,7 +720,7 @@ static inline int mtr_umem_page_count(struct ib_umem *umem, int page_shift) } static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, - int page_shift) + unsigned int page_shift) { if (is_direct) return ALIGN(alloc_size, 1 << page_shift); @@ -732,7 +733,7 @@ static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, * Returns 0 on success, or the error page num. */ static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, - int page_shift) + unsigned int page_shift) { size_t page_size = 1 << page_shift; int i; @@ -765,8 +766,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - int max_pg_shift = buf_attr->page_shift; - int best_pg_shift = 0; + unsigned int max_pg_shift = buf_attr->page_shift; + unsigned int best_pg_shift = 0; int all_pg_count = 0; size_t direct_size; size_t total_size; @@ -836,7 +837,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int count, int page_shift) + dma_addr_t *pages, int count, unsigned int page_shift) { struct ib_device *ibdev = &hr_dev->ib_dev; int npage; @@ -946,7 +947,7 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* convert buffer size to page index and page count */ static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt, struct hns_roce_buf_region *regions, int region_cnt, - int page_shift) + unsigned int page_shift) { unsigned int page_size = 1 << page_shift; int max_region = attr->region_count; @@ -977,8 +978,9 @@ static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt, * @buf_alloced: mtr has private buffer, true means need to alloc */ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, int page_shift, - struct ib_udata *udata, unsigned long user_addr) + struct hns_roce_buf_attr *buf_attr, + unsigned int page_shift, struct ib_udata *udata, + unsigned long user_addr) { struct hns_roce_buf_region regions[HNS_ROCE_MAX_BT_REGION] = {}; struct ib_device *ibdev = &hr_dev->ib_dev; From 13aa13dddd5f06d47c35f9de46343e740f7d8b90 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 20 May 2020 21:53:17 +0800 Subject: [PATCH 472/562] RDMA/hns: Change variables representing quantity to unsigned Number of sge/eqe is always non-negative, they should be defined in type of unsigned. Link: https://lore.kernel.org/r/1589982799-28728-8-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7190c8a5422e..f9413722b28e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -408,7 +408,7 @@ struct hns_roce_wq { }; struct hns_roce_sge { - int sge_cnt; /* SGE num */ + unsigned int sge_cnt; /* SGE num */ int offset; int sge_shift; /* SGE size */ }; @@ -734,7 +734,7 @@ struct hns_roce_eq { int arm_st; int hop_num; struct hns_roce_mtr mtr; - int eq_max_cnt; + u16 eq_max_cnt; int eq_period; int shift; int event_type; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index af0911e522f8..4d29a9ec6a84 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -130,7 +130,7 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - int valid_num_sge) + unsigned int valid_num_sge) { struct hns_roce_wqe_atomic_seg *aseg; @@ -151,12 +151,12 @@ static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, } static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind, int valid_num_sge) + unsigned int *sge_ind, unsigned int valid_num_sge) { struct hns_roce_v2_wqe_data_seg *dseg; + unsigned int cnt = valid_num_sge; struct ib_sge *sge = wr->sg_list; unsigned int idx = *sge_ind; - int cnt = valid_num_sge; if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { cnt -= HNS_ROCE_SGE_IN_WQE; @@ -177,7 +177,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, - int valid_num_sge) + unsigned int valid_num_sge) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; @@ -269,10 +269,11 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, return 0; } -static inline int calc_wr_sge_num(const struct ib_send_wr *wr, u32 *sge_len) +static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr, + unsigned int *sge_len) { - int valid_num = 0; - u32 len = 0; + unsigned int valid_num = 0; + unsigned int len = 0; int i; for (i = 0; i < wr->num_sge; i++) { @@ -403,7 +404,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, { struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; - int valid_num_sge; + unsigned int valid_num_sge; u32 msg_len = 0; int ret = 0; From 494c3b312255a39d4450f37ec2e675f142a76c8c Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Wed, 20 May 2020 21:53:18 +0800 Subject: [PATCH 473/562] RDMA/hns: Refactor the QP context filling process related to WQE buffer configure Split the code related to WQE buffer configure from the QPC filling process into two functions: config_qp_sq_buf() and config_qp_rq_buf(), this will make the code more readable. Link: https://lore.kernel.org/r/1589982799-28728-9-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 344 +++++++++++---------- 1 file changed, 189 insertions(+), 155 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4d29a9ec6a84..b1674f742361 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3775,27 +3775,16 @@ static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, return true; } -static int modify_qp_init_to_rtr(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, - struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) +static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) { - const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_qp *ibqp = &hr_qp->ibqp; u64 mtts[MTT_MIN_COUNT] = { 0 }; - dma_addr_t dma_handle_3; - dma_addr_t dma_handle_2; u64 wqe_sge_ba; u32 page_size; - u8 port_num; - u64 *mtts_3; - u64 *mtts_2; int count; - u8 *dmac; - u8 *smac; - int port; /* Search qp buf's mtts */ page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; @@ -3806,29 +3795,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) return -EINVAL; - /* Search IRRL's mtts */ - mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, - hr_qp->qpn, &dma_handle_2); - if (!mtts_2) { - ibdev_err(ibdev, "failed to find QP irrl_table\n"); - return -EINVAL; - } - - /* Search TRRL's mtts */ - mtts_3 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table, - hr_qp->qpn, &dma_handle_3); - if (!mtts_3) { - ibdev_err(ibdev, "failed to find QP trrl_table\n"); - return -EINVAL; - } - - if (attr_mask & IB_QP_ALT_PATH) { - ibdev_err(ibdev, "INIT2RTR attr_mask (0x%x) error\n", - attr_mask); - return -EINVAL; - } - - dmac = (u8 *)attr->ah_attr.roce.dmac; context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; @@ -3907,83 +3873,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); - roce_set_field(context->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, - V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4); - roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, - V2_QPC_BYTE_132_TRRL_BA_S, 0); - context->trrl_ba = cpu_to_le32(dma_handle_3 >> (16 + 4)); - qpc_mask->trrl_ba = 0; - roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, - V2_QPC_BYTE_140_TRRL_BA_S, - (u32)(dma_handle_3 >> (32 + 16 + 4))); - roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, - V2_QPC_BYTE_140_TRRL_BA_S, 0); - - context->irrl_ba = cpu_to_le32(dma_handle_2 >> 6); - qpc_mask->irrl_ba = 0; - roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, - V2_QPC_BYTE_208_IRRL_BA_S, - dma_handle_2 >> (32 + 6)); - roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, - V2_QPC_BYTE_208_IRRL_BA_S, 0); - - roce_set_bit(context->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 1); - roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 0); - - roce_set_bit(context->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S, - hr_qp->sq_signal_bits); - roce_set_bit(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S, - 0); - - port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; - - smac = (u8 *)hr_dev->dev_addr[port]; - /* when dmac equals smac or loop_idc is 1, it should loopback */ - if (ether_addr_equal_unaligned(dmac, smac) || - hr_dev->loop_idc == 0x1) { - roce_set_bit(context->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 1); - roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0); - } - - if (attr_mask & IB_QP_DEST_QPN) { - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); - roce_set_field(qpc_mask->byte_56_dqpn_err, - V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); - } - - /* Configure GID index */ - port_num = rdma_ah_get_port_num(&attr->ah_attr); - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, - hns_get_gid_index(hr_dev, port_num - 1, - grh->sgid_index)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - memcpy(&(context->dmac), dmac, sizeof(u32)); - roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, - V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); - qpc_mask->dmac = 0; - roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, - V2_QPC_BYTE_52_DMAC_S, 0); - - /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */ - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, - ilog2(hr_dev->caps.max_sq_inline / IB_MTU_4096)); - roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); - - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, - V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); - else if (attr_mask & IB_QP_PATH_MTU) - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, - V2_QPC_BYTE_24_MTU_S, attr->path_mtu); - - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, - V2_QPC_BYTE_24_MTU_S, 0); - roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); @@ -3994,72 +3883,40 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M, V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S, 0); - roce_set_bit(qpc_mask->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); - roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, - V2_QPC_BYTE_96_RX_REQ_MSN_S, 0); - roce_set_field(qpc_mask->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M, - V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0); - - context->rq_rnr_timer = 0; - qpc_mask->rq_rnr_timer = 0; - - roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M, - V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0); - roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, - V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); - - /* rocee send 2^lp_sgen_ini segs every time */ - roce_set_field(context->byte_168_irrl_idx, - V2_QPC_BYTE_168_LP_SGEN_INI_M, - V2_QPC_BYTE_168_LP_SGEN_INI_S, 3); - roce_set_field(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_LP_SGEN_INI_M, - V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); return 0; } -static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, - struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) +static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; u32 page_size; int count; - /* Search qp buf's mtts */ + /* search qp buf's mtts */ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf.\n", hr_qp->qpn); return -EINVAL; } - if (hr_qp->sge.sge_cnt > 0) { page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sge.offset / page_size, &sge_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n", hr_qp->qpn); return -EINVAL; } } - /* Not support alternate path and path migration */ - if (attr_mask & (IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE)) { - ibdev_err(ibdev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); - return -EINVAL; - } - /* * In v2 engine, software pass context and context mask to hardware * when modifying qp. If software need modify some fields in context, @@ -4098,6 +3955,183 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, 0); + return 0; +} + +static int modify_qp_init_to_rtr(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t trrl_ba; + dma_addr_t irrl_ba; + u8 port_num; + u64 *mtts; + u8 *dmac; + u8 *smac; + int port; + int ret; + + ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask); + if (ret) { + ibdev_err(ibdev, "failed to config rq buf, ret = %d.\n", ret); + return ret; + } + + /* Search IRRL's mtts */ + mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, + hr_qp->qpn, &irrl_ba); + if (!mtts) { + ibdev_err(ibdev, "failed to find qp irrl_table.\n"); + return -EINVAL; + } + + /* Search TRRL's mtts */ + mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table, + hr_qp->qpn, &trrl_ba); + if (!mtts) { + ibdev_err(ibdev, "failed to find qp trrl_table.\n"); + return -EINVAL; + } + + if (attr_mask & IB_QP_ALT_PATH) { + ibdev_err(ibdev, "INIT2RTR attr_mask (0x%x) error.\n", + attr_mask); + return -EINVAL; + } + + roce_set_field(context->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, + V2_QPC_BYTE_132_TRRL_BA_S, trrl_ba >> 4); + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, + V2_QPC_BYTE_132_TRRL_BA_S, 0); + context->trrl_ba = cpu_to_le32(trrl_ba >> (16 + 4)); + qpc_mask->trrl_ba = 0; + roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, + V2_QPC_BYTE_140_TRRL_BA_S, + (u32)(trrl_ba >> (32 + 16 + 4))); + roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, + V2_QPC_BYTE_140_TRRL_BA_S, 0); + + context->irrl_ba = cpu_to_le32(irrl_ba >> 6); + qpc_mask->irrl_ba = 0; + roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, + V2_QPC_BYTE_208_IRRL_BA_S, + irrl_ba >> (32 + 6)); + roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, + V2_QPC_BYTE_208_IRRL_BA_S, 0); + + roce_set_bit(context->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 1); + roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 0); + + roce_set_bit(context->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S, + hr_qp->sq_signal_bits); + roce_set_bit(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S, + 0); + + port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; + + smac = (u8 *)hr_dev->dev_addr[port]; + /* when dmac equals smac or loop_idc is 1, it should loopback */ + if (ether_addr_equal_unaligned(dmac, smac) || + hr_dev->loop_idc == 0x1) { + roce_set_bit(context->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 1); + roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0); + } + + if (attr_mask & IB_QP_DEST_QPN) { + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); + roce_set_field(qpc_mask->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); + } + + /* Configure GID index */ + port_num = rdma_ah_get_port_num(&attr->ah_attr); + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, + hns_get_gid_index(hr_dev, port_num - 1, + grh->sgid_index)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); + + dmac = (u8 *)attr->ah_attr.roce.dmac; + memcpy(&(context->dmac), dmac, sizeof(u32)); + roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, + V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); + qpc_mask->dmac = 0; + roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, + V2_QPC_BYTE_52_DMAC_S, 0); + + /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */ + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, + V2_QPC_BYTE_56_LP_PKTN_INI_S, + ilog2(hr_dev->caps.max_sq_inline / IB_MTU_4096)); + roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, + V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, + V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); + else if (attr_mask & IB_QP_PATH_MTU) + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, + V2_QPC_BYTE_24_MTU_S, attr->path_mtu); + + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, + V2_QPC_BYTE_24_MTU_S, 0); + + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); + roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, + V2_QPC_BYTE_96_RX_REQ_MSN_S, 0); + roce_set_field(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M, + V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0); + + context->rq_rnr_timer = 0; + qpc_mask->rq_rnr_timer = 0; + + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M, + V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0); + roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, + V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); + + /* rocee send 2^lp_sgen_ini segs every time */ + roce_set_field(context->byte_168_irrl_idx, + V2_QPC_BYTE_168_LP_SGEN_INI_M, + V2_QPC_BYTE_168_LP_SGEN_INI_S, 3); + roce_set_field(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_LP_SGEN_INI_M, + V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); + + return 0; +} + +static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + /* Not support alternate path and path migration */ + if (attr_mask & (IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE)) { + ibdev_err(ibdev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); + return -EINVAL; + } + + ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); + if (ret) { + ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret); + return ret; + } + /* * Set some fields in context to zero, Because the default values * of all fields in context are zero, we need not set them to 0 again. From 8e029d386bcef651bb89091c8a48375c2506aefe Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Wed, 20 May 2020 21:53:19 +0800 Subject: [PATCH 474/562] RDMA/hns: Optimize the usage of MTR Currently, the MTR region is configed before hns_roce_mtr_map() is invoked, but in some scenarios, the region is configed at MTR creation, the caller need to store this config and call hns_roce_mtr_map() later. So optimize the usage by wrapping the MTR region config into MTR. Link: https://lore.kernel.org/r/1589982799-28728-10-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 3 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 54 +++++++++++---------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f9413722b28e..a77fa6730b2d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -359,6 +359,8 @@ struct hns_roce_mtr { unsigned int ba_pg_shift; /* BA table page shift */ unsigned int buf_pg_shift; /* buffer page shift */ int buf_pg_count; /* buffer page count */ + struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; + unsigned int region_count; } hem_cfg; /* config for hardware addressing */ }; @@ -1145,7 +1147,6 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_region *regions, int region_cnt, dma_addr_t *pages, int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 17759de4108d..4c0bbb12770d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -483,7 +483,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); - struct hns_roce_buf_region region = {}; + struct hns_roce_mtr *mtr = &mr->pbl_mtr; int ret = 0; mr->npages = 0; @@ -499,11 +499,11 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, goto err_page_list; } - region.offset = 0; - region.count = mr->npages; - region.hopnum = mr->pbl_hop_num; - ret = hns_roce_mtr_map(hr_dev, &mr->pbl_mtr, ®ion, 1, mr->page_list, - mr->npages); + mtr->hem_cfg.region[0].offset = 0; + mtr->hem_cfg.region[0].count = mr->npages; + mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num; + mtr->hem_cfg.region_count = 1; + ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages); if (ret) { ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); ret = 0; @@ -863,7 +863,6 @@ static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_region *regions, int region_cnt, dma_addr_t *pages, int page_cnt) { struct ib_device *ibdev = &hr_dev->ib_dev; @@ -871,8 +870,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int err; int i; - for (i = 0; i < region_cnt; i++) { - r = ®ions[i]; + for (i = 0; i < mtr->hem_cfg.region_count; i++) { + r = &mtr->hem_cfg.region[i]; if (r->offset + r->count > page_cnt) { err = -EINVAL; ibdev_err(ibdev, @@ -945,15 +944,16 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } /* convert buffer size to page index and page count */ -static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt, - struct hns_roce_buf_region *regions, int region_cnt, - unsigned int page_shift) +static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr, + int page_cnt, + struct hns_roce_buf_region *regions, + int region_cnt, unsigned int page_shift) { unsigned int page_size = 1 << page_shift; int max_region = attr->region_count; struct hns_roce_buf_region *r; + unsigned int i = 0; int page_idx = 0; - int i = 0; for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { r = ®ions[i]; @@ -982,7 +982,6 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, unsigned int page_shift, struct ib_udata *udata, unsigned long user_addr) { - struct hns_roce_buf_region regions[HNS_ROCE_MAX_BT_REGION] = {}; struct ib_device *ibdev = &hr_dev->ib_dev; dma_addr_t *pages = NULL; int region_cnt = 0; @@ -1014,18 +1013,22 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, hns_roce_hem_list_init(&mtr->hem_list); mtr->hem_cfg.is_direct = !has_mtt; mtr->hem_cfg.ba_pg_shift = page_shift; + mtr->hem_cfg.region_count = 0; + region_cnt = mtr_init_region(buf_attr, all_pg_cnt, + mtr->hem_cfg.region, + ARRAY_SIZE(mtr->hem_cfg.region), + mtr->hem_cfg.buf_pg_shift); + if (region_cnt < 1) { + err = -ENOBUFS; + ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt); + goto err_alloc_bufs; + } + + mtr->hem_cfg.region_count = region_cnt; + if (has_mtt) { - region_cnt = mtr_init_region(buf_attr, all_pg_cnt, - regions, ARRAY_SIZE(regions), - mtr->hem_cfg.buf_pg_shift); - if (region_cnt < 1) { - err = -ENOBUFS; - ibdev_err(ibdev, "Failed to init mtr region %d\n", - region_cnt); - goto err_alloc_bufs; - } err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - regions, region_cnt, + mtr->hem_cfg.region, region_cnt, page_shift); if (err) { ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", @@ -1061,8 +1064,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->hem_cfg.root_ba = pages[0]; } else { /* write buffer's dma address to BA table */ - err = hns_roce_mtr_map(hr_dev, mtr, regions, region_cnt, pages, - all_pg_cnt); + err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); if (err) { ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", err); From 14ba87304bf98a0a0c069708bb14e92a616420d1 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 22 May 2020 21:02:56 +0800 Subject: [PATCH 475/562] RDMA/hns: Remove redundant type cast for general pointers There is no need to do a type cast on genernal pointers, they could be assigned to any type of variables. In addition, optimize initialization of some variables and adjust order of them. Link: https://lore.kernel.org/r/1590152579-32364-2-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 182 +++++++-------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 28 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 +- 3 files changed, 74 insertions(+), 138 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index b4b98e818328..8ff6b922b4d7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -503,16 +503,13 @@ static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, u32 ext_sdb_alful) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_db_table *db = &priv->db_table; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_db_table *db; dma_addr_t sdb_dma_addr; __le32 tmp; u32 val; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - db = &priv->db_table; - /* Configure extend SDB threshold */ roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_EMPTY_REG, ext_sdb_alept); roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_REG, ext_sdb_alful); @@ -545,16 +542,13 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, u32 ext_odb_alful) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_db_table *db = &priv->db_table; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_db_table *db; dma_addr_t odb_dma_addr; __le32 tmp; u32 val; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - db = &priv->db_table; - /* Configure extend ODB threshold */ roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG, ext_odb_alept); roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_REG, ext_odb_alful); @@ -583,16 +577,13 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod, u32 odb_ext_mod) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_db_table *db = &priv->db_table; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_db_table *db; dma_addr_t sdb_dma_addr; dma_addr_t odb_dma_addr; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - db = &priv->db_table; - db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL); if (!db->ext_db) return -ENOMEM; @@ -692,14 +683,14 @@ static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev, static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_free_mr *free_mr = &priv->free_mr; struct hns_roce_caps *caps = &hr_dev->caps; + struct ib_device *ibdev = &hr_dev->ib_dev; struct device *dev = &hr_dev->pdev->dev; struct ib_cq_init_attr cq_init_attr; - struct hns_roce_free_mr *free_mr; struct ib_qp_attr attr = { 0 }; - struct hns_roce_v1_priv *priv; struct hns_roce_qp *hr_qp; - struct ib_device *ibdev; struct ib_cq *cq; struct ib_pd *pd; union ib_gid dgid; @@ -712,14 +703,10 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) u8 port = 0; u8 sl; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - free_mr = &priv->free_mr; - /* Reserved cq for loop qp */ cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2; cq_init_attr.comp_vector = 0; - ibdev = &hr_dev->ib_dev; cq = rdma_zalloc_drv_obj(ibdev, ib_cq); if (!cq) return -ENOMEM; @@ -868,16 +855,13 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_free_mr *free_mr = &priv->free_mr; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_free_mr *free_mr; - struct hns_roce_v1_priv *priv; struct hns_roce_qp *hr_qp; int ret; int i; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - free_mr = &priv->free_mr; - for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { hr_qp = free_mr->mr_free_qp[i]; if (!hr_qp) @@ -897,18 +881,15 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) static int hns_roce_db_init(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_db_table *db = &priv->db_table; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_db_table *db; u32 sdb_ext_mod; u32 odb_ext_mod; u32 sdb_evt_mod; u32 odb_evt_mod; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - db = &priv->db_table; - memset(db, 0, sizeof(*db)); /* Default DB mode */ @@ -954,15 +935,12 @@ static void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work) static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) { - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_recreate_lp_qp_work *lp_qp_work; - struct hns_roce_free_mr *free_mr; - struct hns_roce_v1_priv *priv; - struct completion comp; long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - free_mr = &priv->free_mr; + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_free_mr *free_mr = &priv->free_mr; + struct hns_roce_recreate_lp_qp_work *lp_qp_work; + struct device *dev = &hr_dev->pdev->dev; + struct completion comp; lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work), GFP_KERNEL); @@ -1021,29 +999,21 @@ static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp) static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) { - struct hns_roce_mr_free_work *mr_work; - struct ib_wc wc[HNS_ROCE_V1_RESV_QP]; - struct hns_roce_free_mr *free_mr; - struct hns_roce_cq *mr_free_cq; - struct hns_roce_v1_priv *priv; - struct hns_roce_dev *hr_dev; - struct hns_roce_mr *hr_mr; - struct hns_roce_qp *hr_qp; - struct device *dev; unsigned long end = msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; - int i; - int ret; + struct hns_roce_mr_free_work *mr_work = + container_of(work, struct hns_roce_mr_free_work, work); + struct hns_roce_dev *hr_dev = to_hr_dev(mr_work->ib_dev); + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_free_mr *free_mr = &priv->free_mr; + struct hns_roce_cq *mr_free_cq = free_mr->mr_free_cq; + struct hns_roce_mr *hr_mr = mr_work->mr; + struct device *dev = &hr_dev->pdev->dev; + struct ib_wc wc[HNS_ROCE_V1_RESV_QP]; + struct hns_roce_qp *hr_qp; int ne = 0; - - mr_work = container_of(work, struct hns_roce_mr_free_work, work); - hr_mr = (struct hns_roce_mr *)mr_work->mr; - hr_dev = to_hr_dev(mr_work->ib_dev); - dev = &hr_dev->pdev->dev; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - free_mr = &priv->free_mr; - mr_free_cq = free_mr->mr_free_cq; + int ret; + int i; for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { hr_qp = free_mr->mr_free_qp[i]; @@ -1092,18 +1062,15 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_udata *udata) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_free_mr *free_mr = &priv->free_mr; + long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; struct device *dev = &hr_dev->pdev->dev; struct hns_roce_mr_free_work *mr_work; - struct hns_roce_free_mr *free_mr; - struct hns_roce_v1_priv *priv; - struct completion comp; - long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; unsigned long start = jiffies; + struct completion comp; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - free_mr = &priv->free_mr; - if (mr->enabled) { if (hns_roce_hw_destroy_mpt(hr_dev, NULL, key_to_hw_index(mr->key) & @@ -1155,12 +1122,9 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, static void hns_roce_db_free(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_db_table *db = &priv->db_table; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_db_table *db; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - db = &priv->db_table; if (db->sdb_ext_mod) { dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE, @@ -1181,17 +1145,14 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev) static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) { - int ret; - u32 val; - __le32 tmp; + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_raq_table *raq = raq = &priv->raq_table; + struct device *dev = &hr_dev->pdev->dev; int raq_shift = 0; dma_addr_t addr; - struct hns_roce_v1_priv *priv; - struct hns_roce_raq_table *raq; - struct device *dev = &hr_dev->pdev->dev; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - raq = &priv->raq_table; + __le32 tmp; + u32 val; + int ret; raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL); if (!raq->e_raq_buf) @@ -1271,12 +1232,9 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_raq_table *raq = &priv->raq_table; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_raq_table *raq; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - raq = &priv->raq_table; dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf, raq->e_raq_buf->map); @@ -1310,12 +1268,10 @@ static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) static int hns_roce_bt_init(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; int ret; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map, GFP_KERNEL); @@ -1353,10 +1309,8 @@ static int hns_roce_bt_init(struct hns_roce_dev *hr_dev) static void hns_roce_bt_free(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map); @@ -1370,12 +1324,9 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev) static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_buf_list *tptr_buf; - struct hns_roce_v1_priv *priv; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - tptr_buf = &priv->tptr_table.tptr_buf; /* * This buffer will be used for CQ's tptr(tail pointer), also @@ -1396,12 +1347,9 @@ static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev) static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_buf_list *tptr_buf; - struct hns_roce_v1_priv *priv; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - tptr_buf = &priv->tptr_table.tptr_buf; dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, tptr_buf->buf, tptr_buf->map); @@ -1409,14 +1357,11 @@ static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev) static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) { + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_free_mr *free_mr = &priv->free_mr; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_free_mr *free_mr; - struct hns_roce_v1_priv *priv; int ret = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - free_mr = &priv->free_mr; - free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); if (!free_mr->free_mr_wq) { dev_err(dev, "Create free mr workqueue failed!\n"); @@ -1435,11 +1380,8 @@ static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) { - struct hns_roce_free_mr *free_mr; - struct hns_roce_v1_priv *priv; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - free_mr = &priv->free_mr; + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_free_mr *free_mr = &priv->free_mr; flush_workqueue(free_mr->free_mr_wq); destroy_workqueue(free_mr->free_mr_wq); @@ -2050,16 +1992,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, dma_addr_t dma_handle) { - struct hns_roce_cq_context *cq_context = NULL; - struct hns_roce_buf_list *tptr_buf; - struct hns_roce_v1_priv *priv; + struct hns_roce_v1_priv *priv = hr_dev->priv; + struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf; + struct hns_roce_cq_context *cq_context = mb_buf; dma_addr_t tptr_dma_addr; int offset; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - tptr_buf = &priv->tptr_table.tptr_buf; - - cq_context = mb_buf; memset(cq_context, 0, sizeof(*cq_context)); /* Get the tptr for this CQ. */ @@ -2400,16 +2338,14 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx) { + struct hns_roce_v1_priv *priv = hr_dev->priv; struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - unsigned long flags = 0; long end = HW_SYNC_TIMEOUT_MSECS; __le32 bt_cmd_val[2] = {0}; + unsigned long flags = 0; void __iomem *bt_cmd; u64 bt_ba = 0; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - switch (table->type) { case HEM_TYPE_QPC: bt_ba = priv->bt_table.qpc_buf.map >> 12; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b1674f742361..97d087ae8988 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -845,7 +845,7 @@ static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, unsigned long instance_stage, unsigned long reset_stage) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; @@ -871,7 +871,7 @@ static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, static int hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; @@ -888,7 +888,7 @@ static int hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev) static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; unsigned long instance_stage; /* the current instance stage */ @@ -968,7 +968,7 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *ring = (ring_type == TYPE_CSQ) ? &priv->cmq.csq : &priv->cmq.crq; @@ -981,7 +981,7 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *ring = (ring_type == TYPE_CSQ) ? &priv->cmq.csq : &priv->cmq.crq; dma_addr_t dma = ring->desc_dma_addr; @@ -1007,7 +1007,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; int ret; /* Setup the queue entries for command queue */ @@ -1051,7 +1051,7 @@ static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) static void hns_roce_v2_cmq_exit(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq); hns_roce_free_cmq_desc(hr_dev, &priv->cmq.crq); @@ -1073,15 +1073,15 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); + struct hns_roce_v2_priv *priv = hr_dev->priv; return head == priv->cmq.csq.next_to_use; } static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; struct hns_roce_cmq_desc *desc; u16 ntc = csq->next_to_clean; @@ -1106,7 +1106,7 @@ static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; struct hns_roce_cmq_desc *desc_to_use; bool complete = false; @@ -1234,7 +1234,7 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) static bool hns_roce_func_clr_chk_rst(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; unsigned long reset_cnt; @@ -1254,7 +1254,7 @@ static bool hns_roce_func_clr_chk_rst(struct hns_roce_dev *hr_dev) static void hns_roce_func_clr_rst_prc(struct hns_roce_dev *hr_dev, int retval, int flag) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; unsigned long instance_stage; @@ -6041,7 +6041,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset) { - struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct hns_roce_dev *hr_dev = handle->priv; if (!hr_dev) return; @@ -6121,7 +6121,7 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN; clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); - hr_dev = (struct hns_roce_dev *)handle->priv; + hr_dev = handle->priv; if (!hr_dev) return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 532dcf6a05ff..e176b0aaa4ac 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1979,7 +1979,7 @@ int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) { - struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; From f226f6765f7fe435e033da67698565ca876c2b8d Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Fri, 22 May 2020 21:02:57 +0800 Subject: [PATCH 476/562] RDMA/hns: Remove redundant parameters from free_srq/qp_wrid() The redundant parameters "hr_dev" need to be removed from free_kernel_wrid() and free_srq_wrid(). Link: https://lore.kernel.org/r/1590152579-32364-3-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 5d294209fb23..a0a47bd66975 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -842,8 +842,7 @@ static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev, return ret; } -static void free_kernel_wrid(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) +static void free_kernel_wrid(struct hns_roce_qp *hr_qp) { kfree(hr_qp->rq.wrid); kfree(hr_qp->sq.wrid); @@ -996,7 +995,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, err_db: free_qp_db(hr_dev, hr_qp, udata); err_wrid: - free_kernel_wrid(hr_dev, hr_qp); + free_kernel_wrid(hr_qp); return ret; } @@ -1010,7 +1009,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, free_qpc(hr_dev, hr_qp); free_qpn(hr_dev, hr_qp); free_qp_buf(hr_dev, hr_qp); - free_kernel_wrid(hr_dev, hr_qp); + free_kernel_wrid(hr_qp); free_qp_db(hr_dev, hr_qp, udata); kfree(hr_qp); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 3018c981f1d1..f40a000e94ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -270,7 +270,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) return 0; } -static void free_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +static void free_srq_wrid(struct hns_roce_srq *srq) { kfree(srq->wrid); srq->wrid = NULL; @@ -355,7 +355,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, err_srqc_alloc: free_srqc(hr_dev, srq); err_wrid_alloc: - free_srq_wrid(hr_dev, srq); + free_srq_wrid(srq); err_idx_alloc: free_srq_idx(hr_dev, srq); err_buf_alloc: @@ -370,7 +370,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) free_srqc(hr_dev, srq); free_srq_idx(hr_dev, srq); - free_srq_wrid(hr_dev, srq); + free_srq_wrid(srq); free_srq_buf(hr_dev, srq); } From e4aaf4bad4651358a923ad756f8b0c9d447ea21f Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 22 May 2020 21:02:58 +0800 Subject: [PATCH 477/562] RDMA/hns: Simplify process related to poll cq Set hns_roce_v2_cq_set_ci to inline type and remove unnecessary next_cqe_sw_v2(). Link: https://lore.kernel.org/r/1590152579-32364-4-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 97d087ae8988..24c661a32e01 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2776,14 +2776,9 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) !!(n & hr_cq->cq_depth)) ? cqe : NULL; } -static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) +static inline void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 ci) { - return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); -} - -static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) -{ - *hr_cq->set_ci_db = cons_index & V2_CQ_DB_PARAMETER_CONS_IDX_M; + *hr_cq->set_ci_db = ci & V2_CQ_DB_PARAMETER_CONS_IDX_M; } static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, @@ -3106,7 +3101,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, int ret; /* Find cqe according to consumer index */ - cqe = next_cqe_sw_v2(hr_cq); + cqe = get_sw_cqe_v2(hr_cq, hr_cq->cons_index); if (!cqe) return -EAGAIN; From e1b43f07c0d4c82fd5591ae24d045fee2b00edf3 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Fri, 22 May 2020 21:02:59 +0800 Subject: [PATCH 478/562] RDMA/hns: Make the end of sge process more clear Instead of i with the sge number of wr will make the comparision more clear, that is, when the sge number in wr is small than the maximum supported sge number in the queue, then a stop sge needed to be filled at the end of sges in wr. Link: https://lore.kernel.org/r/1590152579-32364-5-git-send-email-liweihang@huawei.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 24c661a32e01..6454ac4ad06f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -646,7 +646,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, dseg++; } - if (i < hr_qp->rq.max_gs) { + if (wr->num_sge < hr_qp->rq.max_gs) { dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg->addr = 0; dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); @@ -782,7 +782,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); } - if (i < srq->max_gs) { + if (wr->num_sge < srq->max_gs) { dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg[i].addr = 0; From 49ea0c036ede81f126f1a9389d377999fdf5c5a1 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Mon, 25 May 2020 00:38:14 +0530 Subject: [PATCH 479/562] RDMA/iw_cxgb4: cleanup device debugfs entries on ULD remove Remove device specific debugfs entries immediately if LLD detaches a particular ULD device in case of fatal PCI errors. Link: https://lore.kernel.org/r/20200524190814.17599-1-bharat@chelsio.com Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 599340c1f0b8..541dbcf22d0e 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -953,6 +953,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) static void c4iw_remove(struct uld_ctx *ctx) { pr_debug("c4iw_dev %p\n", ctx->dev); + debugfs_remove_recursive(ctx->dev->debugfs_root); c4iw_unregister_device(ctx->dev); c4iw_dealloc(ctx); } From ebd6e96b33a23fe84d1a64441a04d7bc91ccc519 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 25 May 2020 16:03:05 +0300 Subject: [PATCH 480/562] RDMA/ipoib: Remove can_sleep parameter from iboib_mcast_alloc can_sleep is always 0 when iboib_mcast_alloc() is called, so remove it and use GFP_ATOMIC instead of GFP_KERNEL. Link: https://lore.kernel.org/r/20200525130305.171509-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 3d5f6b848c9e..9bfa514473d5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -135,12 +135,11 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) kfree(mcast); } -static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, - int can_sleep) +static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev) { struct ipoib_mcast *mcast; - mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); + mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC); if (!mcast) return NULL; @@ -599,7 +598,7 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!priv->broadcast) { struct ipoib_mcast *broadcast; - broadcast = ipoib_mcast_alloc(dev, 0); + broadcast = ipoib_mcast_alloc(dev); if (!broadcast) { ipoib_warn(priv, "failed to allocate broadcast group\n"); /* @@ -782,7 +781,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", mgid); - mcast = ipoib_mcast_alloc(dev, 0); + mcast = ipoib_mcast_alloc(dev); if (!mcast) { ipoib_warn(priv, "unable to allocate memory " "for multicast structure\n"); @@ -936,7 +935,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n", mgid.raw); - nmcast = ipoib_mcast_alloc(dev, 0); + nmcast = ipoib_mcast_alloc(dev); if (!nmcast) { ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); continue; From 1518ac272e789cae8c555d69951b032a275b7602 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 11 May 2020 00:34:50 -0400 Subject: [PATCH 481/562] vfio/pci: fix memory leaks of eventfd ctx Finished a qemu-kvm (-device vfio-pci,host=0001:01:00.0) triggers a few memory leaks after a while because vfio_pci_set_ctx_trigger_single() calls eventfd_ctx_fdget() without the matching eventfd_ctx_put() later. Fix it by calling eventfd_ctx_put() for those memory in vfio_pci_release() before vfio_device_release(). unreferenced object 0xebff008981cc2b00 (size 128): comm "qemu-kvm", pid 4043, jiffies 4294994816 (age 9796.310s) hex dump (first 32 bytes): 01 00 00 00 6b 6b 6b 6b 00 00 00 00 ad 4e ad de ....kkkk.....N.. ff ff ff ff 6b 6b 6b 6b ff ff ff ff ff ff ff ff ....kkkk........ backtrace: [<00000000917e8f8d>] slab_post_alloc_hook+0x74/0x9c [<00000000df0f2aa2>] kmem_cache_alloc_trace+0x2b4/0x3d4 [<000000005fcec025>] do_eventfd+0x54/0x1ac [<0000000082791a69>] __arm64_sys_eventfd2+0x34/0x44 [<00000000b819758c>] do_el0_svc+0x128/0x1dc [<00000000b244e810>] el0_sync_handler+0xd0/0x268 [<00000000d495ef94>] el0_sync+0x164/0x180 unreferenced object 0x29ff008981cc4180 (size 128): comm "qemu-kvm", pid 4043, jiffies 4294994818 (age 9796.290s) hex dump (first 32 bytes): 01 00 00 00 6b 6b 6b 6b 00 00 00 00 ad 4e ad de ....kkkk.....N.. ff ff ff ff 6b 6b 6b 6b ff ff ff ff ff ff ff ff ....kkkk........ backtrace: [<00000000917e8f8d>] slab_post_alloc_hook+0x74/0x9c [<00000000df0f2aa2>] kmem_cache_alloc_trace+0x2b4/0x3d4 [<000000005fcec025>] do_eventfd+0x54/0x1ac [<0000000082791a69>] __arm64_sys_eventfd2+0x34/0x44 [<00000000b819758c>] do_el0_svc+0x128/0x1dc [<00000000b244e810>] el0_sync_handler+0xd0/0x268 [<00000000d495ef94>] el0_sync+0x164/0x180 Signed-off-by: Qian Cai Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c6b37b5c04e..080e6608f297 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -519,6 +519,10 @@ static void vfio_pci_release(void *device_data) vfio_pci_vf_token_user_add(vdev, -1); vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); + if (vdev->err_trigger) + eventfd_ctx_put(vdev->err_trigger); + if (vdev->req_trigger) + eventfd_ctx_put(vdev->req_trigger); } mutex_unlock(&vdev->reflck->lock); From 3ce419662dd4c9cf8db7869c4972ad51ccdf2ee3 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 5 May 2020 09:19:08 +0800 Subject: [PATCH 482/562] scsi: iscsi: Register sysfs for iscsi workqueue This patch enables setting cpu affinity through "cpumask" for iscsi workqueues (iscsi_q_xx and iscsi_eh), so as to get performance isolation. The max number of active worker was changed form 1 to 2, because "cpumask" of ordered workqueue isn't allowed to change. Link: https://lore.kernel.org/r/20200505011908.15538-1-bob.liu@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Bob Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 4 +++- drivers/scsi/scsi_transport_iscsi.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 874dd4beed10..e5a64d4f255c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2627,7 +2627,9 @@ struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, if (xmit_can_sleep) { snprintf(ihost->workq_name, sizeof(ihost->workq_name), "iscsi_q_%d", shost->host_no); - ihost->workq = create_singlethread_workqueue(ihost->workq_name); + ihost->workq = alloc_workqueue("%s", + WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND, + 2, ihost->workq_name); if (!ihost->workq) goto free_host; } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index b2a803c51288..7bbeb563ab8c 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -4728,7 +4728,9 @@ static __init int iscsi_transport_init(void) goto unregister_flashnode_bus; } - iscsi_eh_timer_workq = create_singlethread_workqueue("iscsi_eh"); + iscsi_eh_timer_workq = alloc_workqueue("%s", + WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND, + 2, "iscsi_eh"); if (!iscsi_eh_timer_workq) { err = -ENOMEM; goto release_nls; From ce9a9321c11817e54ab36d75c2fab62537fcc993 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 19 May 2020 21:07:38 -0700 Subject: [PATCH 483/562] scsi: qla2xxx: Remove an unused function This was detected by building the qla2xxx driver with clang. See also commit a9083016a531 ("[SCSI] qla2xxx: Add ISP82XX support"). Link: https://lore.kernel.org/r/20200520040738.1017-1-bvanassche@acm.org Cc: Arun Easi Cc: Nilesh Javali Cc: Himanshu Madhani Cc: Hannes Reinecke Cc: Daniel Wagner Cc: Martin Wilck Cc: Roman Bolshakov Reviewed-by: Roman Bolshakov Reviewed-by: Himanshu Madhani Reviewed-by: Daniel Wagner Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_nx.c | 41 ----------------------------------- 1 file changed, 41 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 21f968e4a584..0baf55b7e88f 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -380,47 +380,6 @@ qla82xx_pci_set_crbwindow_2M(struct qla_hw_data *ha, ulong off_in, *off_out = (off_in & MASK(16)) + CRB_INDIRECT_2M + ha->nx_pcibase; } -static inline unsigned long -qla82xx_pci_set_crbwindow(struct qla_hw_data *ha, u64 off) -{ - scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); - /* See if we are currently pointing to the region we want to use next */ - if ((off >= QLA82XX_CRB_PCIX_HOST) && (off < QLA82XX_CRB_DDR_NET)) { - /* No need to change window. PCIX and PCIEregs are in both - * regs are in both windows. - */ - return off; - } - - if ((off >= QLA82XX_CRB_PCIX_HOST) && (off < QLA82XX_CRB_PCIX_HOST2)) { - /* We are in first CRB window */ - if (ha->curr_window != 0) - WARN_ON(1); - return off; - } - - if ((off > QLA82XX_CRB_PCIX_HOST2) && (off < QLA82XX_CRB_MAX)) { - /* We are in second CRB window */ - off = off - QLA82XX_CRB_PCIX_HOST2 + QLA82XX_CRB_PCIX_HOST; - - if (ha->curr_window != 1) - return off; - - /* We are in the QM or direct access - * register region - do nothing - */ - if ((off >= QLA82XX_PCI_DIRECT_CRB) && - (off < QLA82XX_PCI_CAMQM_MAX)) - return off; - } - /* strange address given */ - ql_dbg(ql_dbg_p3p, vha, 0xb001, - "%s: Warning: unm_nic_pci_set_crbwindow " - "called with an unknown address(%llx).\n", - QLA2XXX_DRIVER_NAME, off); - return off; -} - static int qla82xx_pci_get_crb_addr_2M(struct qla_hw_data *ha, ulong off_in, void __iomem **off_out) From ac988c49367a52b466285239361ede0f74c672da Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 20 May 2020 15:08:19 +0200 Subject: [PATCH 484/562] scsi: qla2xxx: Remove return value from qla_nvme_ls() The function always returns QLA_SUCCESS and the caller qla2x00_start_sp() doesn't even evalute the return value. So there is no point in returning a status. Link: https://lore.kernel.org/r/20200520130819.90625-1-dwagner@suse.de Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Roman Bolshakov Reviewed-by: Himanshu Madhani Signed-off-by: Daniel Wagner Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_iocb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index b039bd83f947..8865c35d3421 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3607,11 +3607,10 @@ static void qla2x00_send_notify_ack_iocb(srb_t *sp, /* * Build NVME LS request */ -static int +static void qla_nvme_ls(srb_t *sp, struct pt_ls4_request *cmd_pkt) { struct srb_iocb *nvme; - int rval = QLA_SUCCESS; nvme = &sp->u.iocb_cmd; cmd_pkt->entry_type = PT_LS4_REQUEST; @@ -3631,8 +3630,6 @@ qla_nvme_ls(srb_t *sp, struct pt_ls4_request *cmd_pkt) cmd_pkt->rx_byte_count = cpu_to_le32(nvme->u.nvme.rsp_len); cmd_pkt->dsd[1].length = cpu_to_le32(nvme->u.nvme.rsp_len); put_unaligned_le64(nvme->u.nvme.rsp_dma, &cmd_pkt->dsd[1].address); - - return rval; } static void From 5ae6a6a915033bfee79e76e0c374d4f927909edc Mon Sep 17 00:00:00 2001 From: Sudhakar Panneerselvam Date: Fri, 22 May 2020 16:51:57 +0000 Subject: [PATCH 485/562] scsi: vhost: Notify TCM about the maximum sg entries supported per command vhost-scsi pre-allocates the maximum sg entries per command and if a command requires more than VHOST_SCSI_PREALLOC_SGLS entries, then that command is failed by it. This patch lets vhost communicate the max sg limit when it registers vhost_scsi_ops with TCM. With this change, TCM would report the max sg entries through "Block Limits" VPD page which will be typically queried by the SCSI initiator during device discovery. By knowing this limit, the initiator could ensure the maximum transfer length is less than or equal to what is reported by vhost-scsi. Link: https://lore.kernel.org/r/1590166317-953-1-git-send-email-sudhakar.panneerselvam@oracle.com Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Paolo Bonzini Cc: Stefan Hajnoczi Reviewed-by: Mike Christie Signed-off-by: Sudhakar Panneerselvam Signed-off-by: Martin K. Petersen --- drivers/vhost/scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 7653667a8cdc..3146ab880400 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2280,6 +2280,7 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = { static const struct target_core_fabric_ops vhost_scsi_ops = { .module = THIS_MODULE, .fabric_name = "vhost", + .max_data_sg_nents = VHOST_SCSI_PREALLOC_SGLS, .tpg_get_wwn = vhost_scsi_get_fabric_wwn, .tpg_get_tag = vhost_scsi_get_tpgt, .tpg_check_demo_mode = vhost_scsi_check_true, From 9d7464b18892332e35ff37f0b024429a1a9835e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 23 May 2020 13:11:29 +0300 Subject: [PATCH 486/562] scsi: target: tcmu: Fix a use after free in tcmu_check_expired_queue_cmd() The pr_debug() dereferences "cmd" after we already freed it by calling tcmu_free_cmd(cmd). The debug printk needs to be done earlier. Link: https://lore.kernel.org/r/20200523101129.GB98132@mwanda Fixes: 61fb24822166 ("scsi: target: tcmu: Userspace must not complete queued commands") Reviewed-by: Mike Christie Reviewed-by: David Disseldorp Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 1d030d57c28c..0e281d30d81e 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1292,13 +1292,13 @@ static void tcmu_check_expired_queue_cmd(struct tcmu_cmd *cmd) if (!time_after(jiffies, cmd->deadline)) return; + pr_debug("Timing out queued cmd %p on dev %s.\n", + cmd, cmd->tcmu_dev->name); + list_del_init(&cmd->queue_entry); se_cmd = cmd->se_cmd; tcmu_free_cmd(cmd); - pr_debug("Timing out queued cmd %p on dev %s.\n", - cmd, cmd->tcmu_dev->name); - target_complete_cmd(se_cmd, SAM_STAT_TASK_SET_FULL); } From 7217e6e694da3aae6d17db8a7f7460c8d4817ebf Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Mon, 25 May 2020 22:16:24 +0800 Subject: [PATCH 487/562] scsi: lpfc: Fix lpfc_nodelist leak when processing unsolicited event In order to create or activate a new node, lpfc_els_unsol_buffer() invokes lpfc_nlp_init() or lpfc_enable_node() or lpfc_nlp_get(), all of them will return a reference of the specified lpfc_nodelist object to "ndlp" with increased refcnt. When lpfc_els_unsol_buffer() returns, local variable "ndlp" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in one exception handling path of lpfc_els_unsol_buffer(). When "ndlp" in DEV_LOSS, the function forgets to decrease the refcnt increased by lpfc_nlp_init() or lpfc_enable_node() or lpfc_nlp_get(), causing a refcnt leak. Fix this issue by calling lpfc_nlp_put() when "ndlp" in DEV_LOSS. Link: https://lore.kernel.org/r/1590416184-52592-1-git-send-email-xiyuyang19@fudan.edu.cn Reviewed-by: Daniel Wagner Reviewed-by: James Smart Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 565a21401660..3d670568a276 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8508,6 +8508,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, spin_lock_irq(shost->host_lock); if (ndlp->nlp_flag & NLP_IN_DEV_LOSS) { spin_unlock_irq(shost->host_lock); + if (newnode) + lpfc_nlp_put(ndlp); goto dropit; } spin_unlock_irq(shost->host_lock); From f56577e8c7d0f3054f97d1f0d1cbe9a4d179cc47 Mon Sep 17 00:00:00 2001 From: Suganath Prabu S Date: Fri, 22 May 2020 06:35:58 -0400 Subject: [PATCH 488/562] scsi: mpt3sas: Fix reply queue count in non RDPQ mode For non RDPQ mode, the driver allocates a single contiguous block of memory pool for all reply descriptor post queues and passes down a single address in the ReplyDescriptorPostQueueAddress field of the IOC Init Request Message to the firmware. So reply_post queue will have only one entry which holds the address of this single contiguous block of memory pool. While allocating the reply descriptor post queue pool, driver should loop only once in non-RDPQ mode. But the driver is looping for ioc->reply_queue_count number of times even though reply_post queue's queue depth is only one in non-RDPQ mode. This leads to 'BUG: KASAN: use-after-free in base_alloc_rdpq_dma_pool'. The fix is to loop only once while allocating memory for the reply descriptor post queue in non-RDPQ mode Fixes: 8012209eb26b ("scsi: mpt3sas: Handle RDPQ DMA allocation in same 4G region") Link: https://lore.kernel.org/r/20200522103558.5710-1-suganath-prabu.subramani@broadcom.com Reported-by: Tomas Henzl Reviewed-by: Tomas Henzl Signed-off-by: Suganath Prabu S Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index dc260fef9897..beaea1933f5c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -4809,6 +4809,7 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) int j = 0; int dma_alloc_count = 0; struct chain_tracker *ct; + int count = ioc->rdpq_array_enable ? ioc->reply_queue_count : 1; dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); @@ -4850,9 +4851,9 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) } if (ioc->reply_post) { - dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, + dma_alloc_count = DIV_ROUND_UP(count, RDPQ_MAX_INDEX_IN_ONE_CHUNK); - for (i = 0; i < ioc->reply_queue_count; i++) { + for (i = 0; i < count; i++) { if (i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0 && dma_alloc_count) { if (ioc->reply_post[i].reply_post_free) { @@ -4973,14 +4974,14 @@ base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) * Driver uses limitation of * VENTURA_SERIES to manage INVADER_SERIES as well. */ - dma_alloc_count = DIV_ROUND_UP(ioc->reply_queue_count, + dma_alloc_count = DIV_ROUND_UP(count, RDPQ_MAX_INDEX_IN_ONE_CHUNK); ioc->reply_post_free_dma_pool = dma_pool_create("reply_post_free pool", &ioc->pdev->dev, sz, 16, 0); if (!ioc->reply_post_free_dma_pool) return -ENOMEM; - for (i = 0; i < ioc->reply_queue_count; i++) { + for (i = 0; i < count; i++) { if ((i % RDPQ_MAX_INDEX_IN_ONE_CHUNK == 0) && dma_alloc_count) { ioc->reply_post[i].reply_post_free = dma_pool_alloc(ioc->reply_post_free_dma_pool, From 3be60b564de49875e47974c37fabced893cd0931 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 25 May 2020 13:41:25 -0700 Subject: [PATCH 489/562] scsi: ufs-qcom: Fix scheduling while atomic issue ufs_qcom_dump_dbg_regs() uses usleep_range, a sleeping function, but can be called from atomic context in the following flow: ufshcd_intr -> ufshcd_sl_intr -> ufshcd_check_errors -> ufshcd_print_host_regs -> ufshcd_vops_dbg_register_dump -> ufs_qcom_dump_dbg_regs This causes a boot crash on the Lenovo Miix 630 when the interrupt is handled on the idle thread. Fix the issue by switching to udelay(). Link: https://lore.kernel.org/r/20200525204125.46171-1-jeffrey.l.hugo@gmail.com Fixes: 9c46b8676271 ("scsi: ufs-qcom: dump additional testbus registers") Reviewed-by: Bean Huo Reviewed-by: Avri Altman Signed-off-by: Jeffrey Hugo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-qcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 00ce8d6cbb36..2e6ddb5cdfc2 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1658,11 +1658,11 @@ static void ufs_qcom_dump_dbg_regs(struct ufs_hba *hba) /* sleep a bit intermittently as we are dumping too much data */ ufs_qcom_print_hw_debug_reg_all(hba, NULL, ufs_qcom_dump_regs_wrapper); - usleep_range(1000, 1100); + udelay(1000); ufs_qcom_testbus_read(hba); - usleep_range(1000, 1100); + udelay(1000); ufs_qcom_print_unipro_testbus(hba); - usleep_range(1000, 1100); + udelay(1000); } /** From 3a66ae512b09971410abf9f1d546944c07b852ec Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 22 May 2020 16:32:09 +0800 Subject: [PATCH 490/562] scsi: ufs: Remove unnecessary memset for dev_info The whole UFS host instance has been zero-initialized by scsi_host_alloc(), thus UFS driver does not need to clear "dev_info" member specifically in ufshcd_device_params_init(). Simply remove the unnecessary code. Link: https://lore.kernel.org/r/20200522083212.4008-2-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c3389c9a4f29..9e55c524f330 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7267,9 +7267,6 @@ static int ufshcd_device_params_init(struct ufs_hba *hba) bool flag; int ret; - /* Clear any previous UFS device information */ - memset(&hba->dev_info, 0, sizeof(hba->dev_info)); - /* Init check for device descriptor sizes */ ufshcd_init_desc_sizes(hba); From c7cee3e746a5b5a79d415112f7b7af90e8259fa1 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 22 May 2020 16:32:10 +0800 Subject: [PATCH 491/562] scsi: ufs: Allow WriteBooster on UFS 2.2 devices According to the UFS specification, WriteBooster is officially supported by UFS 2.2. Since UFS 2.2 specification has been finalized in JEDEC and such devices have also showed up in the market, modify the checking rule for ufshcd_wb_probe() to allow these devices to enable WriteBooster. Link: https://lore.kernel.org/r/20200522083212.4008-3-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9e55c524f330..0dbd8a7a6642 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6930,6 +6930,7 @@ static int ufs_get_device_desc(struct ufs_hba *hba) * UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES enabled */ if (dev_info->wspecversion >= 0x310 || + dev_info->wspecversion == 0x220 || (hba->dev_quirks & UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES)) ufshcd_wb_probe(hba, desc_buf); From e31011ab370989022abf216c606f947a3b02a53a Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 22 May 2020 16:32:11 +0800 Subject: [PATCH 492/562] scsi: ufs: Fix index of attributes query for WriteBooster feature For WriteBooster feature related attributes, the index used by query shall be LUN ID if LU Dedicated buffer mode is enabled. Link: https://lore.kernel.org/r/20200522083212.4008-4-stanley.chu@mediatek.com Reviewed-by: Avri Altman Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-sysfs.c | 13 +++++++++++-- drivers/scsi/ufs/ufshcd.c | 16 ++++++++++------ drivers/scsi/ufs/ufshcd.h | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c index a0b3763e1dc2..2d71d232a69d 100644 --- a/drivers/scsi/ufs/ufs-sysfs.c +++ b/drivers/scsi/ufs/ufs-sysfs.c @@ -637,7 +637,7 @@ static ssize_t _name##_show(struct device *dev, \ int ret; \ struct ufs_hba *hba = dev_get_drvdata(dev); \ if (ufshcd_is_wb_flags(QUERY_FLAG_IDN##_uname)) \ - index = ufshcd_wb_get_flag_index(hba); \ + index = ufshcd_wb_get_query_index(hba); \ pm_runtime_get_sync(hba->dev); \ ret = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG, \ QUERY_FLAG_IDN##_uname, index, &flag); \ @@ -680,6 +680,12 @@ static const struct attribute_group ufs_sysfs_flags_group = { .attrs = ufs_sysfs_device_flags, }; +static inline bool ufshcd_is_wb_attrs(enum attr_idn idn) +{ + return ((idn >= QUERY_ATTR_IDN_WB_FLUSH_STATUS) && + (idn <= QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE)); +} + #define UFS_ATTRIBUTE(_name, _uname) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -687,9 +693,12 @@ static ssize_t _name##_show(struct device *dev, \ struct ufs_hba *hba = dev_get_drvdata(dev); \ u32 value; \ int ret; \ + u8 index = 0; \ + if (ufshcd_is_wb_attrs(QUERY_ATTR_IDN##_uname)) \ + index = ufshcd_wb_get_query_index(hba); \ pm_runtime_get_sync(hba->dev); \ ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, \ - QUERY_ATTR_IDN##_uname, 0, 0, &value); \ + QUERY_ATTR_IDN##_uname, index, 0, &value); \ pm_runtime_put_sync(hba->dev); \ if (ret) \ return -EINVAL; \ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 0dbd8a7a6642..3262ac0e39d4 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5199,7 +5199,7 @@ static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) else opcode = UPIU_QUERY_OPCODE_CLEAR_FLAG; - index = ufshcd_wb_get_flag_index(hba); + index = ufshcd_wb_get_query_index(hba); ret = ufshcd_query_flag_retry(hba, opcode, QUERY_FLAG_IDN_WB_EN, index, NULL); if (ret) { @@ -5225,7 +5225,7 @@ static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) else val = UPIU_QUERY_OPCODE_CLEAR_FLAG; - index = ufshcd_wb_get_flag_index(hba); + index = ufshcd_wb_get_query_index(hba); return ufshcd_query_flag_retry(hba, val, QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8, index, NULL); @@ -5248,7 +5248,7 @@ static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba) if (!ufshcd_is_wb_allowed(hba) || hba->wb_buf_flush_enabled) return 0; - index = ufshcd_wb_get_flag_index(hba); + index = ufshcd_wb_get_query_index(hba); ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, index, NULL); @@ -5270,7 +5270,7 @@ static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba) if (!ufshcd_is_wb_allowed(hba) || !hba->wb_buf_flush_enabled) return 0; - index = ufshcd_wb_get_flag_index(hba); + index = ufshcd_wb_get_query_index(hba); ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_CLEAR_FLAG, QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, index, NULL); @@ -5290,10 +5290,12 @@ static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, { u32 cur_buf; int ret; + u8 index; + index = ufshcd_wb_get_query_index(hba); ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE, - 0, 0, &cur_buf); + index, 0, &cur_buf); if (ret) { dev_err(hba->dev, "%s dCurWriteBoosterBufferSize read failed %d\n", __func__, ret); @@ -5316,6 +5318,7 @@ static bool ufshcd_wb_keep_vcc_on(struct ufs_hba *hba) { int ret; u32 avail_buf; + u8 index; if (!ufshcd_is_wb_allowed(hba)) return false; @@ -5330,9 +5333,10 @@ static bool ufshcd_wb_keep_vcc_on(struct ufs_hba *hba) * buffer (dCurrentWriteBoosterBufferSize). There's no point in * keeping vcc on when current buffer is empty. */ + index = ufshcd_wb_get_query_index(hba); ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE, - 0, 0, &avail_buf); + index, 0, &avail_buf); if (ret) { dev_warn(hba->dev, "%s dAvailableWriteBoosterBufferSize read failed %d\n", __func__, ret); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 88d420242782..9acaeaf94d29 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -868,7 +868,7 @@ static inline bool ufshcd_keep_autobkops_enabled_except_suspend( return hba->caps & UFSHCD_CAP_KEEP_AUTO_BKOPS_ENABLED_EXCEPT_SUSPEND; } -static inline u8 ufshcd_wb_get_flag_index(struct ufs_hba *hba) +static inline u8 ufshcd_wb_get_query_index(struct ufs_hba *hba) { if (hba->dev_info.b_wb_buffer_type == WB_BUF_MODE_LU_DEDICATED) return hba->dev_info.wb_dedicated_lu; From 51dd905bd2f617f5d6e60273bb5f0940bbd86611 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Fri, 22 May 2020 16:32:12 +0800 Subject: [PATCH 493/562] scsi: ufs: Fix WriteBooster flush during runtime suspend Currently UFS host driver promises VCC supply if UFS device needs to do WriteBooster flush during runtime suspend. However the UFS specification mentions: "While the flushing operation is in progress, the device is in Active power mode." Therefore UFS host driver needs to promise more: Keep UFS device as "Active power mode", otherwise UFS device shall not do any flush if device enters Sleep or PowerDown power mode. Similarly, the same promises shall be applied if device needs urgent BKOP during runtime suspend. Fix this by not changing device power mode if WriteBooster flush or urgent BKOP is required in ufshcd_suspend(). Now, if device finishes its job but is not resumed for a very long time, system will have unnecessary power drain because VCC is still supplied. A method to re-check the threshold of keeping VCC supply is required to fix the power drain. However, the threshold re-check needs to re-activate the link first because the decision depends on the latest device status. Also introduce a delayed work to force runtime resume after a certain delay during runtime suspend. This makes threshold re-check happen natually in the entry of the next runtime-suspend. The device can continue its WriteBooster flush or urgent BKOP jobs soon after resumed if device has no upcoming requests and link enters hibern8 state either by Auto-Hibern8 or hibern8 during clk-gating scheme. This solution not only prevents power drain but also makes as much use of time as possible for device's background jobs. Link: https://lore.kernel.org/r/20200522083212.4008-5-stanley.chu@mediatek.com Reviewed-by: Asutosh Das Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 2 +- drivers/scsi/ufs/ufshcd.c | 77 ++++++++++++++++++++++++++++----------- drivers/scsi/ufs/ufshcd.h | 1 + 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index fadba3a3bbcd..c70845d41449 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -574,7 +574,7 @@ struct ufs_dev_info { u32 d_ext_ufs_feature_sup; u8 b_wb_buffer_type; u32 d_wb_alloc_units; - bool keep_vcc_on; + bool b_rpm_dev_flush_capable; u8 b_presrv_uspc_en; }; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 3262ac0e39d4..5db18f444ea9 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -94,6 +94,9 @@ /* default delay of autosuspend: 2000 ms */ #define RPM_AUTOSUSPEND_DELAY_MS 2000 +/* Default delay of RPM device flush delayed work */ +#define RPM_DEV_FLUSH_RECHECK_WORK_DELAY_MS 5000 + /* Default value of wait time before gating device ref clock */ #define UFSHCD_REF_CLK_GATING_WAIT_US 0xFF /* microsecs */ @@ -5314,7 +5317,7 @@ static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, return false; } -static bool ufshcd_wb_keep_vcc_on(struct ufs_hba *hba) +static bool ufshcd_wb_need_flush(struct ufs_hba *hba) { int ret; u32 avail_buf; @@ -5352,6 +5355,21 @@ static bool ufshcd_wb_keep_vcc_on(struct ufs_hba *hba) return ufshcd_wb_presrv_usrspc_keep_vcc_on(hba, avail_buf); } +static void ufshcd_rpm_dev_flush_recheck_work(struct work_struct *work) +{ + struct ufs_hba *hba = container_of(to_delayed_work(work), + struct ufs_hba, + rpm_dev_flush_recheck_work); + /* + * To prevent unnecessary VCC power drain after device finishes + * WriteBooster buffer flush or Auto BKOPs, force runtime resume + * after a certain delay to recheck the threshold by next runtime + * suspend. + */ + pm_runtime_get_sync(hba->dev); + pm_runtime_put_sync(hba->dev); +} + /** * ufshcd_exception_event_handler - handle exceptions raised by device * @work: pointer to work data @@ -8099,8 +8117,7 @@ static void ufshcd_vreg_set_lpm(struct ufs_hba *hba) !hba->dev_info.is_lu_power_on_wp) { ufshcd_setup_vreg(hba, false); } else if (!ufshcd_is_ufs_dev_active(hba)) { - if (!hba->dev_info.keep_vcc_on) - ufshcd_toggle_vreg(hba->dev, hba->vreg_info.vcc, false); + ufshcd_toggle_vreg(hba->dev, hba->vreg_info.vcc, false); if (!ufshcd_is_link_active(hba)) { ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq); ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq2); @@ -8225,27 +8242,30 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_disable_auto_bkops(hba); } /* - * With wb enabled, if the bkops is enabled or if the - * configured WB type is 70% full, keep vcc ON - * for the device to flush the wb buffer + * If device needs to do BKOP or WB buffer flush during + * Hibern8, keep device power mode as "active power mode" + * and VCC supply. */ - if ((hba->auto_bkops_enabled && ufshcd_is_wb_allowed(hba)) || - ufshcd_wb_keep_vcc_on(hba)) - hba->dev_info.keep_vcc_on = true; - else - hba->dev_info.keep_vcc_on = false; - } else { - hba->dev_info.keep_vcc_on = false; + hba->dev_info.b_rpm_dev_flush_capable = + hba->auto_bkops_enabled || + (((req_link_state == UIC_LINK_HIBERN8_STATE) || + ((req_link_state == UIC_LINK_ACTIVE_STATE) && + ufshcd_is_auto_hibern8_enabled(hba))) && + ufshcd_wb_need_flush(hba)); } - if ((req_dev_pwr_mode != hba->curr_dev_pwr_mode) && - ((ufshcd_is_runtime_pm(pm_op) && !hba->auto_bkops_enabled) || - !ufshcd_is_runtime_pm(pm_op))) { - /* ensure that bkops is disabled */ - ufshcd_disable_auto_bkops(hba); - ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); - if (ret) - goto enable_gating; + if (req_dev_pwr_mode != hba->curr_dev_pwr_mode) { + if ((ufshcd_is_runtime_pm(pm_op) && !hba->auto_bkops_enabled) || + !ufshcd_is_runtime_pm(pm_op)) { + /* ensure that bkops is disabled */ + ufshcd_disable_auto_bkops(hba); + } + + if (!hba->dev_info.b_rpm_dev_flush_capable) { + ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); + if (ret) + goto enable_gating; + } } flush_work(&hba->eeh_work); @@ -8298,9 +8318,16 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (hba->clk_scaling.is_allowed) ufshcd_resume_clkscaling(hba); hba->clk_gating.is_suspended = false; + hba->dev_info.b_rpm_dev_flush_capable = false; ufshcd_release(hba); out: + if (hba->dev_info.b_rpm_dev_flush_capable) { + schedule_delayed_work(&hba->rpm_dev_flush_recheck_work, + msecs_to_jiffies(RPM_DEV_FLUSH_RECHECK_WORK_DELAY_MS)); + } + hba->pm_op_in_progress = 0; + if (ret) ufshcd_update_reg_hist(&hba->ufs_stats.suspend_err, (u32)ret); return ret; @@ -8389,6 +8416,11 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) /* Enable Auto-Hibernate if configured */ ufshcd_auto_hibern8_enable(hba); + if (hba->dev_info.b_rpm_dev_flush_capable) { + hba->dev_info.b_rpm_dev_flush_capable = false; + cancel_delayed_work(&hba->rpm_dev_flush_recheck_work); + } + /* Schedule clock gating in case of no access to UFS device yet */ ufshcd_release(hba); @@ -8862,6 +8894,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) UFS_SLEEP_PWR_MODE, UIC_LINK_HIBERN8_STATE); + INIT_DELAYED_WORK(&hba->rpm_dev_flush_recheck_work, + ufshcd_rpm_dev_flush_recheck_work); + /* Set the default auto-hiberate idle timer value to 150 ms */ if (ufshcd_is_auto_hibern8_supported(hba) && !hba->ahit) { hba->ahit = FIELD_PREP(UFSHCI_AHIBERN8_TIMER_MASK, 150) | diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 9acaeaf94d29..bf97d616e597 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -745,6 +745,7 @@ struct ufs_hba { struct request_queue *bsg_queue; bool wb_buf_flush_enabled; bool wb_enabled; + struct delayed_work rpm_dev_flush_recheck_work; }; /* Returns true if clocks can be gated. Otherwise false */ From 7e7cd796f2776d055351d80328f45633bbb0aae5 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 19 May 2020 22:29:59 -0400 Subject: [PATCH 494/562] scsi: iscsi: Fix deadlock on recovery path during GFP_IO reclaim iSCSI suffers from a deadlock in case a management command submitted via the netlink socket sleeps on an allocation while holding the rx_queue_mutex if that allocation causes a memory reclaim that writebacks to a failed iSCSI device. The recovery procedure can never make progress to recover the failed disk or abort outstanding IO operations to complete the reclaim (since rx_queue_mutex is locked), thus locking the system. Nevertheless, just marking all allocations under rx_queue_mutex as GFP_NOIO (or locking the userspace process with something like PF_MEMALLOC_NOIO) is not enough, since the iSCSI command code relies on other subsystems that try to grab locked mutexes, whose threads are GFP_IO, leading to the same deadlock. One instance where this situation can be observed is in the backtraces below, stitched from multiple bugs reports, involving the kobj uevent sent when a session is created. The root of the problem is not the fact that iSCSI does GFP_IO allocations, that is acceptable. The actual problem is that rx_queue_mutex has a very large granularity, covering every unrelated netlink command execution at the same time as the error recovery path. The proposed fix leverages the recently added mechanism to stop failed connections from the kernel, by enabling it to execute even though a management command from the netlink socket is being run (rx_queue_mutex is held), provided that the command is known to be safe. It splits the rx_queue_mutex in two mutexes, one protecting from concurrent command execution from the netlink socket, and one protecting stop_conn from racing with other connection management operations that might conflict with it. It is not very pretty, but it is the simplest way to resolve the deadlock. I considered making it a lock per connection, but some external mutex would still be needed to deal with iscsi_if_destroy_conn. The patch was tested by forcing a memory shrinker (unrelated, but used bufio/dm-verity) to reclaim iSCSI pages every time ISCSI_UEVENT_CREATE_SESSION happens, which is reasonable to simulate reclaims that might happen with GFP_KERNEL on that path. Then, a faulty hung target causes a connection to fail during intensive IO, at the same time a new session is added by iscsid. The following stacktraces are stiches from several bug reports, showing a case where the deadlock can happen. iSCSI-write holding: rx_queue_mutex waiting: uevent_sock_mutex kobject_uevent_env+0x1bd/0x419 kobject_uevent+0xb/0xd device_add+0x48a/0x678 scsi_add_host_with_dma+0xc5/0x22d iscsi_host_add+0x53/0x55 iscsi_sw_tcp_session_create+0xa6/0x129 iscsi_if_rx+0x100/0x1247 netlink_unicast+0x213/0x4f0 netlink_sendmsg+0x230/0x3c0 iscsi_fail iscsi_conn_failure waiting: rx_queue_mutex schedule_preempt_disabled+0x325/0x734 __mutex_lock_slowpath+0x18b/0x230 mutex_lock+0x22/0x40 iscsi_conn_failure+0x42/0x149 worker_thread+0x24a/0xbc0 EventManager_ holding: uevent_sock_mutex waiting: dm_bufio_client->lock dm_bufio_lock+0xe/0x10 shrink+0x34/0xf7 shrink_slab+0x177/0x5d0 do_try_to_free_pages+0x129/0x470 try_to_free_mem_cgroup_pages+0x14f/0x210 memcg_kmem_newpage_charge+0xa6d/0x13b0 __alloc_pages_nodemask+0x4a3/0x1a70 fallback_alloc+0x1b2/0x36c __kmalloc_node_track_caller+0xb9/0x10d0 __alloc_skb+0x83/0x2f0 kobject_uevent_env+0x26b/0x419 dm_kobject_uevent+0x70/0x79 dev_suspend+0x1a9/0x1e7 ctl_ioctl+0x3e9/0x411 dm_ctl_ioctl+0x13/0x17 do_vfs_ioctl+0xb3/0x460 SyS_ioctl+0x5e/0x90 MemcgReclaimerD" holding: dm_bufio_client->lock waiting: stuck io to finish (needs iscsi_fail thread to progress) schedule at ffffffffbd603618 io_schedule at ffffffffbd603ba4 do_io_schedule at ffffffffbdaf0d94 __wait_on_bit at ffffffffbd6008a6 out_of_line_wait_on_bit at ffffffffbd600960 wait_on_bit.constprop.10 at ffffffffbdaf0f17 __make_buffer_clean at ffffffffbdaf18ba __cleanup_old_buffer at ffffffffbdaf192f shrink at ffffffffbdaf19fd do_shrink_slab at ffffffffbd6ec000 shrink_slab at ffffffffbd6ec24a do_try_to_free_pages at ffffffffbd6eda09 try_to_free_mem_cgroup_pages at ffffffffbd6ede7e mem_cgroup_resize_limit at ffffffffbd7024c0 mem_cgroup_write at ffffffffbd703149 cgroup_file_write at ffffffffbd6d9c6e sys_write at ffffffffbd6662ea system_call_fastpath at ffffffffbdbc34a2 Link: https://lore.kernel.org/r/20200520022959.1912856-1-krisman@collabora.com Reported-by: Khazhismel Kumykov Reviewed-by: Lee Duncan Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 64 +++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 7bbeb563ab8c..f4cc08eb47ba 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1616,6 +1616,12 @@ static DECLARE_TRANSPORT_CLASS(iscsi_connection_class, static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex); +/* + * conn_mutex protects the {start,bind,stop,destroy}_conn from racing + * against the kernel stop_connection recovery mechanism + */ +static DEFINE_MUTEX(conn_mutex); + static LIST_HEAD(sesslist); static LIST_HEAD(sessdestroylist); static DEFINE_SPINLOCK(sesslock); @@ -2445,6 +2451,32 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, } EXPORT_SYMBOL_GPL(iscsi_offload_mesg); +/* + * This can be called without the rx_queue_mutex, if invoked by the kernel + * stop work. But, in that case, it is guaranteed not to race with + * iscsi_destroy by conn_mutex. + */ +static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) +{ + /* + * It is important that this path doesn't rely on + * rx_queue_mutex, otherwise, a thread doing allocation on a + * start_session/start_connection could sleep waiting on a + * writeback to a failed iscsi device, that cannot be recovered + * because the lock is held. If we don't hold it here, the + * kernel stop_conn_work_fn has a chance to stop the broken + * session and resolve the allocation. + * + * Still, the user invoked .stop_conn() needs to be serialized + * with stop_conn_work_fn by a private mutex. Not pretty, but + * it works. + */ + mutex_lock(&conn_mutex); + conn->transport->stop_conn(conn, flag); + mutex_unlock(&conn_mutex); + +} + static void stop_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn, *tmp; @@ -2463,30 +2495,17 @@ static void stop_conn_work_fn(struct work_struct *work) uint32_t sid = iscsi_conn_get_sid(conn); struct iscsi_cls_session *session; - mutex_lock(&rx_queue_mutex); - session = iscsi_session_lookup(sid); if (session) { if (system_state != SYSTEM_RUNNING) { session->recovery_tmo = 0; - conn->transport->stop_conn(conn, - STOP_CONN_TERM); + iscsi_if_stop_conn(conn, STOP_CONN_TERM); } else { - conn->transport->stop_conn(conn, - STOP_CONN_RECOVER); + iscsi_if_stop_conn(conn, STOP_CONN_RECOVER); } } list_del_init(&conn->conn_list_err); - - mutex_unlock(&rx_queue_mutex); - - /* we don't want to hold rx_queue_mutex for too long, - * for instance if many conns failed at the same time, - * since this stall other iscsi maintenance operations. - * Give other users a chance to proceed. - */ - cond_resched(); } } @@ -2846,8 +2865,11 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev spin_unlock_irqrestore(&connlock, flags); ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n"); + + mutex_lock(&conn_mutex); if (transport->destroy_conn) transport->destroy_conn(conn); + mutex_unlock(&conn_mutex); return 0; } @@ -3689,9 +3711,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; } + mutex_lock(&conn_mutex); ev->r.retcode = transport->bind_conn(session, conn, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); + mutex_unlock(&conn_mutex); + if (ev->r.retcode || !transport->ep_connect) break; @@ -3713,9 +3738,11 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) case ISCSI_UEVENT_START_CONN: conn = iscsi_conn_lookup(ev->u.start_conn.sid, ev->u.start_conn.cid); if (conn) { + mutex_lock(&conn_mutex); ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) conn->state = ISCSI_CONN_UP; + mutex_unlock(&conn_mutex); } else err = -EINVAL; @@ -3723,17 +3750,20 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) case ISCSI_UEVENT_STOP_CONN: conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); if (conn) - transport->stop_conn(conn, ev->u.stop_conn.flag); + iscsi_if_stop_conn(conn, ev->u.stop_conn.flag); else err = -EINVAL; break; case ISCSI_UEVENT_SEND_PDU: conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); - if (conn) + if (conn) { + mutex_lock(&conn_mutex); ev->r.retcode = transport->send_pdu(conn, (struct iscsi_hdr*)((char*)ev + sizeof(*ev)), (char*)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size, ev->u.send_pdu.data_size); + mutex_unlock(&conn_mutex); + } else err = -EINVAL; break; From 1f93ad177d24e23480194a1ea34e3c5d8a47ef92 Mon Sep 17 00:00:00 2001 From: Chen Tao Date: Wed, 20 May 2020 17:10:36 +0800 Subject: [PATCH 495/562] scsi: ibmvscsi: Make some functions static Fix the following warning: drivers/scsi/ibmvscsi/ibmvscsi.c:2387:12: warning: symbol 'ibmvscsi_module_init' was not declared. Should it be static? drivers/scsi/ibmvscsi/ibmvscsi.c:2409:13: warning: symbol 'ibmvscsi_module_exit' was not declared. Should it be static? Link: https://lore.kernel.org/r/20200520091036.247286-1-chentao107@huawei.com Signed-off-by: Chen Tao Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 7f66a7783209..be173135e05e 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -2388,7 +2388,7 @@ static struct vio_driver ibmvscsi_driver = { static struct srp_function_template ibmvscsi_transport_functions = { }; -int __init ibmvscsi_module_init(void) +static int __init ibmvscsi_module_init(void) { int ret; @@ -2410,7 +2410,7 @@ int __init ibmvscsi_module_init(void) return ret; } -void __exit ibmvscsi_module_exit(void) +static void __exit ibmvscsi_module_exit(void) { vio_unregister_driver(&ibmvscsi_driver); srp_release_transport(ibmvscsi_transport_template); From b6170a49c59c27a10efed26c5a2969403e69aaba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 May 2020 15:12:21 +0300 Subject: [PATCH 496/562] scsi: cxgb3i: Fix some leaks in init_act_open() There wasn't any clean up done if cxgb3_alloc_atid() failed and also the original code didn't release "csk->l2t". Link: https://lore.kernel.org/r/20200521121221.GA247492@mwanda Fixes: 6f7efaabefeb ("[SCSI] cxgb3i: change cxgb3i to use libcxgbi") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/cxgb3i/cxgb3i.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index 524cdbcd29aa..ec7d01f6e2d5 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -959,6 +959,7 @@ static int init_act_open(struct cxgbi_sock *csk) struct net_device *ndev = cdev->ports[csk->port_id]; struct cxgbi_hba *chba = cdev->hbas[csk->port_id]; struct sk_buff *skb = NULL; + int ret; log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx.\n", csk, csk->state, csk->flags); @@ -979,16 +980,16 @@ static int init_act_open(struct cxgbi_sock *csk) csk->atid = cxgb3_alloc_atid(t3dev, &t3_client, csk); if (csk->atid < 0) { pr_err("NO atid available.\n"); - return -EINVAL; + ret = -EINVAL; + goto put_sock; } cxgbi_sock_set_flag(csk, CTPF_HAS_ATID); cxgbi_sock_get(csk); skb = alloc_wr(sizeof(struct cpl_act_open_req), 0, GFP_KERNEL); if (!skb) { - cxgb3_free_atid(t3dev, csk->atid); - cxgbi_sock_put(csk); - return -ENOMEM; + ret = -ENOMEM; + goto free_atid; } skb->sk = (struct sock *)csk; set_arp_failure_handler(skb, act_open_arp_failure); @@ -1010,6 +1011,15 @@ static int init_act_open(struct cxgbi_sock *csk) cxgbi_sock_set_state(csk, CTP_ACTIVE_OPEN); send_act_open_req(csk, skb, csk->l2t); return 0; + +free_atid: + cxgb3_free_atid(t3dev, csk->atid); +put_sock: + cxgbi_sock_put(csk); + l2t_release(t3dev, csk->l2t); + csk->l2t = NULL; + + return ret; } cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS] = { From 22617e21633142dd2b81541cb3b95d6fb59aa85f Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Tue, 26 May 2020 15:33:40 +0530 Subject: [PATCH 497/562] scsi: ufs: ti-j721e-ufs: Fix unwinding of pm_runtime changes Fix unwinding of pm_runtime changes when bailing out of driver probe due to a failure and also on removal of driver. Link: https://lore.kernel.org/r/20200526100340.15032-1-vigneshr@ti.com Fixes: 6979e56cec97 ("scsi: ufs: Add driver for TI wrapper for Cadence UFS IP") Reported-by: Dinghao Liu Signed-off-by: Vignesh Raghavendra Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ti-j721e-ufs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ti-j721e-ufs.c b/drivers/scsi/ufs/ti-j721e-ufs.c index 5216d228cdd9..46bb905b4d6a 100644 --- a/drivers/scsi/ufs/ti-j721e-ufs.c +++ b/drivers/scsi/ufs/ti-j721e-ufs.c @@ -32,14 +32,14 @@ static int ti_j721e_ufs_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(dev); if (ret < 0) { pm_runtime_put_noidle(dev); - return ret; + goto disable_pm; } /* Select MPHY refclk frequency */ clk = devm_clk_get(dev, NULL); if (IS_ERR(clk)) { dev_err(dev, "Cannot claim MPHY clock.\n"); - return PTR_ERR(clk); + goto clk_err; } clk_rate = clk_get_rate(clk); if (clk_rate == 26000000) @@ -54,16 +54,23 @@ static int ti_j721e_ufs_probe(struct platform_device *pdev) dev); if (ret) { dev_err(dev, "failed to populate child nodes %d\n", ret); - pm_runtime_put_sync(dev); + goto clk_err; } return ret; + +clk_err: + pm_runtime_put_sync(dev); +disable_pm: + pm_runtime_disable(dev); + return ret; } static int ti_j721e_ufs_remove(struct platform_device *pdev) { of_platform_depopulate(&pdev->dev); pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return 0; } From a34d5e56315007519f6c748e3ca0cb367264bbda Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 May 2020 22:40:28 +0300 Subject: [PATCH 498/562] gpiolib: Separate GPIO_GET_LINEINFO_WATCH_IOCTL conditional We already have two conditionals inside the outer one to check if the command is GPIO_GET_LINEINFO_WATCH_IOCTL. I think it's time to actually do what I have proposed in the first place, i.e. to separate GPIO_GET_LINEINFO_WATCH_IOCTL from GPIO_GET_LINEINFO_IOCTL. It's +13 LOCs, and surprisingly only +13 bytes of binary on x86_32, but for the price of much better readability. Signed-off-by: Andy Shevchenko Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20200525194028.74236-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 2e4aacbad8fe..34259c1e78ca 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1265,8 +1265,7 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (copy_to_user(ip, &chipinfo, sizeof(chipinfo))) return -EFAULT; return 0; - } else if (cmd == GPIO_GET_LINEINFO_IOCTL || - cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) { + } else if (cmd == GPIO_GET_LINEINFO_IOCTL) { struct gpioline_info lineinfo; if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) @@ -1278,8 +1277,28 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) hwgpio = gpio_chip_hwgpio(desc); - if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL && - test_bit(hwgpio, priv->watched_lines)) + gpio_desc_to_lineinfo(desc, &lineinfo); + + if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) + return -EFAULT; + return 0; + } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) { + return linehandle_create(gdev, ip); + } else if (cmd == GPIO_GET_LINEEVENT_IOCTL) { + return lineevent_create(gdev, ip); + } else if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) { + struct gpioline_info lineinfo; + + if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) + return -EFAULT; + + desc = gpiochip_get_desc(gc, lineinfo.line_offset); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + hwgpio = gpio_chip_hwgpio(desc); + + if (test_bit(hwgpio, priv->watched_lines)) return -EBUSY; gpio_desc_to_lineinfo(desc, &lineinfo); @@ -1287,14 +1306,8 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) return -EFAULT; - if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) - set_bit(hwgpio, priv->watched_lines); - + set_bit(hwgpio, priv->watched_lines); return 0; - } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) { - return linehandle_create(gdev, ip); - } else if (cmd == GPIO_GET_LINEEVENT_IOCTL) { - return lineevent_create(gdev, ip); } else if (cmd == GPIO_GET_LINEINFO_UNWATCH_IOCTL) { if (copy_from_user(&offset, ip, sizeof(offset))) return -EFAULT; From bebcfe85f4338ba1434561a460169a5e0af78f98 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 27 May 2020 09:41:52 -0500 Subject: [PATCH 499/562] RDMA/core: Use sizeof_field() helper Make use of the sizeof_field() helper instead of an open-coded version. Link: https://lore.kernel.org/r/20200527144152.GA22605@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 14 +++++++------- drivers/infiniband/core/ud_header.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/uverbs_ioctl.c | 2 +- include/rdma/uverbs_ioctl.h | 12 ++++++------ 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 5c878646ff62..a2ed09a3c714 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -190,7 +190,7 @@ static u32 tid; #define PATH_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \ - .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \ + .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { @@ -292,7 +292,7 @@ static const struct ib_field path_rec_table[] = { .struct_offset_bytes = \ offsetof(struct sa_path_rec, field), \ .struct_size_bytes = \ - sizeof((struct sa_path_rec *)0)->field, \ + sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field opa_path_rec_table[] = { @@ -420,7 +420,7 @@ static const struct ib_field opa_path_rec_table[] = { #define MCMEMBER_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \ .field_name = "sa_mcmember_rec:" #field static const struct ib_field mcmember_rec_table[] = { @@ -504,7 +504,7 @@ static const struct ib_field mcmember_rec_table[] = { #define SERVICE_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_sa_service_rec, field), \ .field_name = "sa_service_rec:" #field static const struct ib_field service_rec_table[] = { @@ -552,7 +552,7 @@ static const struct ib_field service_rec_table[] = { #define CLASSPORTINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ - .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \ .field_name = "ib_class_port_info:" #field static const struct ib_field ib_classport_info_rec_table[] = { @@ -630,7 +630,7 @@ static const struct ib_field ib_classport_info_rec_table[] = { .struct_offset_bytes =\ offsetof(struct opa_class_port_info, field), \ .struct_size_bytes = \ - sizeof((struct opa_class_port_info *)0)->field, \ + sizeof_field(struct opa_class_port_info, field), \ .field_name = "opa_class_port_info:" #field static const struct ib_field opa_classport_info_rec_table[] = { @@ -710,7 +710,7 @@ static const struct ib_field opa_classport_info_rec_table[] = { #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ - .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \ .field_name = "sa_guidinfo_rec:" #field static const struct ib_field guidinfo_rec_table[] = { diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 29a45d2f8898..d65d541b9a25 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -41,7 +41,7 @@ #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ - .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_unpacked_ ## header, field), \ .field_name = #header ":" #field static const struct ib_field lrh_table[] = { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4859ac0df17c..2067a939788b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3741,7 +3741,7 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) #define UAPI_DEF_WRITE_IO(req, resp) \ .write.has_resp = 1 + \ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \ - BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \ + BUILD_BUG_ON_ZERO(sizeof_field(req, response) != \ sizeof(u64)), \ .write.req_size = sizeof(req), .write.resp_size = sizeof(resp) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 42c5696f03bd..2d882c02387c 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -137,7 +137,7 @@ EXPORT_SYMBOL(_uverbs_alloc); static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, u16 len) { - if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data)) + if (uattr->len > sizeof_field(struct ib_uverbs_attr, data)) return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len, uattr->len - len); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 5bd2b037e914..0418d7bddf3e 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -420,9 +420,9 @@ struct uapi_definition { .scope = UAPI_SCOPE_OBJECT, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ - BUILD_BUG_ON_ZERO( \ - sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \ - sizeof(void *)), \ + BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ + ibdev_fn) != \ + sizeof(void *)), \ } /* @@ -435,9 +435,9 @@ struct uapi_definition { .scope = UAPI_SCOPE_METHOD, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ - BUILD_BUG_ON_ZERO( \ - sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \ - sizeof(void *)), \ + BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ + ibdev_fn) != \ + sizeof(void *)), \ } /* Call a function to determine if the entire object is supported or not */ From d246a3061528be6d852156d25c02ea69d6db7e65 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 27 May 2020 16:57:03 +0300 Subject: [PATCH 500/562] IB/mlx5: Fix DEVX support for MLX5_CMD_OP_INIT2INIT_QP command The commit citied in the Fixes line wasn't complete and solved only part of the problems. Update the mlx5_ib to properly support MLX5_CMD_OP_INIT2INIT_QP command in the DEVX, that is required when modify the QP tx_port_affinity. Fixes: 819f7427bafd ("RDMA/mlx5: Add init2init as a modify command") Link: https://lore.kernel.org/r/20200527135703.482501-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 3047e7d60a9b..9454a66c12cc 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -495,6 +495,10 @@ static u64 devx_get_obj_id(const void *in) obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, MLX5_GET(rst2init_qp_in, in, qpn)); break; + case MLX5_CMD_OP_INIT2INIT_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(init2init_qp_in, in, qpn)); + break; case MLX5_CMD_OP_INIT2RTR_QP: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, MLX5_GET(init2rtr_qp_in, in, qpn)); From fef17f91da7d4af11dde1ff832b82d6a64f89562 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:32:59 +0300 Subject: [PATCH 501/562] RDMA/cm: Add Enhanced Connection Establishment (ECE) bits Extend REQ (request for communications), REP (reply to request for communication), rejected reason and SIDR_REP (service ID resolution response) structures with hardware vendor ID bits according to IBTA v1.4. Link: https://lore.kernel.org/r/20200526103304.196371-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ibta_vol1_c12.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/rdma/ibta_vol1_c12.h b/include/rdma/ibta_vol1_c12.h index 269904425d3f..960c86bec76c 100644 --- a/include/rdma/ibta_vol1_c12.h +++ b/include/rdma/ibta_vol1_c12.h @@ -38,6 +38,7 @@ /* Table 106 REQ Message Contents */ #define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32) +#define CM_REQ_VENDOR_ID CM_FIELD32_LOC(struct cm_req_msg, 5, 24) #define CM_REQ_SERVICE_ID CM_FIELD64_LOC(struct cm_req_msg, 8) #define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16) #define CM_REQ_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_req_msg, 28, 32) @@ -119,8 +120,11 @@ CM_STRUCT(struct cm_rej_msg, 84 * 8 + 1184); #define CM_REP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 4, 32) #define CM_REP_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_rep_msg, 8, 32) #define CM_REP_LOCAL_QPN CM_FIELD32_LOC(struct cm_rep_msg, 12, 24) +#define CM_REP_VENDOR_ID_H CM_FIELD8_LOC(struct cm_rep_msg, 15, 8) #define CM_REP_LOCAL_EE_CONTEXT_NUMBER CM_FIELD32_LOC(struct cm_rep_msg, 16, 24) +#define CM_REP_VENDOR_ID_M CM_FIELD8_LOC(struct cm_rep_msg, 19, 8) #define CM_REP_STARTING_PSN CM_FIELD32_LOC(struct cm_rep_msg, 20, 24) +#define CM_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_rep_msg, 23, 8) #define CM_REP_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_rep_msg, 24, 8) #define CM_REP_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_rep_msg, 25, 8) #define CM_REP_TARGET_ACK_DELAY CM_FIELD8_LOC(struct cm_rep_msg, 26, 5) @@ -201,7 +205,9 @@ CM_STRUCT(struct cm_sidr_req_msg, 16 * 8 + 1728); #define CM_SIDR_REP_STATUS CM_FIELD8_LOC(struct cm_sidr_rep_msg, 4, 8) #define CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH \ CM_FIELD8_LOC(struct cm_sidr_rep_msg, 5, 8) +#define CM_SIDR_REP_VENDOR_ID_H CM_FIELD16_LOC(struct cm_sidr_rep_msg, 6, 16) #define CM_SIDR_REP_QPN CM_FIELD32_LOC(struct cm_sidr_rep_msg, 8, 24) +#define CM_SIDR_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_sidr_rep_msg, 11, 8) #define CM_SIDR_REP_SERVICEID CM_FIELD64_LOC(struct cm_sidr_rep_msg, 12) #define CM_SIDR_REP_Q_KEY CM_FIELD32_LOC(struct cm_sidr_rep_msg, 20, 32) #define CM_SIDR_REP_ADDITIONAL_INFORMATION \ From 34e2ab57a911f8b32b22580d11a02f0b79108245 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:00 +0300 Subject: [PATCH 502/562] RDMA/ucma: Extend ucma_connect to receive ECE parameters Active side of CMID initiates connection through librdmacm's rdma_connect() and kernel's ucma_connect(). Extend UCMA interface to handle those new parameters. Link: https://lore.kernel.org/r/20200526103304.196371-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 21 +++++++++++++++++++++ drivers/infiniband/core/cma_priv.h | 1 + drivers/infiniband/core/ucma.c | 14 +++++++++++--- include/rdma/rdma_cm.h | 3 +++ include/uapi/rdma/rdma_user_cm.h | 6 ++++++ 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 432eec472164..e81b8a523a3e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4036,6 +4036,27 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) } EXPORT_SYMBOL(rdma_connect); +/** + * rdma_connect_ece - Initiate an active connection request with ECE data. + * @id: Connection identifier to connect. + * @conn_param: Connection information used for connected QPs. + * @ece: ECE parameters + * + * See rdma_connect() explanation. + */ +int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + id_priv->ece.vendor_id = ece->vendor_id; + id_priv->ece.attr_mod = ece->attr_mod; + + return rdma_connect(id, conn_param); +} +EXPORT_SYMBOL(rdma_connect_ece); + static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 5edcf44a9307..caece96ebcf5 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -95,6 +95,7 @@ struct rdma_id_private { * Internal to RDMA/core, don't use in the drivers */ struct rdma_restrack_entry res; + struct rdma_ucm_ece ece; }; #if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 06127c800a49..7cbb63690241 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1072,12 +1072,15 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id, static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { - struct rdma_ucm_connect cmd; struct rdma_conn_param conn_param; + struct rdma_ucm_ece ece = {}; + struct rdma_ucm_connect cmd; struct ucma_context *ctx; + size_t in_size; int ret; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + in_size = min_t(size_t, in_len, sizeof(cmd)); + if (copy_from_user(&cmd, inbuf, in_size)) return -EFAULT; if (!cmd.conn_param.valid) @@ -1088,8 +1091,13 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); + if (offsetofend(typeof(cmd), ece) <= in_size) { + ece.vendor_id = cmd.ece.vendor_id; + ece.attr_mod = cmd.ece.attr_mod; + } + mutex_lock(&ctx->mutex); - ret = rdma_connect(ctx->cm_id, &conn_param); + ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index ea8e794785ed..4e2975eb3643 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -264,6 +264,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, */ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); +int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece); + /** * rdma_listen - This function is called by the passive side to * listen for incoming connection requests. diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 1bb6e75d254b..c1409dd7225f 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -210,10 +210,16 @@ struct rdma_ucm_ud_param { __u8 reserved[7]; }; +struct rdma_ucm_ece { + __u32 vendor_id; + __u32 attr_mod; +}; + struct rdma_ucm_connect { struct rdma_ucm_conn_param conn_param; __u32 id; __u32 reserved; + struct rdma_ucm_ece ece; }; struct rdma_ucm_listen { From 93531ee7b9d1313227d2b4f354989895e8d57b72 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:01 +0300 Subject: [PATCH 503/562] RDMA/ucma: Deliver ECE parameters through UCMA events Passive side of CMID connection receives ECE request through REQ message and needs to respond with relevant REP message which will be forwarded to active side. The UCMA events interface is responsible for such communication with the user space (librdmacm). Extend it to provide ECE wire data. Link: https://lore.kernel.org/r/20200526103304.196371-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 6 +++++- include/rdma/rdma_cm.h | 1 + include/uapi/rdma/rdma_user_cm.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 7cbb63690241..3e5268cfa164 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -360,6 +360,9 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); + uevent->resp.ece.vendor_id = event->ece.vendor_id; + uevent->resp.ece.attr_mod = event->ece.attr_mod; + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -ENOMEM; @@ -404,7 +407,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, * Old 32 bit user space does not send the 4 byte padding in the * reserved field. We don't care, allow it to keep working. */ - if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved)) + if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) - + sizeof(uevent->resp.ece)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 4e2975eb3643..418590c9a9e8 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -111,6 +111,7 @@ struct rdma_cm_event { struct rdma_conn_param conn; struct rdma_ud_param ud; } param; + struct rdma_ucm_ece ece; }; struct rdma_cm_id; diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index c1409dd7225f..19c5c3f74af9 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -297,6 +297,7 @@ struct rdma_ucm_event_resp { struct rdma_ucm_ud_param ud; } param; __u32 reserved; + struct rdma_ucm_ece ece; }; /* Option levels */ From a20652e175f2c5cea74c90503eeaeafabd08abed Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:02 +0300 Subject: [PATCH 504/562] RDMA/cm: Send and receive ECE parameter over the wire ECE parameters are exchanged through REQ->REP/SIDR_REP messages, this patch adds the data to provide to other side of CMID communication channel. Link: https://lore.kernel.org/r/20200526103304.196371-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 39 ++++++++++++++++++++++++++++++----- drivers/infiniband/core/cma.c | 8 +++++++ include/rdma/ib_cm.h | 9 +++++++- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f38ff46abe8f..085c146fe400 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -66,6 +66,8 @@ static const char * const ibcm_rej_reason_strs[] = { [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version", [IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label", [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label", + [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] = + "vendor option is not supported", }; const char *__attribute_const__ ibcm_reject_msg(int reason) @@ -290,6 +292,8 @@ struct cm_id_private { struct list_head work_list; atomic_t work_count; + + struct rdma_ucm_ece ece; }; static void cm_work_handler(struct work_struct *work); @@ -1318,6 +1322,13 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, hdr->tid = tid; } +static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id, + __be64 tid, u32 attr_mod) +{ + cm_format_mad_hdr(hdr, attr_id, tid); + hdr->attr_mod = cpu_to_be32(attr_mod); +} + static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) @@ -1330,8 +1341,8 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_ext = opa_is_extended_lid(pri_path->opa.dlid, pri_path->opa.slid); - cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, - cm_form_tid(cm_id_priv)); + cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, + cm_form_tid(cm_id_priv), param->ece.attr_mod); IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg, be32_to_cpu(cm_id_priv->id.local_id)); @@ -1454,6 +1465,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alt_path->packet_life_time)); } + IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id); if (param->private_data && param->private_data_len) IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data, @@ -1810,6 +1822,9 @@ static void cm_format_req_event(struct cm_work *work, param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); param->srq = IBA_GET(CM_REQ_SRQ, req_msg); param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; + param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg); + param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod); + work->cm_event.private_data = IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg); } @@ -2202,7 +2217,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_rep_param *param) { - cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); + cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid, + param->ece.attr_mod); IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg, be32_to_cpu(cm_id_priv->id.local_id)); IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg, @@ -2229,6 +2245,10 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num); } + IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id); + IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8); + IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16); + if (param->private_data && param->private_data_len) IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data, param->private_data_len); @@ -2376,6 +2396,11 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg); param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg); param->srq = IBA_GET(CM_REP_SRQ, rep_msg); + param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16; + param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8; + param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg); + param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod); + work->cm_event.private_data = IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg); } @@ -3597,8 +3622,8 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { - cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, - cm_id_priv->tid); + cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, + cm_id_priv->tid, param->ece.attr_mod); IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg, be32_to_cpu(cm_id_priv->id.remote_id)); IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status); @@ -3606,6 +3631,10 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg, be64_to_cpu(cm_id_priv->id.service_id)); IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey); + IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg, + param->ece.vendor_id & 0xFF); + IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg, + (param->ece.vendor_id >> 8) & 0xFF); if (param->info && param->info_length) IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e81b8a523a3e..f554a371f4fa 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1911,6 +1911,9 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; event->param.conn.srq = rep_data->srq; event->param.conn.qp_num = rep_data->remote_qpn; + + event->ece.vendor_id = rep_data->ece.vendor_id; + event->ece.attr_mod = rep_data->ece.attr_mod; } static int cma_cm_event_handler(struct rdma_id_private *id_priv, @@ -2129,6 +2132,9 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.rnr_retry_count = req_data->rnr_retry_count; event->param.conn.srq = req_data->srq; event->param.conn.qp_num = req_data->remote_qpn; + + event->ece.vendor_id = req_data->ece.vendor_id; + event->ece.attr_mod = req_data->ece.attr_mod; } static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id, @@ -3947,6 +3953,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; + req.ece.vendor_id = id_priv->ece.vendor_id; + req.ece.attr_mod = id_priv->ece.attr_mod; trace_cm_send_req(id_priv); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 058cfbc2b37f..0f1ea5f2d01c 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -11,6 +11,7 @@ #include #include +#include /* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ extern struct class cm_class; @@ -115,6 +116,7 @@ struct ib_cm_req_event_param { unsigned int retry_count:3; unsigned int rnr_retry_count:3; unsigned int srq:1; + struct rdma_ucm_ece ece; }; struct ib_cm_rep_event_param { @@ -129,6 +131,7 @@ struct ib_cm_rep_event_param { unsigned int flow_control:1; unsigned int rnr_retry_count:3; unsigned int srq:1; + struct rdma_ucm_ece ece; }; enum ib_cm_rej_reason { @@ -164,7 +167,8 @@ enum ib_cm_rej_reason { IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, IB_CM_REJ_INVALID_CLASS_VERSION = 31, IB_CM_REJ_INVALID_FLOW_LABEL = 32, - IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 + IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33, + IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED = 35, }; struct ib_cm_rej_event_param { @@ -369,6 +373,7 @@ struct ib_cm_req_param { u8 rnr_retry_count; u8 max_cm_retries; u8 srq; + struct rdma_ucm_ece ece; }; /** @@ -392,6 +397,7 @@ struct ib_cm_rep_param { u8 flow_control; u8 rnr_retry_count; u8 srq; + struct rdma_ucm_ece ece; }; /** @@ -546,6 +552,7 @@ struct ib_cm_sidr_rep_param { u8 info_length; const void *private_data; u8 private_data_len; + struct rdma_ucm_ece ece; }; /** From 0cb15372a615a9835893f43e86ae45399eb63996 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:03 +0300 Subject: [PATCH 505/562] RDMA/cma: Connect ECE to rdma_accept The rdma_accept() is called by both passive and active sides of CMID connection to mark readiness to start data transfer. For passive side, this is called explicitly, for active side, it is called implicitly while receiving REP message. Provide ECE data to rdma_accept function needed for passive side to send that REP message. Link: https://lore.kernel.org/r/20200526103304.196371-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 19 +++++++++++++++++++ drivers/infiniband/core/ucma.c | 14 +++++++++++--- include/rdma/rdma_cm.h | 3 +++ include/uapi/rdma/rdma_user_cm.h | 1 + 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f554a371f4fa..d449afe5557b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4090,6 +4090,8 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 1 : 0; + rep.ece.vendor_id = id_priv->ece.vendor_id; + rep.ece.attr_mod = id_priv->ece.attr_mod; trace_cm_send_rep(id_priv); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); @@ -4137,7 +4139,11 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, return ret; rep.qp_num = id_priv->qp_num; rep.qkey = id_priv->qkey; + + rep.ece.vendor_id = id_priv->ece.vendor_id; + rep.ece.attr_mod = id_priv->ece.attr_mod; } + rep.private_data = private_data; rep.private_data_len = private_data_len; @@ -4195,6 +4201,19 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, } EXPORT_SYMBOL(__rdma_accept); +int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller, struct rdma_ucm_ece *ece) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + id_priv->ece.vendor_id = ece->vendor_id; + id_priv->ece.attr_mod = ece->attr_mod; + + return __rdma_accept(id, conn_param, caller); +} +EXPORT_SYMBOL(__rdma_accept_ece); + int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { struct rdma_id_private *id_priv; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3e5268cfa164..6b27b210b890 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1135,28 +1135,36 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, { struct rdma_ucm_accept cmd; struct rdma_conn_param conn_param; + struct rdma_ucm_ece ece = {}; struct ucma_context *ctx; + size_t in_size; int ret; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + in_size = min_t(size_t, in_len, sizeof(cmd)); + if (copy_from_user(&cmd, inbuf, in_size)) return -EFAULT; ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); + if (offsetofend(typeof(cmd), ece) <= in_size) { + ece.vendor_id = cmd.ece.vendor_id; + ece.attr_mod = cmd.ece.attr_mod; + } + if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); mutex_lock(&ctx->mutex); - ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); + ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece); mutex_unlock(&ctx->mutex); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); } else { mutex_lock(&ctx->mutex); - ret = __rdma_accept(ctx->cm_id, NULL, NULL); + ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece); mutex_unlock(&ctx->mutex); } ucma_put_ctx(ctx); diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 418590c9a9e8..7ac91677660f 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -280,6 +280,9 @@ int rdma_listen(struct rdma_cm_id *id, int backlog); int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, const char *caller); +int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller, struct rdma_ucm_ece *ece); + /** * rdma_accept - Called to accept a connection request or response. * @id: Connection identifier associated with the request. diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 19c5c3f74af9..6b883dde7064 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -232,6 +232,7 @@ struct rdma_ucm_accept { struct rdma_ucm_conn_param conn_param; __u32 id; __u32 reserved; + struct rdma_ucm_ece ece; }; struct rdma_ucm_reject { From 8094ba0ace7f6cd1e31ea8b151fba3594cadfa9a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:04 +0300 Subject: [PATCH 506/562] RDMA/cma: Provide ECE reject reason IBTA declares "vendor option not supported" reject reason in REJ messages if passive side doesn't want to accept proposed ECE options. Due to the fact that ECE is managed by userspace, there is a need to let users to provide such rejected reason. Link: https://lore.kernel.org/r/20200526103304.196371-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 9 ++++----- drivers/infiniband/core/ucma.c | 15 ++++++++++++++- drivers/infiniband/ulp/isert/ib_isert.c | 5 +++-- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 3 ++- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++- drivers/nvme/target/rdma.c | 4 +++- include/rdma/rdma_cm.h | 2 +- include/uapi/rdma/rdma_user_cm.h | 3 ++- net/rds/ib_cm.c | 4 +++- 9 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d449afe5557b..8026ee56546a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4196,7 +4196,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, return 0; reject: cma_modify_qp_err(id_priv); - rdma_reject(id, NULL, 0); + rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); return ret; } EXPORT_SYMBOL(__rdma_accept); @@ -4236,7 +4236,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) EXPORT_SYMBOL(rdma_notify); int rdma_reject(struct rdma_cm_id *id, const void *private_data, - u8 private_data_len) + u8 private_data_len, u8 reason) { struct rdma_id_private *id_priv; int ret; @@ -4251,9 +4251,8 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, private_data, private_data_len); } else { trace_cm_send_rej(id_priv); - ret = ib_send_cm_rej(id_priv->cm_id.ib, - IB_CM_REJ_CONSUMER_DEFINED, NULL, - 0, private_data, private_data_len); + ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0, + private_data, private_data_len); } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6b27b210b890..5b87eee8ccc8 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include "core_priv.h" @@ -1181,12 +1182,24 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + if (!cmd.reason) + cmd.reason = IB_CM_REJ_CONSUMER_DEFINED; + + switch (cmd.reason) { + case IB_CM_REJ_CONSUMER_DEFINED: + case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED: + break; + default: + return -EINVAL; + } + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); - ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len, + cmd.reason); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a1a035270cab..b7df38ee8ae0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -502,7 +503,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) if (!np->enabled) { spin_unlock_bh(&np->np_thread_lock); isert_dbg("iscsi_np is not enabled, reject connect request\n"); - return rdma_reject(cma_id, NULL, 0); + return rdma_reject(cma_id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); } spin_unlock_bh(&np->np_thread_lock); @@ -553,7 +554,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_free_login_buf(isert_conn); out: kfree(isert_conn); - rdma_reject(cma_id, NULL, 0); + rdma_reject(cma_id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); return ret; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 5ef8988ee75b..0d9241f5d9e6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -15,6 +15,7 @@ #include "rtrs-srv.h" #include "rtrs-log.h" +#include MODULE_DESCRIPTION("RDMA Transport Server"); MODULE_LICENSE("GPL"); @@ -1576,7 +1577,7 @@ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno) .errno = cpu_to_le16(errno), }; - err = rdma_reject(cm_id, &msg, sizeof(msg)); + err = rdma_reject(cm_id, &msg, sizeof(msg), IB_CM_REJ_CONSUMER_DEFINED); if (err) pr_err("rdma_reject(), err: %d\n", err); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a294630f2100..cdc8c239d6c0 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2497,7 +2497,8 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, SRP_BUF_FORMAT_INDIRECT); if (rdma_cm_id) - rdma_reject(rdma_cm_id, rej, sizeof(*rej)); + rdma_reject(rdma_cm_id, rej, sizeof(*rej), + IB_CM_REJ_CONSUMER_DEFINED); else ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, rej, sizeof(*rej)); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index fd47de0e4e4e..55aaf03a9580 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "nvmet.h" @@ -1138,7 +1139,8 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id, rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); rej.sts = cpu_to_le16(status); - return rdma_reject(cm_id, (void *)&rej, sizeof(rej)); + return rdma_reject(cm_id, (void *)&rej, sizeof(rej), + IB_CM_REJ_CONSUMER_DEFINED); } static struct nvmet_rdma_queue * diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 7ac91677660f..939d7abe026f 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -320,7 +320,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event); * rdma_reject - Called to reject a connection request or response. */ int rdma_reject(struct rdma_cm_id *id, const void *private_data, - u8 private_data_len); + u8 private_data_len, u8 reason); /** * rdma_disconnect - This function disconnects the associated QP and diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 6b883dde7064..ed5a514305c1 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -238,7 +238,8 @@ struct rdma_ucm_accept { struct rdma_ucm_reject { __u32 id; __u8 private_data_len; - __u8 reserved[3]; + __u8 reason; + __u8 reserved[2]; __u8 private_data[RDMA_MAX_PRIVATE_DATA]; }; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index c71f4328d138..0fec4171564e 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "rds_single_path.h" #include "rds.h" @@ -927,7 +928,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, if (conn) mutex_unlock(&conn->c_cm_lock); if (err) - rdma_reject(cm_id, &err, sizeof(int)); + rdma_reject(cm_id, &err, sizeof(int), + IB_CM_REJ_CONSUMER_DEFINED); return destroy; } From 3e09a427ae7ac347e08dca5ffac64c902860d675 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 14:54:34 +0300 Subject: [PATCH 507/562] RDMA/mlx5: Get ECE options from FW during create QP Supported ECE options are returned from FW in the create_qp phase and zero means that field is not valid. Such default value allows us to reuse reserved field without worries about comp_mask. Update create QP API to return ECE options. Link: https://lore.kernel.org/r/20200526115440.205922-3-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 16 +++++++++++----- drivers/infiniband/hw/mlx5/qp.h | 4 ++-- drivers/infiniband/hw/mlx5/qpc.c | 8 ++++---- include/uapi/rdma/mlx5-abi.h | 2 +- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2e28752e8cd2..be7289c480f7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1842,6 +1842,7 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr = params->attr; u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp_base *base; @@ -1894,13 +1895,14 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } base = &qp->trans_qp.base; - err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); + err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); kvfree(in); if (err) return err; base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; + params->resp.ece_options = MLX5_GET(create_qp_out, out, ece); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); list_add_tail(&qp->qps_list, &dev->qp_list); @@ -1916,6 +1918,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct ib_qp_init_attr *init_attr = params->attr; struct mlx5_ib_create_qp *ucmd = params->ucmd; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; struct ib_udata *udata = params->udata; u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; @@ -2065,7 +2068,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, ¶ms->resp); } else - err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); + err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); kvfree(in); if (err) @@ -2073,6 +2076,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; + params->resp.ece_options = MLX5_GET(create_qp_out, out, ece); get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq, &send_cq, &recv_cq); @@ -2105,6 +2109,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr = params->attr; u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_cq *send_cq; @@ -2195,7 +2200,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); - err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); + err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); kvfree(in); if (err) goto err_create; @@ -2779,12 +2784,13 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; mlx5_ib_dbg(dev, - "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", + "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x, ece 0x%x\n", qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn : -1, params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn : - -1); + -1, + params->resp.ece_options); return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index ad9d76e3e18a..795c21f88962 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -13,8 +13,8 @@ void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev); int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *qp, u32 *in, int inlen, u32 *out, int outlen); -int mlx5_core_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, - u32 *in, int inlen); +int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, + u32 *in, int inlen, u32 *out); int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask, void *qpc, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp); diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index ea62735042f0..69c80859a6ee 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -236,16 +236,16 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, return err; } -int mlx5_core_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, - u32 *in, int inlen) +int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, + u32 *in, int inlen, u32 *out) { - u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; u32 din[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; int err; MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); - err = mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev->mdev, in, inlen, out, + MLX5_ST_SZ_BYTES(create_qp_out)); if (err) return err; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index df1cc3641bda..106fbb3bec6a 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -371,7 +371,7 @@ enum mlx5_ib_create_qp_resp_mask { struct mlx5_ib_create_qp_resp { __u32 bfreg_index; - __u32 reserved; + __u32 ece_options; __u32 comp_mask; __u32 tirn; __u32 tisn; From e383085c24255821e79d3c2aa6302d804b6a1c48 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 14:54:35 +0300 Subject: [PATCH 508/562] RDMA/mlx5: Set ECE options during QP create Allow users to ask creation of QPs with specific ECE options. Such early set even before RDMA-CM connection is established is useful if user knows exactly which option he needs. Link: https://lore.kernel.org/r/20200526115440.205922-4-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 75 +++++++++++++++++++++++++++++---- include/uapi/rdma/mlx5-abi.h | 2 + 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index be7289c480f7..eb70eb371b4b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1552,6 +1552,7 @@ struct mlx5_create_qp_params { struct ib_udata *udata; size_t inlen; size_t outlen; + size_t ucmd_size; void *ucmd; u8 is_rss_raw : 1; struct ib_qp_init_attr *attr; @@ -1839,6 +1840,7 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_create_qp_params *params) { + struct mlx5_ib_create_qp *ucmd = params->ucmd; struct ib_qp_init_attr *attr = params->attr; u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; @@ -1860,6 +1862,8 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (!in) return -ENOMEM; + if (MLX5_CAP_GEN(mdev, ece_support)) + MLX5_SET(create_qp_in, in, ece, ucmd->ece_options); qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC); @@ -1974,6 +1978,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (is_sqp(init_attr->qp_type)) qp->port = init_attr->port_num; + if (MLX5_CAP_GEN(mdev, ece_support)) + MLX5_SET(create_qp_in, in, ece, ucmd->ece_options); qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, mlx5_st); @@ -2709,19 +2715,22 @@ static int process_udata_size(struct mlx5_ib_dev *dev, struct mlx5_create_qp_params *params) { size_t ucmd = sizeof(struct mlx5_ib_create_qp); - struct ib_qp_init_attr *attr = params->attr; struct ib_udata *udata = params->udata; size_t outlen = udata->outlen; size_t inlen = udata->inlen; params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp)); - if (attr->qp_type == IB_QPT_DRIVER) { - params->inlen = (inlen < ucmd) ? 0 : ucmd; - goto out; - } - + params->ucmd_size = ucmd; if (!params->is_rss_raw) { - params->inlen = ucmd; + /* User has old rdma-core, which doesn't support ECE */ + size_t min_inlen = + offsetof(struct mlx5_ib_create_qp, ece_options); + + /* + * We will check in check_ucmd_data() that user + * cleared everything after inlen. + */ + params->inlen = (inlen < min_inlen) ? 0 : min(inlen, ucmd); goto out; } @@ -2733,13 +2742,14 @@ static int process_udata_size(struct mlx5_ib_dev *dev, return -EINVAL; ucmd = sizeof(struct mlx5_ib_create_qp_rss); + params->ucmd_size = ucmd; if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd)) return -EINVAL; params->inlen = min(ucmd, inlen); out: if (!params->inlen) - mlx5_ib_dbg(dev, "udata is too small or not cleared\n"); + mlx5_ib_dbg(dev, "udata is too small\n"); return (params->inlen) ? 0 : -EINVAL; } @@ -2855,6 +2865,49 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) return 0; } +static int check_ucmd_data(struct mlx5_ib_dev *dev, + struct mlx5_create_qp_params *params) +{ + struct ib_qp_init_attr *attr = params->attr; + struct ib_udata *udata = params->udata; + size_t size, last; + int ret; + + if (params->is_rss_raw) + /* + * These QPs don't have "reserved" field in their + * create_qp input struct, so their data is always valid. + */ + last = sizeof(struct mlx5_ib_create_qp_rss); + else + /* IB_QPT_RAW_PACKET and IB_QPT_DRIVER don't have ECE data */ + switch (attr->qp_type) { + case IB_QPT_DRIVER: + case IB_QPT_RAW_PACKET: + last = offsetof(struct mlx5_ib_create_qp, ece_options); + break; + default: + last = offsetof(struct mlx5_ib_create_qp, reserved); + } + + if (udata->inlen <= last) + return 0; + + /* + * User provides different create_qp structures based on the + * flow and we need to know if he cleared memory after our + * struct create_qp ends. + */ + size = udata->inlen - last; + ret = ib_is_udata_cleared(params->udata, last, size); + if (!ret) + mlx5_ib_dbg( + dev, + "udata is not cleared, inlen = %lu, ucmd = %lu, last = %lu, size = %lu\n", + udata->inlen, params->ucmd_size, last, size); + return ret ? 0 : -EINVAL; +} + struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata) { @@ -2888,7 +2941,11 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, if (err) return ERR_PTR(err); - params.ucmd = kzalloc(params.inlen, GFP_KERNEL); + err = check_ucmd_data(dev, ¶ms); + if (err) + return ERR_PTR(err); + + params.ucmd = kzalloc(params.ucmd_size, GFP_KERNEL); if (!params.ucmd) return ERR_PTR(-ENOMEM); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 106fbb3bec6a..bc9d9e3cb369 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -322,6 +322,8 @@ struct mlx5_ib_create_qp { __aligned_u64 sq_buf_addr; __aligned_u64 access_key; }; + __u32 ece_options; + __u32 reserved; }; /* RX Hash function flags */ From 64bae2d455f6058572ac4d23a8ea9e47c9d10f03 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 14:54:36 +0300 Subject: [PATCH 509/562] RDMA/mlx5: Use direct modify QP implementation As a preparation to removal hand crafted mlx5_qp_context, convert counter code to use mlx5_cmd_exec_in() directly. Link: https://lore.kernel.org/r/20200526115440.205922-5-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index eb70eb371b4b..8a3aee57a196 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3688,10 +3688,11 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + u32 in[MLX5_ST_SZ_DW(rts2rts_qp_in)] = {}; struct mlx5_ib_qp *mqp = to_mqp(qp); - struct mlx5_qp_context context = {}; struct mlx5_ib_qp_base *base; u32 set_id; + u32 *qpc; if (counter) set_id = counter->id; @@ -3699,11 +3700,15 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1); base = &mqp->trans_qp.base; - context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff); - context.qp_counter_set_usr_page |= cpu_to_be32(set_id << 24); - return mlx5_core_qp_modify(dev, MLX5_CMD_OP_RTS2RTS_QP, - MLX5_QP_OPTPAR_COUNTER_SET_ID, &context, - &base->mqp); + MLX5_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP); + MLX5_SET(rts2rts_qp_in, in, qpn, base->mqp.qpn); + MLX5_SET(rts2rts_qp_in, in, uid, base->mqp.uid); + MLX5_SET(rts2rts_qp_in, in, opt_param_mask, + MLX5_QP_OPTPAR_COUNTER_SET_ID); + + qpc = MLX5_ADDR_OF(rts2rts_qp_in, in, qpc); + MLX5_SET(qpc, qpc, counter_set_id, set_id); + return mlx5_cmd_exec_in(dev->mdev, rts2rts_qp, in); } static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, From 70bd7fb8762528ac0e69a8ae0f485298dff57043 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 14:54:37 +0300 Subject: [PATCH 510/562] RDMA/mlx5: Remove manually crafted QP context the query call As a preparation to removal hand crafted mlx5_qp_context, convert query_qp_attr() to use proper MLX5_GET() macros. Link: https://lore.kernel.org/r/20200526115440.205922-6-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 125 ++++++++++++++------------------ 1 file changed, 54 insertions(+), 71 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8a3aee57a196..5099866533dd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4330,50 +4330,35 @@ static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state) } } -static int to_ib_qp_access_flags(int mlx5_flags) -{ - int ib_flags = 0; - - if (mlx5_flags & MLX5_QP_BIT_RRE) - ib_flags |= IB_ACCESS_REMOTE_READ; - if (mlx5_flags & MLX5_QP_BIT_RWE) - ib_flags |= IB_ACCESS_REMOTE_WRITE; - if (mlx5_flags & MLX5_QP_BIT_RAE) - ib_flags |= IB_ACCESS_REMOTE_ATOMIC; - - return ib_flags; -} - static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev, - struct rdma_ah_attr *ah_attr, - struct mlx5_qp_path *path) + struct rdma_ah_attr *ah_attr, void *path) { + int port = MLX5_GET(ads, path, vhca_port_num); + int static_rate; memset(ah_attr, 0, sizeof(*ah_attr)); - if (!path->port || path->port > ibdev->num_ports) + if (!port || port > ibdev->num_ports) return; - ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port); + ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port); - rdma_ah_set_port_num(ah_attr, path->port); - rdma_ah_set_sl(ah_attr, path->dci_cfi_prio_sl & 0xf); + rdma_ah_set_port_num(ah_attr, port); + rdma_ah_set_sl(ah_attr, MLX5_GET(ads, path, sl)); - rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid)); - rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f); - rdma_ah_set_static_rate(ah_attr, - path->static_rate ? path->static_rate - 5 : 0); + rdma_ah_set_dlid(ah_attr, MLX5_GET(ads, path, rlid)); + rdma_ah_set_path_bits(ah_attr, MLX5_GET(ads, path, mlid)); - if (path->grh_mlid & (1 << 7) || + static_rate = MLX5_GET(ads, path, stat_rate); + rdma_ah_set_static_rate(ah_attr, static_rate ? static_rate - 5 : 0); + if (MLX5_GET(ads, path, grh) || ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - u32 tc_fl = be32_to_cpu(path->tclass_flowlabel); - - rdma_ah_set_grh(ah_attr, NULL, - tc_fl & 0xfffff, - path->mgid_index, - path->hop_limit, - (tc_fl >> 20) & 0xff); - rdma_ah_set_dgid_raw(ah_attr, path->rgid); + rdma_ah_set_grh(ah_attr, NULL, MLX5_GET(ads, path, flow_label), + MLX5_GET(ads, path, src_addr_index), + MLX5_GET(ads, path, hop_limit), + MLX5_GET(ads, path, tclass)); + memcpy(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip), + MLX5_FLD_SZ_BYTES(ads, rgid_rip)); } } @@ -4495,10 +4480,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_attr *qp_attr) { int outlen = MLX5_ST_SZ_BYTES(query_qp_out); - struct mlx5_qp_context *context; - int mlx5_state; + void *qpc, *pri_path, *alt_path; u32 *outb; - int err = 0; + int err; outb = kzalloc(outlen, GFP_KERNEL); if (!outb) @@ -4508,47 +4492,46 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) goto out; - /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */ - context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc); + qpc = MLX5_ADDR_OF(query_qp_out, outb, qpc); - mlx5_state = be32_to_cpu(context->flags) >> 28; + qp->state = to_ib_qp_state(MLX5_GET(qpc, qpc, state)); + if (MLX5_GET(qpc, qpc, state) == MLX5_QP_STATE_SQ_DRAINING) + qp_attr->sq_draining = 1; - qp->state = to_ib_qp_state(mlx5_state); - qp_attr->path_mtu = context->mtu_msgmax >> 5; - qp_attr->path_mig_state = - to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); - qp_attr->qkey = be32_to_cpu(context->qkey); - qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff; - qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff; - qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff; - qp_attr->qp_access_flags = - to_ib_qp_access_flags(be32_to_cpu(context->params2)); + qp_attr->path_mtu = MLX5_GET(qpc, qpc, mtu); + qp_attr->path_mig_state = to_ib_mig_state(MLX5_GET(qpc, qpc, pm_state)); + qp_attr->qkey = MLX5_GET(qpc, qpc, q_key); + qp_attr->rq_psn = MLX5_GET(qpc, qpc, next_rcv_psn); + qp_attr->sq_psn = MLX5_GET(qpc, qpc, next_send_psn); + qp_attr->dest_qp_num = MLX5_GET(qpc, qpc, remote_qpn); + + if (MLX5_GET(qpc, qpc, rre)) + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; + if (MLX5_GET(qpc, qpc, rwe)) + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE; + if (MLX5_GET(qpc, qpc, rae)) + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_ATOMIC; + + qp_attr->max_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_sra_max); + qp_attr->max_dest_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_rra_max); + qp_attr->min_rnr_timer = MLX5_GET(qpc, qpc, min_rnr_nak); + qp_attr->retry_cnt = MLX5_GET(qpc, qpc, retry_count); + qp_attr->rnr_retry = MLX5_GET(qpc, qpc, rnr_retry); + + pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); - to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); - qp_attr->alt_pkey_index = - be16_to_cpu(context->alt_path.pkey_index); - qp_attr->alt_port_num = - rdma_ah_get_port_num(&qp_attr->alt_ah_attr); + to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); + to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); + qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); + qp_attr->alt_port_num = MLX5_GET(ads, alt_path, vhca_port_num); } - qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index); - qp_attr->port_num = context->pri_path.port; - - /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ - qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING; - - qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7); - - qp_attr->max_dest_rd_atomic = - 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); - qp_attr->min_rnr_timer = - (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; - qp_attr->timeout = context->pri_path.ackto_lt >> 3; - qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; - qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; - qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; + qp_attr->pkey_index = MLX5_GET(ads, pri_path, pkey_index); + qp_attr->port_num = MLX5_GET(ads, pri_path, vhca_port_num); + qp_attr->timeout = MLX5_GET(ads, pri_path, ack_timeout); + qp_attr->alt_timeout = MLX5_GET(ads, alt_path, ack_timeout); out: kfree(outb); From f18e26af6aba778b888044859d9c69bb9bbc7bc1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 14:54:38 +0300 Subject: [PATCH 511/562] RDMA/mlx5: Convert modify QP to use MLX5_SET macros Instead of hand crafted mlx5_qp_context and mlx5_qp_path use common MLX5_SET() macros. Link: https://lore.kernel.org/r/20200526115440.205922-7-leon@kernel.org Reviewed-by: Maor Gottlieb Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 203 +++++++++++++++----------------- include/linux/mlx5/qp.h | 66 ----------- 2 files changed, 97 insertions(+), 172 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5099866533dd..a24176a8ec83 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3028,14 +3028,13 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) return 0; } -static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, - const struct ib_qp_attr *attr, - int attr_mask, __be32 *hw_access_flags_be) +static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp, + const struct ib_qp_attr *attr, int attr_mask, + void *qpc) { - u8 dest_rd_atomic; - u32 access_flags, hw_access_flags = 0; - struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); + u8 dest_rd_atomic; + u32 access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) dest_rd_atomic = attr->max_dest_rd_atomic; @@ -3050,8 +3049,8 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, if (!dest_rd_atomic) access_flags &= IB_ACCESS_REMOTE_WRITE; - if (access_flags & IB_ACCESS_REMOTE_READ) - hw_access_flags |= MLX5_QP_BIT_RRE; + MLX5_SET(qpc, qpc, rre, !!(access_flags & IB_ACCESS_REMOTE_READ)); + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) { int atomic_mode; @@ -3059,15 +3058,11 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, if (atomic_mode < 0) return -EOPNOTSUPP; - hw_access_flags |= MLX5_QP_BIT_RAE; - hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET; + MLX5_SET(qpc, qpc, rae, 1); + MLX5_SET(qpc, qpc, atomic_mode, atomic_mode); } - if (access_flags & IB_ACCESS_REMOTE_WRITE) - hw_access_flags |= MLX5_QP_BIT_RWE; - - *hw_access_flags_be = cpu_to_be32(hw_access_flags); - + MLX5_SET(qpc, qpc, rwe, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); return 0; } @@ -3147,26 +3142,22 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, return err; } -static void mlx5_set_path_udp_sport(struct mlx5_qp_path *path, - const struct rdma_ah_attr *ah, +static void mlx5_set_path_udp_sport(void *path, const struct rdma_ah_attr *ah, u32 lqpn, u32 rqpn) { u32 fl = ah->grh.flow_label; - u16 sport; if (!fl) fl = rdma_calc_flow_label(lqpn, rqpn); - sport = rdma_flow_label_to_udp_sport(fl); - path->udp_sport = cpu_to_be16(sport); + MLX5_SET(ads, path, udp_sport, rdma_flow_label_to_udp_sport(fl)); } static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct rdma_ah_attr *ah, - struct mlx5_qp_path *path, u8 port, int attr_mask, - u32 path_flags, const struct ib_qp_attr *attr, - bool alt) + const struct rdma_ah_attr *ah, void *path, u8 port, + int attr_mask, u32 path_flags, + const struct ib_qp_attr *attr, bool alt) { const struct ib_global_route *grh = rdma_ah_read_grh(ah); int err; @@ -3175,8 +3166,8 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u8 sl = rdma_ah_get_sl(ah); if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index : - attr->pkey_index); + MLX5_SET(ads, path, pkey_index, + alt ? attr->alt_pkey_index : attr->pkey_index); if (ah_flags & IB_AH_GRH) { if (grh->sgid_index >= @@ -3192,7 +3183,8 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (!(ah_flags & IB_AH_GRH)) return -EINVAL; - memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); + ether_addr_copy(MLX5_ADDR_OF(ads, path, rmac_47_32), + ah->roce.dmac); if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_XRC_INI || @@ -3202,38 +3194,38 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, mlx5_set_path_udp_sport(path, ah, qp->ibqp.qp_num, attr->dest_qp_num); - path->dci_cfi_prio_sl = (sl & 0x7) << 4; + MLX5_SET(ads, path, eth_prio, sl & 0x7); gid_type = ah->grh.sgid_attr->gid_type; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) - path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f; + MLX5_SET(ads, path, dscp, grh->traffic_class >> 2); } else { - path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; - path->fl_free_ar |= - (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0; - path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah)); - path->grh_mlid = rdma_ah_get_path_bits(ah) & 0x7f; - if (ah_flags & IB_AH_GRH) - path->grh_mlid |= 1 << 7; - path->dci_cfi_prio_sl = sl & 0xf; + MLX5_SET(ads, path, fl, !!(path_flags & MLX5_PATH_FLAG_FL)); + MLX5_SET(ads, path, free_ar, + !!(path_flags & MLX5_PATH_FLAG_FREE_AR)); + MLX5_SET(ads, path, rlid, rdma_ah_get_dlid(ah)); + MLX5_SET(ads, path, mlid, rdma_ah_get_path_bits(ah)); + MLX5_SET(ads, path, grh, !!(ah_flags & IB_AH_GRH)); + MLX5_SET(ads, path, sl, sl); } if (ah_flags & IB_AH_GRH) { - path->mgid_index = grh->sgid_index; - path->hop_limit = grh->hop_limit; - path->tclass_flowlabel = - cpu_to_be32((grh->traffic_class << 20) | - (grh->flow_label)); - memcpy(path->rgid, grh->dgid.raw, 16); + MLX5_SET(ads, path, src_addr_index, grh->sgid_index); + MLX5_SET(ads, path, hop_limit, grh->hop_limit); + MLX5_SET(ads, path, tclass, grh->traffic_class); + MLX5_SET(ads, path, flow_label, grh->flow_label); + memcpy(MLX5_ADDR_OF(ads, path, rgid_rip), grh->dgid.raw, + sizeof(grh->dgid.raw)); } err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah)); if (err < 0) return err; - path->static_rate = err; - path->port = port; + MLX5_SET(ads, path, stat_rate, err); + MLX5_SET(ads, path, vhca_port_num, port); if (attr_mask & IB_QP_TIMEOUT) - path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3; + MLX5_SET(ads, path, ack_timeout, + alt ? attr->alt_timeout : attr->timeout); if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, @@ -3759,9 +3751,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_ib_cq *send_cq, *recv_cq; - struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; + void *qpc, *pri_path, *alt_path; enum mlx5_qp_optpar optpar = 0; u32 set_id = 0; int mlx5_st; @@ -3773,25 +3765,25 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (mlx5_st < 0) return -EINVAL; - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) return -ENOMEM; pd = to_mpd(qp->ibqp.pd); - context->flags = cpu_to_be32(mlx5_st << 16); + MLX5_SET(qpc, qpc, st, mlx5_st); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { - context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); } else { switch (attr->path_mig_state) { case IB_MIG_MIGRATED: - context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); break; case IB_MIG_REARM: - context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_REARM); break; case IB_MIG_ARMED: - context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_ARMED); break; } } @@ -3799,19 +3791,20 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, tx_affinity = get_tx_affinity(ibqp, attr, attr_mask, cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT, udata); - if (tx_affinity) { - context->flags |= cpu_to_be32(tx_affinity << 24); - if (new_state == IB_QPS_RTR && - MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity)) - optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF; - } + + MLX5_SET(qpc, qpc, lag_tx_port_affinity, tx_affinity); + if (tx_affinity && new_state == IB_QPS_RTR && + MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity)) + optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF; if (is_sqp(ibqp->qp_type)) { - context->mtu_msgmax = (IB_MTU_256 << 5) | 8; + MLX5_SET(qpc, qpc, mtu, IB_MTU_256); + MLX5_SET(qpc, qpc, log_msg_max, 8); } else if ((ibqp->qp_type == IB_QPT_UD && !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { - context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + MLX5_SET(qpc, qpc, mtu, IB_MTU_4096); + MLX5_SET(qpc, qpc, log_msg_max, 12); } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { @@ -3819,40 +3812,45 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = -EINVAL; goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | - (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg); + MLX5_SET(qpc, qpc, mtu, attr->path_mtu); + MLX5_SET(qpc, qpc, log_msg_max, + MLX5_CAP_GEN(dev->mdev, log_max_msg)); } if (attr_mask & IB_QP_DEST_QPN) - context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); + MLX5_SET(qpc, qpc, remote_qpn, attr->dest_qp_num); + + pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); if (attr_mask & IB_QP_PKEY_INDEX) - context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index); + MLX5_SET(ads, pri_path, pkey_index, attr->pkey_index); /* todo implement counter_index functionality */ if (is_sqp(ibqp->qp_type)) - context->pri_path.port = qp->port; + MLX5_SET(ads, pri_path, vhca_port_num, qp->port); if (attr_mask & IB_QP_PORT) - context->pri_path.port = attr->port_num; + MLX5_SET(ads, pri_path, vhca_port_num, attr->port_num); if (attr_mask & IB_QP_AV) { - err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path, - attr_mask & IB_QP_PORT ? attr->port_num : qp->port, + err = mlx5_set_path(dev, qp, &attr->ah_attr, pri_path, + attr_mask & IB_QP_PORT ? attr->port_num : + qp->port, attr_mask, 0, attr, false); if (err) goto out; } if (attr_mask & IB_QP_TIMEOUT) - context->pri_path.ackto_lt |= attr->timeout << 3; + MLX5_SET(ads, pri_path, ack_timeout, attr->timeout); if (attr_mask & IB_QP_ALT_PATH) { - err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, - &context->alt_path, + err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, alt_path, attr->alt_port_num, - attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT, + attr_mask | IB_QP_PKEY_INDEX | + IB_QP_TIMEOUT, 0, attr, true); if (err) goto out; @@ -3861,53 +3859,47 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq, &recv_cq); - context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); - context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; - context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0; - context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28); + MLX5_SET(qpc, qpc, pd, pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); + if (send_cq) + MLX5_SET(qpc, qpc, cqn_snd, send_cq->mcq.cqn); + if (recv_cq) + MLX5_SET(qpc, qpc, cqn_rcv, recv_cq->mcq.cqn); + + MLX5_SET(qpc, qpc, log_ack_req_freq, MLX5_IB_ACK_REQ_FREQ); if (attr_mask & IB_QP_RNR_RETRY) - context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); if (attr_mask & IB_QP_RETRY_CNT) - context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - if (attr->max_rd_atomic) - context->params1 |= - cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); - } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic) + MLX5_SET(qpc, qpc, log_sra_max, ilog2(attr->max_rd_atomic)); if (attr_mask & IB_QP_SQ_PSN) - context->next_send_psn = cpu_to_be32(attr->sq_psn); + MLX5_SET(qpc, qpc, next_send_psn, attr->sq_psn); - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - if (attr->max_dest_rd_atomic) - context->params2 |= - cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); - } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic) + MLX5_SET(qpc, qpc, log_rra_max, + ilog2(attr->max_dest_rd_atomic)); if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { - __be32 access_flags; - - err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags); + err = set_qpc_atomic_flags(qp, attr, attr_mask, qpc); if (err) goto out; - - context->params2 |= access_flags; } if (attr_mask & IB_QP_MIN_RNR_TIMER) - context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + MLX5_SET(qpc, qpc, min_rnr_nak, attr->min_rnr_timer); if (attr_mask & IB_QP_RQ_PSN) - context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + MLX5_SET(qpc, qpc, next_rcv_psn, attr->rq_psn); if (attr_mask & IB_QP_QKEY) - context->qkey = cpu_to_be32(attr->qkey); + MLX5_SET(qpc, qpc, q_key, attr->qkey); if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) - context->db_rec_addr = cpu_to_be64(qp->db.dma); + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : @@ -3921,15 +3913,14 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, set_id = ibqp->counter->id; else set_id = mlx5_ib_get_counters_id(dev, port_num); - context->qp_counter_set_usr_page |= - cpu_to_be32(set_id << 24); + MLX5_SET(qpc, qpc, counter_set_id, set_id); } if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) - context->sq_crq_size |= cpu_to_be16(1 << 4); + MLX5_SET(qpc, qpc, rlky, 1); if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) - context->deth_sqpn = cpu_to_be32(1); + MLX5_SET(qpc, qpc, deth_sqpn, 1); mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); @@ -3987,7 +3978,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); } else { - err = mlx5_core_qp_modify(dev, op, optpar, context, &base->mqp); + err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp); } if (err) @@ -4034,7 +4025,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } out: - kfree(context); + kfree(qpc); return err; } diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index def601199a1a..b8992b861ae6 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -495,72 +495,6 @@ struct mlx5_core_dct { struct completion drained; }; -struct mlx5_qp_path { - u8 fl_free_ar; - u8 rsvd3; - __be16 pkey_index; - u8 rsvd0; - u8 grh_mlid; - __be16 rlid; - u8 ackto_lt; - u8 mgid_index; - u8 static_rate; - u8 hop_limit; - __be32 tclass_flowlabel; - union { - u8 rgid[16]; - u8 rip[16]; - }; - u8 f_dscp_ecn_prio; - u8 ecn_dscp; - __be16 udp_sport; - u8 dci_cfi_prio_sl; - u8 port; - u8 rmac[6]; -}; - -/* FIXME: use mlx5_ifc.h qpc */ -struct mlx5_qp_context { - __be32 flags; - __be32 flags_pd; - u8 mtu_msgmax; - u8 rq_size_stride; - __be16 sq_crq_size; - __be32 qp_counter_set_usr_page; - __be32 wire_qpn; - __be32 log_pg_sz_remote_qpn; - struct mlx5_qp_path pri_path; - struct mlx5_qp_path alt_path; - __be32 params1; - u8 reserved2[4]; - __be32 next_send_psn; - __be32 cqn_send; - __be32 deth_sqpn; - u8 reserved3[4]; - __be32 last_acked_psn; - __be32 ssn; - __be32 params2; - __be32 rnr_nextrecvpsn; - __be32 xrcd; - __be32 cqn_recv; - __be64 db_rec_addr; - __be32 qkey; - __be32 rq_type_srqn; - __be32 rmsn; - __be16 hw_sq_wqe_counter; - __be16 sw_sq_wqe_counter; - __be16 hw_rcyclic_byte_counter; - __be16 hw_rq_counter; - __be16 sw_rcyclic_byte_counter; - __be16 sw_rq_counter; - u8 rsvd0[5]; - u8 cgs; - u8 cs_req; - u8 cs_res; - __be64 dc_access_key; - u8 rsvd1[24]; -}; - int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); From 5f62a521ff20e0b47a8d33421334bd245d6714ff Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 14:54:39 +0300 Subject: [PATCH 512/562] RDMA/mlx5: Set ECE options during modify QP The most common way to set ECE option will be during modify QP command in INIT2RTR, RTR2RTS and RTS2RTS stages, so update mlx5 to support it. The new bit in the comp_mask is needed to mark that kernel supports ECE and can receive data instead of "reserved" field in the struct mlx5_ib_modify_qp. Link: https://lore.kernel.org/r/20200526115440.205922-8-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 21 ++++++++++++--------- drivers/infiniband/hw/mlx5/qp.h | 2 +- drivers/infiniband/hw/mlx5/qpc.c | 11 +++++++---- include/uapi/rdma/mlx5-abi.h | 3 ++- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 623d7898ae6d..6557c8339161 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1959,6 +1959,9 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, resp.response_length += sizeof(resp.dump_fill_mkey); } + if (MLX5_CAP_GEN(dev->mdev, ece_support)) + resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_mdev; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a24176a8ec83..bfa0f7e43e3b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2353,7 +2353,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET && !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) { err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0, - NULL, &base->mqp); + NULL, &base->mqp, NULL); } else { struct mlx5_modify_raw_qp_param raw_qp_param = { .operation = MLX5_CMD_OP_2RST_QP @@ -3978,7 +3978,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); } else { - err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp); + u32 ece = MLX5_CAP_GEN(dev->mdev, ece_support) ? + ucmd->ece_options : 0; + err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp, + &ece); } if (err) @@ -4131,7 +4134,6 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; @@ -4182,7 +4184,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; - size_t required_cmd_sz; int err = -EINVAL; int port; @@ -4190,9 +4191,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return -ENOSYS; if (udata && udata->inlen) { - required_cmd_sz = offsetof(typeof(ucmd), reserved) + - sizeof(ucmd.reserved); - if (udata->inlen < required_cmd_sz) + if (udata->inlen < offsetofend(typeof(ucmd), ece_options)) return -EINVAL; if (udata->inlen > sizeof(ucmd) && @@ -4205,10 +4204,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return -EFAULT; if (ucmd.comp_mask || - memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) || memchr_inv(&ucmd.burst_info.reserved, 0, sizeof(ucmd.burst_info.reserved))) return -EOPNOTSUPP; + } if (unlikely(ibqp->qp_type == IB_QPT_GSI)) @@ -4217,8 +4216,12 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI : qp->type; - if (qp_type == MLX5_IB_QPT_DCT) + if (qp_type == MLX5_IB_QPT_DCT) { + if (memchr_inv(&ucmd.ece_options, 0, sizeof(ucmd.ece_options))) + return -EOPNOTSUPP; + return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata); + } mutex_lock(&qp->mutex); diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index 795c21f88962..82ea2b94dfa6 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -16,7 +16,7 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *qp, int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, u32 *in, int inlen, u32 *out); int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask, - void *qpc, struct mlx5_core_qp *qp); + void *qpc, struct mlx5_core_qp *qp, u32 *ece); int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct); int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp, diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 69c80859a6ee..d61bc1a88925 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -343,7 +343,7 @@ static void mbox_free(struct mbox_info *mbox) static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, u32 opt_param_mask, void *qpc, - struct mbox_info *mbox, u16 uid) + struct mbox_info *mbox, u16 uid, u32 ece) { mbox->out = NULL; mbox->in = NULL; @@ -391,18 +391,21 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, return -ENOMEM; MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); + MLX5_SET(init2rtr_qp_in, mbox->in, ece, ece); break; case MLX5_CMD_OP_RTR2RTS_QP: if (MBOX_ALLOC(mbox, rtr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); + MLX5_SET(rtr2rts_qp_in, mbox->in, ece, ece); break; case MLX5_CMD_OP_RTS2RTS_QP: if (MBOX_ALLOC(mbox, rts2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); + MLX5_SET(rts2rts_qp_in, mbox->in, ece, ece); break; case MLX5_CMD_OP_SQERR2RTS_QP: if (MBOX_ALLOC(mbox, sqerr2rts_qp)) @@ -423,13 +426,13 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, } int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask, - void *qpc, struct mlx5_core_qp *qp) + void *qpc, struct mlx5_core_qp *qp, u32 *ece) { struct mbox_info mbox; int err; - err = modify_qp_mbox_alloc(dev->mdev, opcode, qp->qpn, - opt_param_mask, qpc, &mbox, qp->uid); + err = modify_qp_mbox_alloc(dev->mdev, opcode, qp->qpn, opt_param_mask, + qpc, &mbox, qp->uid, (ece) ? *ece : 0); if (err) return err; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index bc9d9e3cb369..24e29a678177 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -100,6 +100,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 { enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, }; enum mlx5_user_cmds_supp_uhw { @@ -422,7 +423,7 @@ struct mlx5_ib_burst_info { struct mlx5_ib_modify_qp { __u32 comp_mask; struct mlx5_ib_burst_info burst_info; - __u32 reserved; + __u32 ece_options; }; struct mlx5_ib_modify_qp_resp { From 50aec2c3135efd985291adc2e4d1278d52b03de9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 14:54:40 +0300 Subject: [PATCH 513/562] RDMA/mlx5: Return ECE data after modify QP After users sets the ECE option, FW will return the agreed/supported bits through an output structures of modify QP stages for regular QPs or through create QP for the DCT. Link: https://lore.kernel.org/r/20200526115440.205922-9-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 25 +++++++++++++++++++++---- drivers/infiniband/hw/mlx5/qpc.c | 25 +++++++++++++++++++++++++ include/uapi/rdma/mlx5-abi.h | 2 ++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bfa0f7e43e3b..1988a0375696 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3708,6 +3708,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, enum ib_qp_state cur_state, enum ib_qp_state new_state, const struct mlx5_ib_modify_qp *ucmd, + struct mlx5_ib_modify_qp_resp *resp, struct ib_udata *udata) { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { @@ -3978,10 +3979,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); } else { - u32 ece = MLX5_CAP_GEN(dev->mdev, ece_support) ? - ucmd->ece_options : 0; + if (udata) { + /* For the kernel flows, the resp will stay zero */ + resp->ece_options = + MLX5_CAP_GEN(dev->mdev, ece_support) ? + ucmd->ece_options : 0; + resp->response_length = sizeof(*resp); + } err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp, - &ece); + &resp->ece_options); } if (err) @@ -4180,6 +4186,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_modify_qp_resp resp = {}; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; @@ -4292,7 +4299,17 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, - new_state, &ucmd, udata); + new_state, &ucmd, &resp, udata); + + /* resp.response_length is set in ECE supported flows only */ + if (!err && resp.response_length && + udata->outlen >= resp.response_length) + /* + * We don't check return value of the function below + * on purpose, because it is unclear how to unwind the + * error flow after QP was modified to the new state. + */ + ib_copy_to_udata(udata, &resp, resp.response_length); out: mutex_unlock(&qp->mutex); diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index d61bc1a88925..c19d91d6dce8 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -341,6 +341,27 @@ static void mbox_free(struct mbox_info *mbox) kfree(mbox->out); } +static int get_ece_from_mbox(void *out, u16 opcode) +{ + int ece = 0; + + switch (opcode) { + case MLX5_CMD_OP_INIT2RTR_QP: + ece = MLX5_GET(init2rtr_qp_out, out, ece); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + ece = MLX5_GET(rtr2rts_qp_out, out, ece); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + ece = MLX5_GET(rts2rts_qp_out, out, ece); + break; + default: + break; + } + + return ece; +} + static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, u32 opt_param_mask, void *qpc, struct mbox_info *mbox, u16 uid, u32 ece) @@ -438,6 +459,10 @@ int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask, err = mlx5_cmd_exec(dev->mdev, mbox.in, mbox.inlen, mbox.out, mbox.outlen); + + if (ece) + *ece = get_ece_from_mbox(mbox.out, opcode); + mbox_free(&mbox); return err; } diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 24e29a678177..27905a0268c9 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -429,6 +429,8 @@ struct mlx5_ib_modify_qp { struct mlx5_ib_modify_qp_resp { __u32 response_length; __u32 dctn; + __u32 ece_options; + __u32 reserved; }; struct mlx5_ib_create_wq_resp { From 2a556ce779e39b15cbb74e896ca640e86baeb1a1 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 27 May 2020 18:25:56 -0500 Subject: [PATCH 514/562] ipmi:ssif: Remove dynamic platform device handing Platform devices can only come in through the DMI interface, and that will get done before initialization is complete. Therefore there is no reason to hande getting a device in new_ssif_client after initialization. Dynamic entries can still come in through the i2c interfaces, but that's handled differently. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 6fffeb241f45..198b65d45c5e 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -189,8 +189,6 @@ struct ssif_addr_info { struct device *dev; struct i2c_client *client; - struct i2c_client *added_client; - struct mutex clients_mutex; struct list_head clients; @@ -1941,21 +1939,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) goto out; } -static int ssif_adapter_handler(struct device *adev, void *opaque) -{ - struct ssif_addr_info *addr_info = opaque; - - if (adev->type != &i2c_adapter_type) - return 0; - - addr_info->added_client = i2c_new_client_device(to_i2c_adapter(adev), - &addr_info->binfo); - - if (!addr_info->adapter_name) - return 1; /* Only try the first I2C adapter by default. */ - return 0; -} - static int new_ssif_client(int addr, char *adapter_name, int debug, int slave_addr, enum ipmi_addr_src addr_src, @@ -1999,9 +1982,7 @@ static int new_ssif_client(int addr, char *adapter_name, list_add_tail(&addr_info->link, &ssif_infos); - if (initialized) - i2c_for_each_dev(addr_info, ssif_adapter_handler); - /* Otherwise address list will get it */ + /* Address list will get it */ out_unlock: mutex_unlock(&ssif_infos_mutex); @@ -2121,8 +2102,6 @@ static int ssif_platform_remove(struct platform_device *dev) return 0; mutex_lock(&ssif_infos_mutex); - i2c_unregister_device(addr_info->added_client); - list_del(&addr_info->link); kfree(addr_info); mutex_unlock(&ssif_infos_mutex); From a8173820f441ab3e2a45c4bb66b70da9a57a349e Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Sat, 23 May 2020 22:41:10 +0530 Subject: [PATCH 515/562] gpio: gpiolib: Allow GPIO IRQs to lazy disable With 'commit 461c1a7d4733 ("gpiolib: override irq_enable/disable")' gpiolib overrides irqchip's irq_enable and irq_disable callbacks. If irq_disable callback is implemented then genirq takes unlazy path to disable irq. Underlying irqchip may not want to implement irq_disable callback to lazy disable irq when client drivers invokes disable_irq(). By overriding irq_disable callback, gpiolib ends up always unlazy disabling IRQ. Allow gpiolib to lazy disable IRQs by overriding irq_disable callback only if irqchip implemented irq_disable. In cases where irq_disable is not implemented irq_mask is overridden. Similarly override irq_enable callback only if irqchip implemented irq_enable otherwise irq_unmask is overridden. Fixes: 461c1a7d4733 ("gpiolib: override irq_enable/disable") Signed-off-by: Maulik Shah Tested-by: Hans Verkuil Link: https://lore.kernel.org/r/1590253873-11556-2-git-send-email-mkshah@codeaurora.org Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 55 ++++++++++++++++++++++++------------- include/linux/gpio/driver.h | 13 +++++++++ 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 34259c1e78ca..167860684be6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2471,32 +2471,37 @@ static void gpiochip_irq_relres(struct irq_data *d) gpiochip_relres_irq(gc, d->hwirq); } +static void gpiochip_irq_mask(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + if (gc->irq.irq_mask) + gc->irq.irq_mask(d); + gpiochip_disable_irq(gc, d->hwirq); +} + +static void gpiochip_irq_unmask(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + gpiochip_enable_irq(gc, d->hwirq); + if (gc->irq.irq_unmask) + gc->irq.irq_unmask(d); +} + static void gpiochip_irq_enable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); gpiochip_enable_irq(gc, d->hwirq); - if (gc->irq.irq_enable) - gc->irq.irq_enable(d); - else - gc->irq.chip->irq_unmask(d); + gc->irq.irq_enable(d); } static void gpiochip_irq_disable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); - /* - * Since we override .irq_disable() we need to mimic the - * behaviour of __irq_disable() in irq/chip.c. - * First call .irq_disable() if it exists, else mimic the - * behaviour of mask_irq() which calls .irq_mask() if - * it exists. - */ - if (gc->irq.irq_disable) - gc->irq.irq_disable(d); - else if (gc->irq.chip->irq_mask) - gc->irq.chip->irq_mask(d); + gc->irq.irq_disable(d); gpiochip_disable_irq(gc, d->hwirq); } @@ -2521,10 +2526,22 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc) "detected irqchip that is shared with multiple gpiochips: please fix the driver.\n"); return; } - gc->irq.irq_enable = irqchip->irq_enable; - gc->irq.irq_disable = irqchip->irq_disable; - irqchip->irq_enable = gpiochip_irq_enable; - irqchip->irq_disable = gpiochip_irq_disable; + + if (irqchip->irq_disable) { + gc->irq.irq_disable = irqchip->irq_disable; + irqchip->irq_disable = gpiochip_irq_disable; + } else { + gc->irq.irq_mask = irqchip->irq_mask; + irqchip->irq_mask = gpiochip_irq_mask; + } + + if (irqchip->irq_enable) { + gc->irq.irq_enable = irqchip->irq_enable; + irqchip->irq_enable = gpiochip_irq_enable; + } else { + gc->irq.irq_unmask = irqchip->irq_unmask; + irqchip->irq_unmask = gpiochip_irq_unmask; + } } /** diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 8c41ae41b6bb..c8bcaf315d03 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -253,6 +253,19 @@ struct gpio_irq_chip { * Store old irq_chip irq_disable callback */ void (*irq_disable)(struct irq_data *data); + /** + * @irq_unmask: + * + * Store old irq_chip irq_unmask callback + */ + void (*irq_unmask)(struct irq_data *data); + + /** + * @irq_mask: + * + * Store old irq_chip irq_mask callback + */ + void (*irq_mask)(struct irq_data *data); }; /** From a8a24f3f6e38103b77cf399c38eb54e1219d00d6 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:47 +0530 Subject: [PATCH 516/562] vfio: UAPI for migration interface for device state - Defined MIGRATION region type and sub-type. - Defined vfio_device_migration_info structure which will be placed at the 0th offset of migration region to get/set VFIO device related information. Defined members of structure and usage on read/write access. - Defined device states and state transition details. - Defined sequence to be followed while saving and resuming VFIO device. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 228 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 015516bcfaa3..ad9bb5af3463 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -305,6 +305,7 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_GFX (1) #define VFIO_REGION_TYPE_CCW (2) +#define VFIO_REGION_TYPE_MIGRATION (3) /* sub-types for VFIO_REGION_TYPE_PCI_* */ @@ -379,6 +380,233 @@ struct vfio_region_gfx_edid { /* sub-types for VFIO_REGION_TYPE_CCW */ #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) +/* sub-types for VFIO_REGION_TYPE_MIGRATION */ +#define VFIO_REGION_SUBTYPE_MIGRATION (1) + +/* + * The structure vfio_device_migration_info is placed at the 0th offset of + * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related + * migration information. Field accesses from this structure are only supported + * at their native width and alignment. Otherwise, the result is undefined and + * vendor drivers should return an error. + * + * device_state: (read/write) + * - The user application writes to this field to inform the vendor driver + * about the device state to be transitioned to. + * - The vendor driver should take the necessary actions to change the + * device state. After successful transition to a given state, the + * vendor driver should return success on write(device_state, state) + * system call. If the device state transition fails, the vendor driver + * should return an appropriate -errno for the fault condition. + * - On the user application side, if the device state transition fails, + * that is, if write(device_state, state) returns an error, read + * device_state again to determine the current state of the device from + * the vendor driver. + * - The vendor driver should return previous state of the device unless + * the vendor driver has encountered an internal error, in which case + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. + * - The user application must use the device reset ioctl to recover the + * device from VFIO_DEVICE_STATE_ERROR state. If the device is + * indicated to be in a valid device state by reading device_state, the + * user application may attempt to transition the device to any valid + * state reachable from the current state or terminate itself. + * + * device_state consists of 3 bits: + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, + * it indicates the _STOP state. When the device state is changed to + * _STOP, driver should stop the device before write() returns. + * - If bit 1 is set, it indicates the _SAVING state, which means that the + * driver should start gathering device state information that will be + * provided to the VFIO user application to save the device's state. + * - If bit 2 is set, it indicates the _RESUMING state, which means that + * the driver should prepare to resume the device. Data provided through + * the migration region should be used to resume the device. + * Bits 3 - 31 are reserved for future use. To preserve them, the user + * application should perform a read-modify-write operation on this + * field when modifying the specified bits. + * + * +------- _RESUMING + * |+------ _SAVING + * ||+----- _RUNNING + * ||| + * 000b => Device Stopped, not saving or resuming + * 001b => Device running, which is the default state + * 010b => Stop the device & save the device state, stop-and-copy state + * 011b => Device running and save the device state, pre-copy state + * 100b => Device stopped and the device state is resuming + * 101b => Invalid state + * 110b => Error state + * 111b => Invalid state + * + * State transitions: + * + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP + * (100b) (001b) (011b) (010b) (000b) + * 0. Running or default state + * | + * + * 1. Normal Shutdown (optional) + * |------------------------------------->| + * + * 2. Save the state or suspend + * |------------------------->|---------->| + * + * 3. Save the state during live migration + * |----------->|------------>|---------->| + * + * 4. Resuming + * |<---------| + * + * 5. Resumed + * |--------->| + * + * 0. Default state of VFIO device is _RUNNNG when the user application starts. + * 1. During normal shutdown of the user application, the user application may + * optionally change the VFIO device state from _RUNNING to _STOP. This + * transition is optional. The vendor driver must support this transition but + * must not require it. + * 2. When the user application saves state or suspends the application, the + * device state transitions from _RUNNING to stop-and-copy and then to _STOP. + * On state transition from _RUNNING to stop-and-copy, driver must stop the + * device, save the device state and send it to the application through the + * migration region. The sequence to be followed for such transition is given + * below. + * 3. In live migration of user application, the state transitions from _RUNNING + * to pre-copy, to stop-and-copy, and to _STOP. + * On state transition from _RUNNING to pre-copy, the driver should start + * gathering the device state while the application is still running and send + * the device state data to application through the migration region. + * On state transition from pre-copy to stop-and-copy, the driver must stop + * the device, save the device state and send it to the user application + * through the migration region. + * Vendor drivers must support the pre-copy state even for implementations + * where no data is provided to the user before the stop-and-copy state. The + * user must not be required to consume all migration data before the device + * transitions to a new state, including the stop-and-copy state. + * The sequence to be followed for above two transitions is given below. + * 4. To start the resuming phase, the device state should be transitioned from + * the _RUNNING to the _RESUMING state. + * In the _RESUMING state, the driver should use the device state data + * received through the migration region to resume the device. + * 5. After providing saved device data to the driver, the application should + * change the state from _RESUMING to _RUNNING. + * + * reserved: + * Reads on this field return zero and writes are ignored. + * + * pending_bytes: (read only) + * The number of pending bytes still to be migrated from the vendor driver. + * + * data_offset: (read only) + * The user application should read data_offset field from the migration + * region. The user application should read the device data from this + * offset within the migration region during the _SAVING state or write + * the device data during the _RESUMING state. See below for details of + * sequence to be followed. + * + * data_size: (read/write) + * The user application should read data_size to get the size in bytes of + * the data copied in the migration region during the _SAVING state and + * write the size in bytes of the data copied in the migration region + * during the _RESUMING state. + * + * The format of the migration region is as follows: + * ------------------------------------------------------------------ + * |vfio_device_migration_info| data section | + * | | /////////////////////////////// | + * ------------------------------------------------------------------ + * ^ ^ + * offset 0-trapped part data_offset + * + * The structure vfio_device_migration_info is always followed by the data + * section in the region, so data_offset will always be nonzero. The offset + * from where the data is copied is decided by the kernel driver. The data + * section can be trapped, mmapped, or partitioned, depending on how the kernel + * driver defines the data section. The data section partition can be defined + * as mapped by the sparse mmap capability. If mmapped, data_offset must be + * page aligned, whereas initial section which contains the + * vfio_device_migration_info structure, might not end at the offset, which is + * page aligned. The user is not required to access through mmap regardless + * of the capabilities of the region mmap. + * The vendor driver should determine whether and how to partition the data + * section. The vendor driver should return data_offset accordingly. + * + * The sequence to be followed while in pre-copy state and stop-and-copy state + * is as follows: + * a. Read pending_bytes, indicating the start of a new iteration to get device + * data. Repeated read on pending_bytes at this stage should have no side + * effects. + * If pending_bytes == 0, the user application should not iterate to get data + * for that device. + * If pending_bytes > 0, perform the following steps. + * b. Read data_offset, indicating that the vendor driver should make data + * available through the data section. The vendor driver should return this + * read operation only after data is available from (region + data_offset) + * to (region + data_offset + data_size). + * c. Read data_size, which is the amount of data in bytes available through + * the migration region. + * Read on data_offset and data_size should return the offset and size of + * the current buffer if the user application reads data_offset and + * data_size more than once here. + * d. Read data_size bytes of data from (region + data_offset) from the + * migration region. + * e. Process the data. + * f. Read pending_bytes, which indicates that the data from the previous + * iteration has been read. If pending_bytes > 0, go to step b. + * + * The user application can transition from the _SAVING|_RUNNING + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the + * number of pending bytes. The user application should iterate in _SAVING + * (stop-and-copy) until pending_bytes is 0. + * + * The sequence to be followed while _RESUMING device state is as follows: + * While data for this device is available, repeat the following steps: + * a. Read data_offset from where the user application should write data. + * b. Write migration data starting at the migration region + data_offset for + * the length determined by data_size from the migration source. + * c. Write data_size, which indicates to the vendor driver that data is + * written in the migration region. Vendor driver must return this write + * operations on consuming data. Vendor driver should apply the + * user-provided migration region data to the device resume state. + * + * If an error occurs during the above sequences, the vendor driver can return + * an error code for next read() or write() operation, which will terminate the + * loop. The user application should then take the next necessary action, for + * example, failing migration or terminating the user application. + * + * For the user application, data is opaque. The user application should write + * data in the same order as the data is received and the data should be of + * same transaction size at the source. + */ + +struct vfio_device_migration_info { + __u32 device_state; /* VFIO device state */ +#define VFIO_DEVICE_STATE_STOP (0) +#define VFIO_DEVICE_STATE_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ + VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + +#define VFIO_DEVICE_STATE_VALID(state) \ + (state & VFIO_DEVICE_STATE_RESUMING ? \ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + +#define VFIO_DEVICE_STATE_IS_ERROR(state) \ + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING)) + +#define VFIO_DEVICE_STATE_SET_ERROR(state) \ + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + + __u32 reserved; + __u64 pending_bytes; + __u64 data_offset; + __u64 data_size; +}; + /* * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped * which allows direct access to non-MSIX registers which happened to be within From 6581708586c812460ea4798216d9dd2897ad105c Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:48 +0530 Subject: [PATCH 517/562] vfio iommu: Remove atomicity of ref_count of pinned pages vfio_pfn.ref_count is always updated while holding iommu->lock, using atomic variable is overkill. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Eric Auger Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 4a4cb7cd86b2..e0de3337fa2a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -112,7 +112,7 @@ struct vfio_pfn { struct rb_node node; dma_addr_t iova; /* Device address */ unsigned long pfn; /* Host pfn */ - atomic_t ref_count; + unsigned int ref_count; }; struct vfio_regions { @@ -233,7 +233,7 @@ static int vfio_add_to_pfn_list(struct vfio_dma *dma, dma_addr_t iova, vpfn->iova = iova; vpfn->pfn = pfn; - atomic_set(&vpfn->ref_count, 1); + vpfn->ref_count = 1; vfio_link_pfn(dma, vpfn); return 0; } @@ -251,7 +251,7 @@ static struct vfio_pfn *vfio_iova_get_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn = vfio_find_vpfn(dma, iova); if (vpfn) - atomic_inc(&vpfn->ref_count); + vpfn->ref_count++; return vpfn; } @@ -259,7 +259,8 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn) { int ret = 0; - if (atomic_dec_and_test(&vpfn->ref_count)) { + vpfn->ref_count--; + if (!vpfn->ref_count) { ret = put_pfn(vpfn->pfn, dma->prot); vfio_remove_from_pfn_list(dma, vpfn); } From cade075f265b25c3d6c13486e1bf0fbed9935a4a Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:49 +0530 Subject: [PATCH 518/562] vfio iommu: Cache pgsize_bitmap in struct vfio_iommu Calculate and cache pgsize_bitmap when iommu->domain_list is updated and iommu->external_domain is set for mdev device. Add iommu->lock protection when cached pgsize_bitmap is accessed. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 88 ++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e0de3337fa2a..664ff166b4c0 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -69,6 +69,7 @@ struct vfio_iommu { struct rb_root dma_list; struct blocking_notifier_head notifier; unsigned int dma_avail; + uint64_t pgsize_bitmap; bool v2; bool nesting; }; @@ -835,15 +836,14 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) iommu->dma_avail++; } -static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) +static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu) { struct vfio_domain *domain; - unsigned long bitmap = ULONG_MAX; - mutex_lock(&iommu->lock); + iommu->pgsize_bitmap = ULONG_MAX; + list_for_each_entry(domain, &iommu->domain_list, next) - bitmap &= domain->domain->pgsize_bitmap; - mutex_unlock(&iommu->lock); + iommu->pgsize_bitmap &= domain->domain->pgsize_bitmap; /* * In case the IOMMU supports page sizes smaller than PAGE_SIZE @@ -853,12 +853,10 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) * granularity while iommu driver can use the sub-PAGE_SIZE size * to map the buffer. */ - if (bitmap & ~PAGE_MASK) { - bitmap &= PAGE_MASK; - bitmap |= PAGE_SIZE; + if (iommu->pgsize_bitmap & ~PAGE_MASK) { + iommu->pgsize_bitmap &= PAGE_MASK; + iommu->pgsize_bitmap |= PAGE_SIZE; } - - return bitmap; } static int vfio_dma_do_unmap(struct vfio_iommu *iommu, @@ -869,19 +867,28 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, size_t unmapped = 0; int ret = 0, retries = 0; - mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; + mutex_lock(&iommu->lock); + + mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1; + + if (unmap->iova & mask) { + ret = -EINVAL; + goto unlock; + } + + if (!unmap->size || unmap->size & mask) { + ret = -EINVAL; + goto unlock; + } - if (unmap->iova & mask) - return -EINVAL; - if (!unmap->size || unmap->size & mask) - return -EINVAL; if (unmap->iova + unmap->size - 1 < unmap->iova || - unmap->size > SIZE_MAX) - return -EINVAL; + unmap->size > SIZE_MAX) { + ret = -EINVAL; + goto unlock; + } WARN_ON(mask & PAGE_MASK); again: - mutex_lock(&iommu->lock); /* * vfio-iommu-type1 (v1) - User mappings were coalesced together to @@ -960,6 +967,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, blocking_notifier_call_chain(&iommu->notifier, VFIO_IOMMU_NOTIFY_DMA_UNMAP, &nb_unmap); + mutex_lock(&iommu->lock); goto again; } unmapped += dma->size; @@ -1075,24 +1083,28 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, if (map->size != size || map->vaddr != vaddr || map->iova != iova) return -EINVAL; - mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; - - WARN_ON(mask & PAGE_MASK); - /* READ/WRITE from device perspective */ if (map->flags & VFIO_DMA_MAP_FLAG_WRITE) prot |= IOMMU_WRITE; if (map->flags & VFIO_DMA_MAP_FLAG_READ) prot |= IOMMU_READ; - if (!prot || !size || (size | iova | vaddr) & mask) - return -EINVAL; + mutex_lock(&iommu->lock); + + mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1; + + WARN_ON(mask & PAGE_MASK); + + if (!prot || !size || (size | iova | vaddr) & mask) { + ret = -EINVAL; + goto out_unlock; + } /* Don't allow IOVA or virtual address wrap */ - if (iova + size - 1 < iova || vaddr + size - 1 < vaddr) - return -EINVAL; - - mutex_lock(&iommu->lock); + if (iova + size - 1 < iova || vaddr + size - 1 < vaddr) { + ret = -EINVAL; + goto out_unlock; + } if (vfio_find_dma(iommu, iova, size)) { ret = -EEXIST; @@ -1698,6 +1710,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (!iommu->external_domain) { INIT_LIST_HEAD(&domain->group_list); iommu->external_domain = domain; + vfio_update_pgsize_bitmap(iommu); } else { kfree(domain); } @@ -1823,6 +1836,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } list_add(&domain->next, &iommu->domain_list); + vfio_update_pgsize_bitmap(iommu); done: /* Delete the old one and insert new iova list */ vfio_iommu_iova_insert_copy(iommu, &iova_copy); @@ -2034,6 +2048,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, list_del(&domain->next); kfree(domain); vfio_iommu_aper_expand(iommu, &iova_copy); + vfio_update_pgsize_bitmap(iommu); } break; } @@ -2166,8 +2181,6 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, size_t size; int iovas = 0, i = 0, ret; - mutex_lock(&iommu->lock); - list_for_each_entry(iova, &iommu->iova_list, list) iovas++; @@ -2176,17 +2189,14 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, * Return 0 as a container with a single mdev device * will have an empty list */ - ret = 0; - goto out_unlock; + return 0; } size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges)); cap_iovas = kzalloc(size, GFP_KERNEL); - if (!cap_iovas) { - ret = -ENOMEM; - goto out_unlock; - } + if (!cap_iovas) + return -ENOMEM; cap_iovas->nr_iovas = iovas; @@ -2199,8 +2209,6 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size); kfree(cap_iovas); -out_unlock: - mutex_unlock(&iommu->lock); return ret; } @@ -2245,11 +2253,13 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, info.cap_offset = 0; /* output, no-recopy necessary */ } + mutex_lock(&iommu->lock); info.flags = VFIO_IOMMU_INFO_PGSIZES; - info.iova_pgsizes = vfio_pgsize_bitmap(iommu); + info.iova_pgsizes = iommu->pgsize_bitmap; ret = vfio_iommu_iova_build_caps(iommu, &caps); + mutex_unlock(&iommu->lock); if (ret) return ret; From b704fd14a06f678f080e44db28061f1bcb1f595c Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:50 +0530 Subject: [PATCH 519/562] vfio iommu: Add ioctl definition for dirty pages tracking IOMMU container maintains a list of all pages pinned by vfio_pin_pages API. All pages pinned by vendor driver through this API should be considered as dirty during migration. When container consists of IOMMU capable device and all pages are pinned and mapped, then all pages are marked dirty. Added support to start/stop dirtied pages tracking and to get bitmap of all dirtied pages for requested IO virtual address range. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ad9bb5af3463..009a8c80079d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1033,6 +1033,12 @@ struct vfio_iommu_type1_dma_map { #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) +struct vfio_bitmap { + __u64 pgsize; /* page size for bitmap in bytes */ + __u64 size; /* in bytes */ + __u64 __user *data; /* one bit per page */ +}; + /** * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, * struct vfio_dma_unmap) @@ -1059,6 +1065,57 @@ struct vfio_iommu_type1_dma_unmap { #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) +/** + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_iommu_type1_dirty_bitmap) + * IOCTL is used for dirty pages logging. + * Caller should set flag depending on which operation to perform, details as + * below: + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs + * the IOMMU driver to log pages that are dirtied or potentially dirtied by + * the device; designed to be used when a migration is in progress. Dirty pages + * are logged until logging is disabled by user application by calling the IOCTL + * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs + * the IOMMU driver to stop logging dirtied pages. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set + * returns the dirty pages bitmap for IOMMU container for a given IOVA range. + * The user must specify the IOVA range and the pgsize through the structure + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface + * supports getting a bitmap of the smallest supported pgsize only and can be + * modified in future to get a bitmap of any specified supported pgsize. The + * user must provide a zeroed memory area for the bitmap memory and specify its + * size in bitmap.size. One bit is used to represent one page consecutively + * starting from iova offset. The user should provide page size in bitmap.pgsize + * field. A bit set in the bitmap indicates that the page at that offset from + * iova is dirty. The caller must set argsz to a value including the size of + * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the + * actual bitmap. If dirty pages logging is not enabled, an error will be + * returned. + * + * Only one of the flags _START, _STOP and _GET may be specified at a time. + * + */ +struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) + __u8 data[]; +}; + +struct vfio_iommu_type1_dirty_bitmap_get { + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of iova range */ + struct vfio_bitmap bitmap; +}; + +#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* From d6a4c185660cb978f76e46e3f5f6b08dc00ecc8f Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:51 +0530 Subject: [PATCH 520/562] vfio iommu: Implementation of ioctl for dirty pages tracking VFIO_IOMMU_DIRTY_PAGES ioctl performs three operations: - Start dirty pages tracking while migration is active - Stop dirty pages tracking. - Get dirty pages bitmap. Its user space application's responsibility to copy content of dirty pages from source to destination during migration. To prevent DoS attack, memory for bitmap is allocated per vfio_dma structure. Bitmap size is calculated considering smallest supported page size. Bitmap is allocated for all vfio_dmas when dirty logging is enabled Bitmap is populated for already pinned pages when bitmap is allocated for a vfio_dma with the smallest supported page size. Update bitmap from pinning functions when tracking is enabled. When user application queries bitmap, check if requested page size is same as page size used to populated bitmap. If it is equal, copy bitmap, but if not equal, return error. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Yan Zhao Fixed error reported by build bot by changing pgsize type from uint64_t to size_t. Reported-by: kbuild test robot Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 314 +++++++++++++++++++++++++++++++- 1 file changed, 308 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 664ff166b4c0..be6fc0d88633 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -72,6 +72,7 @@ struct vfio_iommu { uint64_t pgsize_bitmap; bool v2; bool nesting; + bool dirty_page_tracking; }; struct vfio_domain { @@ -92,6 +93,7 @@ struct vfio_dma { bool lock_cap; /* capable(CAP_IPC_LOCK) */ struct task_struct *task; struct rb_root pfn_list; /* Ex-user pinned pfn list */ + unsigned long *bitmap; }; struct vfio_group { @@ -126,6 +128,19 @@ struct vfio_regions { #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \ (!list_empty(&iommu->domain_list)) +#define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE) + +/* + * Input argument of number of bits to bitmap_set() is unsigned integer, which + * further casts to signed integer for unaligned multi-bit operation, + * __bitmap_set(). + * Then maximum bitmap size supported is 2^31 bits divided by 2^3 bits/byte, + * that is 2^28 (256 MB) which maps to 2^31 * 2^12 = 2^43 (8TB) on 4K page + * system. + */ +#define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX) +#define DIRTY_BITMAP_SIZE_MAX DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX) + static int put_pfn(unsigned long pfn, int prot); /* @@ -176,6 +191,80 @@ static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old) rb_erase(&old->node, &iommu->dma_list); } + +static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize) +{ + uint64_t npages = dma->size / pgsize; + + if (npages > DIRTY_BITMAP_PAGES_MAX) + return -EINVAL; + + /* + * Allocate extra 64 bits that are used to calculate shift required for + * bitmap_shift_left() to manipulate and club unaligned number of pages + * in adjacent vfio_dma ranges. + */ + dma->bitmap = kvzalloc(DIRTY_BITMAP_BYTES(npages) + sizeof(u64), + GFP_KERNEL); + if (!dma->bitmap) + return -ENOMEM; + + return 0; +} + +static void vfio_dma_bitmap_free(struct vfio_dma *dma) +{ + kfree(dma->bitmap); + dma->bitmap = NULL; +} + +static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize) +{ + struct rb_node *p; + + for (p = rb_first(&dma->pfn_list); p; p = rb_next(p)) { + struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn, node); + + bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1); + } +} + +static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize) +{ + struct rb_node *n; + + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); + int ret; + + ret = vfio_dma_bitmap_alloc(dma, pgsize); + if (ret) { + struct rb_node *p; + + for (p = rb_prev(n); p; p = rb_prev(p)) { + struct vfio_dma *dma = rb_entry(n, + struct vfio_dma, node); + + vfio_dma_bitmap_free(dma); + } + return ret; + } + vfio_dma_populate_bitmap(dma, pgsize); + } + return 0; +} + +static void vfio_dma_bitmap_free_all(struct vfio_iommu *iommu) +{ + struct rb_node *n; + + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); + + vfio_dma_bitmap_free(dma); + } +} + /* * Helper Functions for host iova-pfn list */ @@ -598,6 +687,17 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, vfio_unpin_page_external(dma, iova, do_accounting); goto pin_unwind; } + + if (iommu->dirty_page_tracking) { + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); + + /* + * Bitmap populated with the smallest supported page + * size + */ + bitmap_set(dma->bitmap, + (iova - dma->iova) >> pgshift, 1); + } } ret = i; @@ -832,6 +932,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) vfio_unmap_unpin(iommu, dma, true); vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); + vfio_dma_bitmap_free(dma); kfree(dma); iommu->dma_avail++; } @@ -859,6 +960,94 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu) } } +static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma, + dma_addr_t base_iova, size_t pgsize) +{ + unsigned long pgshift = __ffs(pgsize); + unsigned long nbits = dma->size >> pgshift; + unsigned long bit_offset = (dma->iova - base_iova) >> pgshift; + unsigned long copy_offset = bit_offset / BITS_PER_LONG; + unsigned long shift = bit_offset % BITS_PER_LONG; + unsigned long leftover; + + /* mark all pages dirty if all pages are pinned and mapped. */ + if (dma->iommu_mapped) + bitmap_set(dma->bitmap, 0, nbits); + + if (shift) { + bitmap_shift_left(dma->bitmap, dma->bitmap, shift, + nbits + shift); + + if (copy_from_user(&leftover, + (const void *)(bitmap + copy_offset), + sizeof(leftover))) + return -EFAULT; + + bitmap_or(dma->bitmap, dma->bitmap, &leftover, shift); + } + + if (copy_to_user((void *)(bitmap + copy_offset), dma->bitmap, + DIRTY_BITMAP_BYTES(nbits + shift))) + return -EFAULT; + + return 0; +} + +static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, + dma_addr_t iova, size_t size, size_t pgsize) +{ + struct vfio_dma *dma; + struct rb_node *n; + unsigned long pgshift = __ffs(pgsize); + int ret; + + /* + * GET_BITMAP request must fully cover vfio_dma mappings. Multiple + * vfio_dma mappings may be clubbed by specifying large ranges, but + * there must not be any previous mappings bisected by the range. + * An error will be returned if these conditions are not met. + */ + dma = vfio_find_dma(iommu, iova, 1); + if (dma && dma->iova != iova) + return -EINVAL; + + dma = vfio_find_dma(iommu, iova + size - 1, 0); + if (dma && dma->iova + dma->size != iova + size) + return -EINVAL; + + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); + + if (dma->iova < iova) + continue; + + if (dma->iova > iova + size - 1) + break; + + ret = update_user_bitmap(bitmap, dma, iova, pgsize); + if (ret) + return ret; + + /* + * Re-populate bitmap to include all pinned pages which are + * considered as dirty but exclude pages which are unpinned and + * pages which are marked dirty by vfio_dma_rw() + */ + bitmap_clear(dma->bitmap, 0, dma->size >> pgshift); + vfio_dma_populate_bitmap(dma, pgsize); + } + return 0; +} + +static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size) +{ + if (!npages || !bitmap_size || (bitmap_size > DIRTY_BITMAP_SIZE_MAX) || + (bitmap_size < DIRTY_BITMAP_BYTES(npages))) + return -EINVAL; + + return 0; +} + static int vfio_dma_do_unmap(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_unmap *unmap) { @@ -1076,7 +1265,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, unsigned long vaddr = map->vaddr; size_t size = map->size; int ret = 0, prot = 0; - uint64_t mask; + size_t pgsize; struct vfio_dma *dma; /* Verify that none of our __u64 fields overflow */ @@ -1091,11 +1280,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, mutex_lock(&iommu->lock); - mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1; + pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); - WARN_ON(mask & PAGE_MASK); + WARN_ON((pgsize - 1) & PAGE_MASK); - if (!prot || !size || (size | iova | vaddr) & mask) { + if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) { ret = -EINVAL; goto out_unlock; } @@ -1172,6 +1361,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, else ret = vfio_pin_map_dma(iommu, dma, size); + if (!ret && iommu->dirty_page_tracking) { + ret = vfio_dma_bitmap_alloc(dma, pgsize); + if (ret) + vfio_remove_dma(iommu, dma); + } + out_unlock: mutex_unlock(&iommu->lock); return ret; @@ -2318,6 +2513,104 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, return copy_to_user((void __user *)arg, &unmap, minsz) ? -EFAULT : 0; + } else if (cmd == VFIO_IOMMU_DIRTY_PAGES) { + struct vfio_iommu_type1_dirty_bitmap dirty; + uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | + VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + int ret = 0; + + if (!iommu->v2) + return -EACCES; + + minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, + flags); + + if (copy_from_user(&dirty, (void __user *)arg, minsz)) + return -EFAULT; + + if (dirty.argsz < minsz || dirty.flags & ~mask) + return -EINVAL; + + /* only one flag should be set at a time */ + if (__ffs(dirty.flags) != __fls(dirty.flags)) + return -EINVAL; + + if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { + size_t pgsize; + + mutex_lock(&iommu->lock); + pgsize = 1 << __ffs(iommu->pgsize_bitmap); + if (!iommu->dirty_page_tracking) { + ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); + if (!ret) + iommu->dirty_page_tracking = true; + } + mutex_unlock(&iommu->lock); + return ret; + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { + mutex_lock(&iommu->lock); + if (iommu->dirty_page_tracking) { + iommu->dirty_page_tracking = false; + vfio_dma_bitmap_free_all(iommu); + } + mutex_unlock(&iommu->lock); + return 0; + } else if (dirty.flags & + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { + struct vfio_iommu_type1_dirty_bitmap_get range; + unsigned long pgshift; + size_t data_size = dirty.argsz - minsz; + size_t iommu_pgsize; + + if (!data_size || data_size < sizeof(range)) + return -EINVAL; + + if (copy_from_user(&range, (void __user *)(arg + minsz), + sizeof(range))) + return -EFAULT; + + if (range.iova + range.size < range.iova) + return -EINVAL; + if (!access_ok((void __user *)range.bitmap.data, + range.bitmap.size)) + return -EINVAL; + + pgshift = __ffs(range.bitmap.pgsize); + ret = verify_bitmap_size(range.size >> pgshift, + range.bitmap.size); + if (ret) + return ret; + + mutex_lock(&iommu->lock); + + iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); + + /* allow only smallest supported pgsize */ + if (range.bitmap.pgsize != iommu_pgsize) { + ret = -EINVAL; + goto out_unlock; + } + if (range.iova & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; + } + if (!range.size || range.size & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; + } + + if (iommu->dirty_page_tracking) + ret = vfio_iova_dirty_bitmap(range.bitmap.data, + iommu, range.iova, range.size, + range.bitmap.pgsize); + else + ret = -EINVAL; +out_unlock: + mutex_unlock(&iommu->lock); + + return ret; + } } return -ENOTTY; @@ -2385,10 +2678,19 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, vaddr = dma->vaddr + offset; - if (write) + if (write) { *copied = copy_to_user((void __user *)vaddr, data, count) ? 0 : count; - else + if (*copied && iommu->dirty_page_tracking) { + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); + /* + * Bitmap populated with the smallest supported page + * size + */ + bitmap_set(dma->bitmap, offset >> pgshift, + *copied >> pgshift); + } + } else *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; if (kthread) From 331e33d2960c8292bad8b02578fcfac18f721517 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:52 +0530 Subject: [PATCH 521/562] vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap DMA mapped pages, including those pinned by mdev vendor drivers, might get unpinned and unmapped while migration is active and device is still running. For example, in pre-copy phase while guest driver could access those pages, host device or vendor driver can dirty these mapped pages. Such pages should be marked dirty so as to maintain memory consistency for a user making use of dirty page tracking. To get bitmap during unmap, user should allocate memory for bitmap, set it all zeros, set size of allocated memory, set page size to be considered for bitmap and set flag VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 61 +++++++++++++++++++++++++++------ include/uapi/linux/vfio.h | 11 ++++++ 2 files changed, 61 insertions(+), 11 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index be6fc0d88633..e31fcc23e81a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1049,23 +1049,25 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size) } static int vfio_dma_do_unmap(struct vfio_iommu *iommu, - struct vfio_iommu_type1_dma_unmap *unmap) + struct vfio_iommu_type1_dma_unmap *unmap, + struct vfio_bitmap *bitmap) { - uint64_t mask; struct vfio_dma *dma, *dma_last = NULL; - size_t unmapped = 0; + size_t unmapped = 0, pgsize; int ret = 0, retries = 0; + unsigned long pgshift; mutex_lock(&iommu->lock); - mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1; + pgshift = __ffs(iommu->pgsize_bitmap); + pgsize = (size_t)1 << pgshift; - if (unmap->iova & mask) { + if (unmap->iova & (pgsize - 1)) { ret = -EINVAL; goto unlock; } - if (!unmap->size || unmap->size & mask) { + if (!unmap->size || unmap->size & (pgsize - 1)) { ret = -EINVAL; goto unlock; } @@ -1076,9 +1078,15 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, goto unlock; } - WARN_ON(mask & PAGE_MASK); -again: + /* When dirty tracking is enabled, allow only min supported pgsize */ + if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) && + (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) { + ret = -EINVAL; + goto unlock; + } + WARN_ON((pgsize - 1) & PAGE_MASK); +again: /* * vfio-iommu-type1 (v1) - User mappings were coalesced together to * avoid tracking individual mappings. This means that the granularity @@ -1159,6 +1167,14 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, mutex_lock(&iommu->lock); goto again; } + + if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { + ret = update_user_bitmap(bitmap->data, dma, + unmap->iova, pgsize); + if (ret) + break; + } + unmapped += dma->size; vfio_remove_dma(iommu, dma); } @@ -2497,17 +2513,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { struct vfio_iommu_type1_dma_unmap unmap; - long ret; + struct vfio_bitmap bitmap = { 0 }; + int ret; minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); if (copy_from_user(&unmap, (void __user *)arg, minsz)) return -EFAULT; - if (unmap.argsz < minsz || unmap.flags) + if (unmap.argsz < minsz || + unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) return -EINVAL; - ret = vfio_dma_do_unmap(iommu, &unmap); + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { + unsigned long pgshift; + + if (unmap.argsz < (minsz + sizeof(bitmap))) + return -EINVAL; + + if (copy_from_user(&bitmap, + (void __user *)(arg + minsz), + sizeof(bitmap))) + return -EFAULT; + + if (!access_ok((void __user *)bitmap.data, bitmap.size)) + return -EINVAL; + + pgshift = __ffs(bitmap.pgsize); + ret = verify_bitmap_size(unmap.size >> pgshift, + bitmap.size); + if (ret) + return ret; + } + + ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap); if (ret) return ret; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 009a8c80079d..ff4b6706f7df 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1048,12 +1048,23 @@ struct vfio_bitmap { * field. No guarantee is made to the user that arbitrary unmaps of iova * or size different from those used in the original mapping call will * succeed. + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap + * before unmapping IO virtual addresses. When this flag is set, the user must + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated + * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. + * A bit in the bitmap represents one page, of user provided page size in + * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set + * indicates that the page at that offset from iova is dirty. A Bitmap of the + * pages in the range of unmapped size is returned in the user-provided + * vfio_bitmap.data. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; __u32 flags; +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ + __u8 data[]; }; #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) From ad721705d09c62f0d108a6b4f59867ebfd592c90 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:53 +0530 Subject: [PATCH 522/562] vfio iommu: Add migration capability to report supported features Added migration capability in IOMMU info chain. User application should check IOMMU info chain for migration capability to use dirty page tracking feature provided by kernel module. User application must check page sizes supported and maximum dirty bitmap size returned by this capability structure for ioctls used to get dirty bitmap. Signed-off-by: Kirti Wankhede Reviewed-by: Cornelia Huck Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 23 ++++++++++++++++++++++- include/uapi/linux/vfio.h | 23 +++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e31fcc23e81a..f5c6ca46f165 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2423,6 +2423,22 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, return ret; } +static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu, + struct vfio_info_cap *caps) +{ + struct vfio_iommu_type1_info_cap_migration cap_mig; + + cap_mig.header.id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION; + cap_mig.header.version = 1; + + cap_mig.flags = 0; + /* support minimum pgsize */ + cap_mig.pgsize_bitmap = (size_t)1 << __ffs(iommu->pgsize_bitmap); + cap_mig.max_dirty_bitmap_size = DIRTY_BITMAP_SIZE_MAX; + + return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig)); +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -2469,8 +2485,13 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, info.iova_pgsizes = iommu->pgsize_bitmap; - ret = vfio_iommu_iova_build_caps(iommu, &caps); + ret = vfio_iommu_migration_build_caps(iommu, &caps); + + if (!ret) + ret = vfio_iommu_iova_build_caps(iommu, &caps); + mutex_unlock(&iommu->lock); + if (ret) return ret; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ff4b6706f7df..fde4692a6989 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1013,6 +1013,29 @@ struct vfio_iommu_type1_info_cap_iova_range { struct vfio_iova_range iova_ranges[]; }; +/* + * The migration capability allows to report supported features for migration. + * + * The structures below define version 1 of this capability. + * + * The existence of this capability indicates that IOMMU kernel driver supports + * dirty page logging. + * + * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty + * page logging. + * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap + * size in bytes that can be used by user applications when getting the dirty + * bitmap. + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 + +struct vfio_iommu_type1_info_cap_migration { + struct vfio_info_cap_header header; + __u32 flags; + __u64 pgsize_bitmap; + __u64 max_dirty_bitmap_size; /* in bytes */ +}; + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /** From 95fc87b44104d9a524ff3e975bbfbd7c1f1a2dd5 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Fri, 29 May 2020 02:00:54 +0530 Subject: [PATCH 523/562] vfio: Selective dirty page tracking if IOMMU backed device pins pages Added a check such that only singleton IOMMU groups can pin pages. >From the point when vendor driver pins any pages, consider IOMMU group dirty page scope to be limited to pinned pages. To optimize to avoid walking list often, added flag pinned_page_dirty_scope to indicate if all of the vfio_groups for each vfio_domain in the domain_list dirty page scope is limited to pinned pages. This flag is updated on first pinned pages request for that IOMMU group and on attaching/detaching group. Signed-off-by: Kirti Wankhede Reviewed-by: Neo Jia Reviewed-by: Yan Zhao Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 13 +++- drivers/vfio/vfio_iommu_type1.c | 103 +++++++++++++++++++++++++++++--- include/linux/vfio.h | 4 +- 3 files changed, 109 insertions(+), 11 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 765e0e5d83ed..580099afeaff 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -85,6 +85,7 @@ struct vfio_group { atomic_t opened; wait_queue_head_t container_q; bool noiommu; + unsigned int dev_counter; struct kvm *kvm; struct blocking_notifier_head notifier; }; @@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group, mutex_lock(&group->device_lock); list_add(&device->group_next, &group->device_list); + group->dev_counter++; mutex_unlock(&group->device_lock); return device; @@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref) struct vfio_group *group = device->group; list_del(&device->group_next); + group->dev_counter--; mutex_unlock(&group->device_lock); dev_set_drvdata(device->dev, NULL); @@ -1945,6 +1948,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, if (!group) return -ENODEV; + if (group->dev_counter > 1) + return -EINVAL; + ret = vfio_group_add_container_user(group); if (ret) goto err_pin_pages; @@ -1952,7 +1958,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, container = group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->pin_pages)) - ret = driver->ops->pin_pages(container->iommu_data, user_pfn, + ret = driver->ops->pin_pages(container->iommu_data, + group->iommu_group, user_pfn, npage, prot, phys_pfn); else ret = -ENOTTY; @@ -2050,8 +2057,8 @@ int vfio_group_pin_pages(struct vfio_group *group, driver = container->iommu_driver; if (likely(driver && driver->ops->pin_pages)) ret = driver->ops->pin_pages(container->iommu_data, - user_iova_pfn, npage, - prot, phys_pfn); + group->iommu_group, user_iova_pfn, + npage, prot, phys_pfn); else ret = -ENOTTY; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index f5c6ca46f165..a6d632d44c82 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -73,6 +73,7 @@ struct vfio_iommu { bool v2; bool nesting; bool dirty_page_tracking; + bool pinned_page_dirty_scope; }; struct vfio_domain { @@ -100,6 +101,7 @@ struct vfio_group { struct iommu_group *iommu_group; struct list_head next; bool mdev_group; /* An mdev group */ + bool pinned_page_dirty_scope; }; struct vfio_iova { @@ -143,6 +145,10 @@ struct vfio_regions { static int put_pfn(unsigned long pfn, int prot); +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, + struct iommu_group *iommu_group); + +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); /* * This code handles mapping and unmapping of user data buffers * into DMA'ble space using the IOMMU @@ -622,11 +628,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, } static int vfio_iommu_type1_pin_pages(void *iommu_data, + struct iommu_group *iommu_group, unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn) { struct vfio_iommu *iommu = iommu_data; + struct vfio_group *group; int i, j, ret; unsigned long remote_vaddr; struct vfio_dma *dma; @@ -699,8 +707,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, (iova - dma->iova) >> pgshift, 1); } } - ret = i; + + group = vfio_iommu_find_iommu_group(iommu, iommu_group); + if (!group->pinned_page_dirty_scope) { + group->pinned_page_dirty_scope = true; + update_pinned_page_dirty_scope(iommu); + } + goto pin_done; pin_unwind: @@ -960,8 +974,9 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu) } } -static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma, - dma_addr_t base_iova, size_t pgsize) +static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, + struct vfio_dma *dma, dma_addr_t base_iova, + size_t pgsize) { unsigned long pgshift = __ffs(pgsize); unsigned long nbits = dma->size >> pgshift; @@ -970,8 +985,11 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma, unsigned long shift = bit_offset % BITS_PER_LONG; unsigned long leftover; - /* mark all pages dirty if all pages are pinned and mapped. */ - if (dma->iommu_mapped) + /* + * mark all pages dirty if any IOMMU capable device is not able + * to report dirty pages and all pages are pinned and mapped. + */ + if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped) bitmap_set(dma->bitmap, 0, nbits); if (shift) { @@ -1024,7 +1042,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (dma->iova > iova + size - 1) break; - ret = update_user_bitmap(bitmap, dma, iova, pgsize); + ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize); if (ret) return ret; @@ -1169,7 +1187,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, } if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { - ret = update_user_bitmap(bitmap->data, dma, + ret = update_user_bitmap(bitmap->data, iommu, dma, unmap->iova, pgsize); if (ret) break; @@ -1521,6 +1539,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, + struct iommu_group *iommu_group) +{ + struct vfio_domain *domain; + struct vfio_group *group = NULL; + + list_for_each_entry(domain, &iommu->domain_list, next) { + group = find_iommu_group(domain, iommu_group); + if (group) + return group; + } + + if (iommu->external_domain) + group = find_iommu_group(iommu->external_domain, iommu_group); + + return group; +} + +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) +{ + struct vfio_domain *domain; + struct vfio_group *group; + + list_for_each_entry(domain, &iommu->domain_list, next) { + list_for_each_entry(group, &domain->group_list, next) { + if (!group->pinned_page_dirty_scope) { + iommu->pinned_page_dirty_scope = false; + return; + } + } + } + + if (iommu->external_domain) { + domain = iommu->external_domain; + list_for_each_entry(group, &domain->group_list, next) { + if (!group->pinned_page_dirty_scope) { + iommu->pinned_page_dirty_scope = false; + return; + } + } + } + + iommu->pinned_page_dirty_scope = true; +} + static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, phys_addr_t *base) { @@ -1928,6 +1991,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&group->next, &iommu->external_domain->group_list); + /* + * Non-iommu backed group cannot dirty memory directly, + * it can only use interfaces that provide dirty + * tracking. + * The iommu scope can only be promoted with the + * addition of a dirty tracking group. + */ + group->pinned_page_dirty_scope = true; + if (!iommu->pinned_page_dirty_scope) + update_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); return 0; @@ -2051,6 +2124,13 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, done: /* Delete the old one and insert new iova list */ vfio_iommu_iova_insert_copy(iommu, &iova_copy); + + /* + * An iommu backed group can dirty memory directly and therefore + * demotes the iommu scope until it declares itself dirty tracking + * capable via the page pinning interface. + */ + iommu->pinned_page_dirty_scope = false; mutex_unlock(&iommu->lock); vfio_iommu_resv_free(&group_resv_regions); @@ -2203,6 +2283,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, struct vfio_iommu *iommu = iommu_data; struct vfio_domain *domain; struct vfio_group *group; + bool update_dirty_scope = false; LIST_HEAD(iova_copy); mutex_lock(&iommu->lock); @@ -2210,6 +2291,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (iommu->external_domain) { group = find_iommu_group(iommu->external_domain, iommu_group); if (group) { + update_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); @@ -2239,6 +2321,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, continue; vfio_iommu_detach_group(domain, group); + update_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); /* @@ -2270,6 +2353,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, vfio_iommu_iova_free(&iova_copy); detach_group_done: + /* + * Removal of a group without dirty tracking may allow the iommu scope + * to be promoted. + */ + if (update_dirty_scope) + update_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 5d92ee15d098..38d3c6a8dc7e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -76,7 +76,9 @@ struct vfio_iommu_driver_ops { struct iommu_group *group); void (*detach_group)(void *iommu_data, struct iommu_group *group); - int (*pin_pages)(void *iommu_data, unsigned long *user_pfn, + int (*pin_pages)(void *iommu_data, + struct iommu_group *group, + unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn); int (*unpin_pages)(void *iommu_data, From 87fee61c35133e41f28facf856e0481cbf1bb2bd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 May 2020 10:22:09 -0700 Subject: [PATCH 524/562] RDMA/srp: Make the channel count configurable per target Increase the flexibility of the SRP initiator driver by making the channel count configurable per target instead of only providing a kernel module parameter for configuring the channel count. Link: https://lore.kernel.org/r/20200525172212.14413-2-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 00b4f88b113e..22fa8722a03a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3424,6 +3424,7 @@ enum { SRP_OPT_IP_DEST = 1 << 16, SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, SRP_OPT_MAX_IT_IU_SIZE = 1 << 18, + SRP_OPT_CH_COUNT = 1 << 19, }; static unsigned int srp_opt_mandatory[] = { @@ -3457,6 +3458,7 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_IP_SRC, "src=%s" }, { SRP_OPT_IP_DEST, "dest=%s" }, { SRP_OPT_MAX_IT_IU_SIZE, "max_it_iu_size=%d" }, + { SRP_OPT_CH_COUNT, "ch_count=%u", }, { SRP_OPT_ERR, NULL } }; @@ -3758,6 +3760,14 @@ static int srp_parse_options(struct net *net, const char *buf, target->max_it_iu_size = token; break; + case SRP_OPT_CH_COUNT: + if (match_int(args, &token) || token < 1) { + pr_warn("bad channel count %s\n", p); + goto out; + } + target->ch_count = token; + break; + default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); @@ -3921,11 +3931,13 @@ static ssize_t srp_create_target(struct device *dev, goto out; ret = -ENOMEM; - target->ch_count = max_t(unsigned, num_online_nodes(), - min(ch_count ? : - min(4 * num_online_nodes(), - ibdev->num_comp_vectors), - num_online_cpus())); + if (target->ch_count == 0) + target->ch_count = + max_t(unsigned int, num_online_nodes(), + min(ch_count ?: + min(4 * num_online_nodes(), + ibdev->num_comp_vectors), + num_online_cpus())); target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) From d4ee7f3a4445ec1b0b88af216f4032c4d30abf5a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 May 2020 10:22:10 -0700 Subject: [PATCH 525/562] RDMA/srpt: Make debug output more detailed Since the session name by itself is not sufficient to uniquely identify a queue pair, include the queue pair number. Show the ASCII channel state name instead of the numeric value. This change makes the ib_srpt debug output more consistent. Link: https://lore.kernel.org/r/20200525172212.14413-3-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index cdc8c239d6c0..b96e91e1b775 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -214,8 +214,9 @@ static const char *get_ch_state_name(enum rdma_ch_state s) */ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) { - pr_debug("QP event %d on ch=%p sess_name=%s state=%d\n", - event->event, ch, ch->sess_name, ch->state); + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", + event->event, ch, ch->sess_name, ch->qp->qp_num, + get_ch_state_name(ch->state)); switch (event->event) { case IB_EVENT_COMM_EST: @@ -1985,8 +1986,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport) list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s because target %s_%d has been disabled\n", - ch->sess_name, + pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", + ch->sess_name, ch->qp->qp_num, dev_name(&sport->sdev->device->dev), sport->port); srpt_close_ch(ch); From 66ced2eb2ab95f6ebe8033e2e0a2e61f2615377c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 May 2020 10:22:11 -0700 Subject: [PATCH 526/562] RDMA/srpt: Reduce max_recv_sge to 1 Since srpt_post_recv() always sets num_sge to 1, reduce the max_recv_sge parameter that is used at queue pair allocation time to 1. Link: https://lore.kernel.org/r/20200525172212.14413-4-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index b96e91e1b775..84f80f35ec8a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1818,16 +1818,12 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) qp_init->cap.max_rdma_ctxs = sq_size / 2; qp_init->cap.max_send_sge = min(attrs->max_send_sge, SRPT_MAX_SG_PER_WQE); - qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, - SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_recv_sge = 1; qp_init->port_num = ch->sport->port; - if (sdev->use_srq) { + if (sdev->use_srq) qp_init->srq = sdev->srq; - } else { + else qp_init->cap.max_recv_wr = ch->rq_size; - qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, - SRPT_MAX_SG_PER_WQE); - } if (ch->using_rdma_cm) { ret = rdma_create_qp(ch->rdma_cm.cm_id, sdev->pd, qp_init); From e0cca8b456e2319804c9187f237268b14c50d323 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 May 2020 10:22:12 -0700 Subject: [PATCH 527/562] RDMA/srpt: Increase max_send_sge The ib_srpt driver limits max_send_sge to 16. Since that is a workaround for an mlx4 bug that has been fixed, increase max_send_sge. See also commit f95ccffc715b ("IB/mlx4: Use 4K pages for kernel QP's WQE buffer"). Link: https://lore.kernel.org/r/20200525172212.14413-5-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- drivers/infiniband/ulp/srpt/ib_srpt.h | 5 ----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 84f80f35ec8a..ef7fcd3e8e15 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1816,8 +1816,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) */ qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr); qp_init->cap.max_rdma_ctxs = sq_size / 2; - qp_init->cap.max_send_sge = min(attrs->max_send_sge, - SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_send_sge = attrs->max_send_sge; qp_init->cap.max_recv_sge = 1; qp_init->port_num = ch->sport->port; if (sdev->use_srq) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 2e1a69840857..f31c349d07a1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -105,11 +105,6 @@ enum { SRP_CMD_ACA = 0x4, SRPT_DEF_SG_TABLESIZE = 128, - /* - * An experimentally determined value that avoids that QP creation - * fails due to "swiotlb buffer is full" on systems using the swiotlb. - */ - SRPT_MAX_SG_PER_WQE = 16, MIN_SRPT_SQ_SIZE = 16, DEF_SRPT_SQ_SIZE = 4096, From 802dcc7fc5ec0932bea0f33db046cc744aecf233 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 27 May 2020 08:50:14 +0300 Subject: [PATCH 528/562] RDMA/mlx5: Support TX port affinity for VF drivers in LAG mode The mlx5 VF driver doesn't set QP tx port affinity because it doesn't know if the lag is active or not, since the "lag_active" works only for PF interfaces. In this case for VF interfaces only one lag is used which brings performance issue. Add a lag_tx_port_affinity CAP bit; When it is enabled and "num_lag_ports > 1", then driver always set QP tx affinity, regardless of lag state. Link: https://lore.kernel.org/r/20200527055014.355093-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +++++++ drivers/infiniband/hw/mlx5/qp.c | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6557c8339161..49a1aff72715 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1972,7 +1972,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); - if (dev->lag_active) { + if (mlx5_ib_lag_should_assign_affinity(dev)) { u8 port = mlx5_core_native_port_num(dev->mdev) - 1; atomic_set(&context->tx_port_affinity, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 482b54eb9764..2a702fa9e943 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1550,4 +1550,11 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, int mlx5_ib_enable_driver(struct ib_device *dev); int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); + +static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) +{ + return dev->lag_active || + (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 && + MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity)); +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1988a0375696..9364a7a76ac2 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3653,7 +3653,8 @@ static unsigned int get_tx_affinity(struct ib_qp *qp, struct mlx5_ib_qp_base *qp_base; unsigned int tx_affinity; - if (!(dev->lag_active && qp_supports_affinity(qp))) + if (!(mlx5_ib_lag_should_assign_affinity(dev) && + qp_supports_affinity(qp))) return 0; if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) From 48062b0a8ba0e0d7da2a96fe53028b7474e2dd26 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 28 May 2020 16:04:27 +0100 Subject: [PATCH 529/562] RDMA/hns: remove duplicate assignment to pointer raq The pointer raq is being assigned twice. Fix this by removing one of the redundant assignments. Fixes: 14ba87304bf9 ("RDMA/hns: Remove redundant type cast for general pointers") Link: https://lore.kernel.org/r/20200528150427.420624-1-colin.king@canonical.com Addressses-Coverity: ("Evaluation order violation") Signed-off-by: Colin Ian King Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 8ff6b922b4d7..d02207cd30df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1146,7 +1146,7 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev) static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) { struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_raq_table *raq = raq = &priv->raq_table; + struct hns_roce_raq_table *raq = &priv->raq_table; struct device *dev = &hr_dev->pdev->dev; int raq_shift = 0; dma_addr_t addr; From ffd7339a2fac98b9ff731e336c4411bf1ce57e22 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 27 May 2020 14:18:45 -0300 Subject: [PATCH 530/562] RDMA/core: Use offsetofend() instead of open coding No reason to open code this. Link: https://lore.kernel.org/r/0-v1-0bc346e08476+585-drop_offsetofend_jgg@mellanox.com Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_ioctl.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 0418d7bddf3e..86de10ea30af 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -491,8 +491,7 @@ struct uapi_definition { */ #define UVERBS_ATTR_STRUCT(_type, _last) \ .zero_trailing = 1, \ - UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \ - sizeof(_type)) + UVERBS_ATTR_SIZE(offsetofend(_type, _last), sizeof(_type)) /* * Specifies at least min_len bytes must be passed in, but the amount can be * larger, up to the protocol maximum size. No check for zeroing is done. From bcafcdfdaee7665267e56990e8040fbab588550f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 28 May 2020 12:07:09 +0100 Subject: [PATCH 531/562] IB/hfi1: Fix spelling mistake "enought" -> "enough" There is a spelling mistake in an error message. Fix it. Link: https://lore.kernel.org/r/20200528110709.400935-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 7f35b9ea158b..15f9c635f292 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14559,7 +14559,7 @@ static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd) } if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) { - dd_dev_err(dd, "Not enought RMT entries used = %d\n", + dd_dev_err(dd, "Not enough RMT entries used = %d\n", rmt_start); return false; } From 0b8e125e213204508e1b3c4bdfe69713280b7abd Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Wed, 27 May 2020 22:02:30 -0500 Subject: [PATCH 532/562] RDMA/core: Fix several reference count leaks. kobject_init_and_add() takes reference even when it fails. If this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Previous commit b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") fixed a similar problem. Link: https://lore.kernel.org/r/20200528030231.9082-1-wu000273@umn.edu Signed-off-by: Qiushi Wu Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 087682e6969e..defe9cd4c5ee 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1058,8 +1058,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) coredev->ports_kobj, "%d", port_num); if (ret) { - kfree(p); - return ret; + goto err_put; } p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL); @@ -1072,8 +1071,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type, &p->kobj, "gid_attrs"); if (ret) { - kfree(p->gid_attr_group); - goto err_put; + goto err_put_gid_attrs; } if (device->ops.process_mad && is_full_dev) { @@ -1404,8 +1402,10 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num, ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s", name); - if (ret) + if (ret) { + kobject_put(kobj); return ret; + } } return 0; From 3446cbd2d523fdaf37f3772082071d1154c419d9 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Wed, 27 May 2020 11:34:52 +0300 Subject: [PATCH 533/562] RDMA/core: Add protection for shared CQs used by ULPs A pre-step for adding shared CQs. Add the infrastructure to prevent shared CQ users from altering the CQ configurations. For now all cqs are marked as private (non-shared). The core driver should use the new force functions to perform resize/destroy/moderation changes that are not allowed for users of shared CQs. Link: https://lore.kernel.org/r/1590568495-101621-2-git-send-email-yaminf@mellanox.com Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Reviewed-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 9 +++++++++ include/rdma/ib_verbs.h | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e2c9430a3ff1..21815e125e98 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2005,6 +2005,9 @@ EXPORT_SYMBOL(__ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { + if (cq->shared) + return -EOPNOTSUPP; + return cq->device->ops.modify_cq ? cq->device->ops.modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP; @@ -2013,6 +2016,9 @@ EXPORT_SYMBOL(rdma_set_cq_moderation); int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { + if (WARN_ON_ONCE(cq->shared)) + return -EOPNOTSUPP; + if (atomic_read(&cq->usecnt)) return -EBUSY; @@ -2025,6 +2031,9 @@ EXPORT_SYMBOL(ib_destroy_cq_user); int ib_resize_cq(struct ib_cq *cq, int cqe) { + if (cq->shared) + return -EOPNOTSUPP; + return cq->device->ops.resize_cq ? cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 94533ae16697..cc515025cbdb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1613,7 +1613,8 @@ struct ib_cq { /* updated only by trace points */ ktime_t timestamp; - bool interrupt; + u8 interrupt:1; + u8 shared:1; /* * Implementation details of the RDMA core, don't use in drivers: @@ -3909,6 +3910,8 @@ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, * ib_free_cq_user - Free kernel/user CQ * @cq: The CQ to free * @udata: Valid user data or NULL for kernel objects + * + * NOTE: This function shouldn't be called on shared CQs. */ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata); From c7ff819aefea04944dfcec5f0731b97277df6a9c Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Wed, 27 May 2020 11:34:53 +0300 Subject: [PATCH 534/562] RDMA/core: Introduce shared CQ pool API Allow a ULP to ask the core to provide a completion queue based on a least-used search on a per-device CQ pools. The device CQ pools grow in a lazy fashion when more CQs are requested. This feature reduces the amount of interrupts when using many QPs. Using shared CQs allows for more effcient completion handling. It also reduces the amount of overhead needed for CQ contexts. Test setup: Intel(R) Xeon(R) Platinum 8176M CPU @ 2.10GHz servers. Running NVMeoF 4KB read IOs over ConnectX-5EX across Spectrum switch. TX-depth = 32. The patch was applied in the nvme driver on both the target and initiator. Four controllers are accessed from each core. In the current test case we have exposed sixteen NVMe namespaces using four different subsystems (four namespaces per subsystem) from one NVM port. Each controller allocated X queues (RDMA QPs) and attached to Y CQs. Before this series we had X == Y, i.e for four controllers we've created total of 4X QPs and 4X CQs. In the shared case, we've created 4X QPs and only X CQs which means that we have four controllers that share a completion queue per core. Until fourteen cores there is no significant change in performance and the number of interrupts per second is less than a million in the current case. ================================================== |Cores|Current KIOPs |Shared KIOPs |improvement| |-----|---------------|--------------|-----------| |14 |2332 |2723 |16.7% | |-----|---------------|--------------|-----------| |20 |2086 |2712 |30% | |-----|---------------|--------------|-----------| |28 |1971 |2669 |35.4% | |================================================= |Cores|Current avg lat|Shared avg lat|improvement| |-----|---------------|--------------|-----------| |14 |767us |657us |14.3% | |-----|---------------|--------------|-----------| |20 |1225us |943us |23% | |-----|---------------|--------------|-----------| |28 |1816us |1341us |26.1% | ======================================================== |Cores|Current interrupts|Shared interrupts|improvement| |-----|------------------|-----------------|-----------| |14 |1.6M/sec |0.4M/sec |72% | |-----|------------------|-----------------|-----------| |20 |2.8M/sec |0.6M/sec |72.4% | |-----|------------------|-----------------|-----------| |28 |2.9M/sec |0.8M/sec |63.4% | ==================================================================== |Cores|Current 99.99th PCTL lat|Shared 99.99th PCTL lat|improvement| |-----|------------------------|-----------------------|-----------| |14 |67ms |6ms |90.9% | |-----|------------------------|-----------------------|-----------| |20 |5ms |6ms |-10% | |-----|------------------------|-----------------------|-----------| |28 |8.7ms |6ms |25.9% | |=================================================================== Performance improvement with sixteen disks (sixteen CQs per core) is comparable. Link: https://lore.kernel.org/r/1590568495-101621-3-git-send-email-yaminf@mellanox.com Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Reviewed-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 3 + drivers/infiniband/core/cq.c | 173 ++++++++++++++++++++++++++++ drivers/infiniband/core/device.c | 2 + include/rdma/ib_verbs.h | 17 ++- 4 files changed, 194 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index cf42acca4a3a..a1e6a67b2c4a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -414,4 +414,7 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv, struct vm_area_struct *vma, struct rdma_user_mmap_entry *entry); +void ib_cq_pool_init(struct ib_device *dev); +void ib_cq_pool_destroy(struct ib_device *dev); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 4f25b2400694..655795bfa0ee 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -7,7 +7,11 @@ #include #include +#include "core_priv.h" + #include +/* Max size for shared CQ, may require tuning */ +#define IB_MAX_SHARED_CQ_SZ 4096U /* # of WCs to poll for with a single call to ib_poll_cq */ #define IB_POLL_BATCH 16 @@ -218,6 +222,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, cq->cq_context = private; cq->poll_ctx = poll_ctx; atomic_set(&cq->usecnt, 0); + cq->comp_vector = comp_vector; cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL); if (!cq->wc) @@ -309,6 +314,8 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) { if (WARN_ON_ONCE(atomic_read(&cq->usecnt))) return; + if (WARN_ON_ONCE(cq->cqe_used)) + return; switch (cq->poll_ctx) { case IB_POLL_DIRECT: @@ -334,3 +341,169 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) kfree(cq); } EXPORT_SYMBOL(ib_free_cq_user); + +void ib_cq_pool_init(struct ib_device *dev) +{ + unsigned int i; + + spin_lock_init(&dev->cq_pools_lock); + for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) + INIT_LIST_HEAD(&dev->cq_pools[i]); +} + +void ib_cq_pool_destroy(struct ib_device *dev) +{ + struct ib_cq *cq, *n; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) { + list_for_each_entry_safe(cq, n, &dev->cq_pools[i], + pool_entry) { + WARN_ON(cq->cqe_used); + cq->shared = false; + ib_free_cq(cq); + } + } +} + +static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes, + enum ib_poll_context poll_ctx) +{ + LIST_HEAD(tmp_list); + unsigned int nr_cqs, i; + struct ib_cq *cq; + int ret; + + if (poll_ctx > IB_POLL_LAST_POOL_TYPE) { + WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE); + return -EINVAL; + } + + /* + * Allocate at least as many CQEs as requested, and otherwise + * a reasonable batch size so that we can share CQs between + * multiple users instead of allocating a larger number of CQs. + */ + nr_cqes = min_t(unsigned int, dev->attrs.max_cqe, + max(nr_cqes, IB_MAX_SHARED_CQ_SZ)); + nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus()); + for (i = 0; i < nr_cqs; i++) { + cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto out_free_cqs; + } + cq->shared = true; + list_add_tail(&cq->pool_entry, &tmp_list); + } + + spin_lock_irq(&dev->cq_pools_lock); + list_splice(&tmp_list, &dev->cq_pools[poll_ctx]); + spin_unlock_irq(&dev->cq_pools_lock); + + return 0; + +out_free_cqs: + list_for_each_entry(cq, &tmp_list, pool_entry) { + cq->shared = false; + ib_free_cq(cq); + } + return ret; +} + +/** + * ib_cq_pool_get() - Find the least used completion queue that matches + * a given cpu hint (or least used for wild card affinity) and fits + * nr_cqe. + * @dev: rdma device + * @nr_cqe: number of needed cqe entries + * @comp_vector_hint: completion vector hint (-1) for the driver to assign + * a comp vector based on internal counter + * @poll_ctx: cq polling context + * + * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and + * claim entries in it for us. In case there is no available cq, allocate + * a new cq with the requirements and add it to the device pool. + * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value + * for @poll_ctx. + */ +struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, + int comp_vector_hint, + enum ib_poll_context poll_ctx) +{ + static unsigned int default_comp_vector; + unsigned int vector, num_comp_vectors; + struct ib_cq *cq, *found = NULL; + int ret; + + if (poll_ctx > IB_POLL_LAST_POOL_TYPE) { + WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE); + return ERR_PTR(-EINVAL); + } + + num_comp_vectors = + min_t(unsigned int, dev->num_comp_vectors, num_online_cpus()); + /* Project the affinty to the device completion vector range */ + if (comp_vector_hint < 0) { + comp_vector_hint = + (READ_ONCE(default_comp_vector) + 1) % num_comp_vectors; + WRITE_ONCE(default_comp_vector, comp_vector_hint); + } + vector = comp_vector_hint % num_comp_vectors; + + /* + * Find the least used CQ with correct affinity and + * enough free CQ entries + */ + while (!found) { + spin_lock_irq(&dev->cq_pools_lock); + list_for_each_entry(cq, &dev->cq_pools[poll_ctx], + pool_entry) { + /* + * Check to see if we have found a CQ with the + * correct completion vector + */ + if (vector != cq->comp_vector) + continue; + if (cq->cqe_used + nr_cqe > cq->cqe) + continue; + found = cq; + break; + } + + if (found) { + found->cqe_used += nr_cqe; + spin_unlock_irq(&dev->cq_pools_lock); + + return found; + } + spin_unlock_irq(&dev->cq_pools_lock); + + /* + * Didn't find a match or ran out of CQs in the device + * pool, allocate a new array of CQs. + */ + ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx); + if (ret) + return ERR_PTR(ret); + } + + return found; +} +EXPORT_SYMBOL(ib_cq_pool_get); + +/** + * ib_cq_pool_put - Return a CQ taken from a shared pool. + * @cq: The CQ to return. + * @nr_cqe: The max number of cqes that the user had requested. + */ +void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe) +{ + if (WARN_ON_ONCE(nr_cqe > cq->cqe_used)) + return; + + spin_lock_irq(&cq->device->cq_pools_lock); + cq->cqe_used -= nr_cqe; + spin_unlock_irq(&cq->device->cq_pools_lock); +} +EXPORT_SYMBOL(ib_cq_pool_put); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d9f565a779df..53f541f41ff3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1393,6 +1393,7 @@ int ib_register_device(struct ib_device *device, const char *name) goto dev_cleanup; } + ib_cq_pool_init(device); ret = enable_device_and_get(device); dev_set_uevent_suppress(&device->dev, false); /* Mark for userspace that device is ready */ @@ -1447,6 +1448,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); + ib_cq_pool_destroy(ib_dev); /* Expedite removing unregistered pointers from the hash table */ free_netdevs(ib_dev); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cc515025cbdb..19864da78649 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1588,10 +1588,12 @@ struct ib_ah { typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); enum ib_poll_context { - IB_POLL_DIRECT, /* caller context, no hw completions */ IB_POLL_SOFTIRQ, /* poll from softirq context */ IB_POLL_WORKQUEUE, /* poll from workqueue */ IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */ + IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE, + + IB_POLL_DIRECT, /* caller context, no hw completions */ }; struct ib_cq { @@ -1601,9 +1603,11 @@ struct ib_cq { void (*event_handler)(struct ib_event *, void *); void *cq_context; int cqe; + unsigned int cqe_used; atomic_t usecnt; /* count number of work queues */ enum ib_poll_context poll_ctx; struct ib_wc *wc; + struct list_head pool_entry; union { struct irq_poll iop; struct work_struct work; @@ -1615,6 +1619,7 @@ struct ib_cq { ktime_t timestamp; u8 interrupt:1; u8 shared:1; + unsigned int comp_vector; /* * Implementation details of the RDMA core, don't use in drivers: @@ -2734,6 +2739,10 @@ struct ib_device { #endif u32 index; + + spinlock_t cq_pools_lock; + struct list_head cq_pools[IB_POLL_LAST_POOL_TYPE + 1]; + struct rdma_restrack_root *res; const struct uapi_definition *driver_def; @@ -4037,6 +4046,12 @@ static inline int ib_req_notify_cq(struct ib_cq *cq, return cq->device->ops.req_notify_cq(cq, flags); } +struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, + int comp_vector_hint, + enum ib_poll_context poll_ctx); + +void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe); + /** * ib_req_ncomp_notif - Request completion notification when there are * at least the specified number of unreaped completions on the CQ. From aa8ba13cae3134b8ef1c1b6879f66372531da738 Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Wed, 27 May 2020 21:01:09 -0500 Subject: [PATCH 535/562] vfio/mdev: Fix reference count leak in add_mdev_supported_type kobject_init_and_add() takes reference even when it fails. If this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Thus, replace kfree() by kobject_put() to fix this issue. Previous commit "b8eb718348b8" fixed a similar problem. Fixes: 7b96953bc640 ("vfio: Mediated device Core driver") Signed-off-by: Qiushi Wu Reviewed-by: Cornelia Huck Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- drivers/vfio/mdev/mdev_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 8ad14e5c02bf..917fd84c1c6f 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -110,7 +110,7 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, "%s-%s", dev_driver_string(parent->dev), group->name); if (ret) { - kfree(type); + kobject_put(&type->kobj); return ERR_PTR(ret); } From cd0bb41ea86027d3a5c3555772692323cbc05bc1 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Wed, 3 Jun 2020 00:12:36 +0530 Subject: [PATCH 536/562] vfio iommu: Use shift operation for 64-bit integer division Fixes compilation error with ARCH=i386. Error fixed by this commit: ld: drivers/vfio/vfio_iommu_type1.o: in function `vfio_dma_populate_bitmap': >> vfio_iommu_type1.c:(.text+0x666): undefined reference to `__udivdi3' Fixes: d6a4c185660c ("vfio iommu: Implementation of ioctl for dirty pages tracking") Reported-by: kbuild test robot Signed-off-by: Kirti Wankhede Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index a6d632d44c82..0e4e71799290 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -227,11 +227,12 @@ static void vfio_dma_bitmap_free(struct vfio_dma *dma) static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize) { struct rb_node *p; + unsigned long pgshift = __ffs(pgsize); for (p = rb_first(&dma->pfn_list); p; p = rb_next(p)) { struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn, node); - bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1); + bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) >> pgshift, 1); } } From c8e9df4744a3e4d897a6ac6e71a38aa8d7b65aa0 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Wed, 3 Jun 2020 00:12:37 +0530 Subject: [PATCH 537/562] vfio iommu: typecast corrections Fixes sparse warnings by adding '__user' in typecast for copy_[from,to]_user() Fixes: d6a4c185660c ("vfio iommu: Implementation of ioctl for dirty pages tracking") Reported-by: kbuild test robot Signed-off-by: Kirti Wankhede Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0e4e71799290..391fafe82c5c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -998,14 +998,14 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, nbits + shift); if (copy_from_user(&leftover, - (const void *)(bitmap + copy_offset), + (void __user *)(bitmap + copy_offset), sizeof(leftover))) return -EFAULT; bitmap_or(dma->bitmap, dma->bitmap, &leftover, shift); } - if (copy_to_user((void *)(bitmap + copy_offset), dma->bitmap, + if (copy_to_user((void __user *)(bitmap + copy_offset), dma->bitmap, DIRTY_BITMAP_BYTES(nbits + shift))) return -EFAULT; From 1fc431320a53f3e9b33b399667c8788fa00eb8b0 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 28 May 2020 16:45:43 -0300 Subject: [PATCH 538/562] RDMA/iser: Remove support for FMR memory registration FMR is not supported on most recent RDMA devices (that use fast memory registration mechanism). Also, FMR was recently removed from NFS/RDMA ULP. Link: https://lore.kernel.org/r/1-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.h | 79 +------- drivers/infiniband/ulp/iser/iser_initiator.c | 19 +- drivers/infiniband/ulp/iser/iser_memory.c | 188 +------------------ drivers/infiniband/ulp/iser/iser_verbs.c | 126 ++----------- 4 files changed, 40 insertions(+), 372 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 029c00163442..1d77c7f42e38 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -65,7 +65,6 @@ #include #include -#include #include #define DRV_NAME "iser" @@ -312,33 +311,6 @@ struct iser_comp { int active_qps; }; -/** - * struct iser_reg_ops - Memory registration operations - * per-device registration schemes - * - * @alloc_reg_res: Allocate registration resources - * @free_reg_res: Free registration resources - * @reg_mem: Register memory buffers - * @unreg_mem: Un-register memory buffers - * @reg_desc_get: Get a registration descriptor for pool - * @reg_desc_put: Get a registration descriptor to pool - */ -struct iser_reg_ops { - int (*alloc_reg_res)(struct ib_conn *ib_conn, - unsigned cmds_max, - unsigned int size); - void (*free_reg_res)(struct ib_conn *ib_conn); - int (*reg_mem)(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - struct iser_reg_resources *rsc, - struct iser_mem_reg *reg); - void (*unreg_mem)(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); - struct iser_fr_desc * (*reg_desc_get)(struct ib_conn *ib_conn); - void (*reg_desc_put)(struct ib_conn *ib_conn, - struct iser_fr_desc *desc); -}; - /** * struct iser_device - iSER device handle * @@ -351,8 +323,6 @@ struct iser_reg_ops { * @comps_used: Number of completion contexts used, Min between online * cpus and device max completion vectors * @comps: Dinamically allocated array of completion handlers - * @reg_ops: Registration ops - * @remote_inv_sup: Remote invalidate is supported on this device */ struct iser_device { struct ib_device *ib_device; @@ -362,26 +332,18 @@ struct iser_device { int refcount; int comps_used; struct iser_comp *comps; - const struct iser_reg_ops *reg_ops; - bool remote_inv_sup; }; /** * struct iser_reg_resources - Fast registration resources * * @mr: memory region - * @fmr_pool: pool of fmrs * @sig_mr: signature memory region - * @page_vec: fast reg page list used by fmr pool * @mr_valid: is mr valid indicator */ struct iser_reg_resources { - union { - struct ib_mr *mr; - struct ib_fmr_pool *fmr_pool; - }; + struct ib_mr *mr; struct ib_mr *sig_mr; - struct iser_page_vec *page_vec; u8 mr_valid:1; }; @@ -403,7 +365,7 @@ struct iser_fr_desc { * struct iser_fr_pool - connection fast registration pool * * @list: list of fastreg descriptors - * @lock: protects fmr/fastreg pool + * @lock: protects fastreg pool * @size: size of the pool */ struct iser_fr_pool { @@ -518,12 +480,6 @@ struct iscsi_iser_task { struct iser_data_buf prot[ISER_DIRS_NUM]; }; -struct iser_page_vec { - u64 *pages; - int npages; - struct ib_mr fake_mr; -}; - /** * struct iser_global - iSER global context * @@ -548,8 +504,6 @@ extern int iser_pi_guard; extern unsigned int iser_max_sectors; extern bool iser_always_reg; -int iser_assign_reg_ops(struct iser_device *device); - int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task); @@ -591,22 +545,17 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *mem, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir dir, - bool all_imm); -void iser_unreg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir dir); +int iser_reg_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir dir, + bool all_imm); +void iser_unreg_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir dir); int iser_connect(struct iser_conn *iser_conn, struct sockaddr *src_addr, struct sockaddr *dst_addr, int non_blocking); -void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); -void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); - int iser_post_recvl(struct iser_conn *iser_conn); int iser_post_recvm(struct iser_conn *iser_conn, int count); int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, @@ -625,26 +574,12 @@ int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, struct iscsi_session *session); -int iser_alloc_fmr_pool(struct ib_conn *ib_conn, - unsigned cmds_max, - unsigned int size); -void iser_free_fmr_pool(struct ib_conn *ib_conn); int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max, unsigned int size); void iser_free_fastreg_pool(struct ib_conn *ib_conn); u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector); -struct iser_fr_desc * -iser_reg_desc_get_fr(struct ib_conn *ib_conn); -void -iser_reg_desc_put_fr(struct ib_conn *ib_conn, - struct iser_fr_desc *desc); -struct iser_fr_desc * -iser_reg_desc_get_fmr(struct ib_conn *ib_conn); -void -iser_reg_desc_put_fmr(struct ib_conn *ib_conn, - struct iser_fr_desc *desc); static inline struct iser_conn * to_iser_conn(struct ib_conn *ib_conn) diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 4a7045bb0831..27a6f75a9912 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) return err; } - err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false); + err = iser_reg_mem_fastreg(iser_task, ISER_DIR_IN, false); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; @@ -126,8 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task, return err; } - err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT, - buf_out->data_len == imm_sz); + err = iser_reg_mem_fastreg(iser_task, ISER_DIR_OUT, + buf_out->data_len == imm_sz); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; @@ -250,8 +250,8 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; - if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max, - iser_conn->pages_per_mr)) + if (iser_alloc_fastreg_pool(ib_conn, session->scsi_cmds_max, + iser_conn->pages_per_mr)) goto create_rdma_reg_res_failed; if (iser_alloc_login_buf(iser_conn)) @@ -293,7 +293,7 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, rx_desc_alloc_fail: iser_free_login_buf(iser_conn); alloc_login_buf_fail: - device->reg_ops->free_reg_res(ib_conn); + iser_free_fastreg_pool(ib_conn); create_rdma_reg_res_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; @@ -306,8 +306,7 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn) struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_device *device = ib_conn->device; - if (device->reg_ops->free_reg_res) - device->reg_ops->free_reg_res(ib_conn); + iser_free_fastreg_pool(ib_conn); rx_desc = iser_conn->rx_descs; for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) @@ -768,7 +767,7 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) int prot_count = scsi_prot_sg_count(iser_task->sc); if (iser_task->dir[ISER_DIR_IN]) { - iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); + iser_unreg_mem_fastreg(iser_task, ISER_DIR_IN); iser_dma_unmap_task_data(iser_task, &iser_task->data[ISER_DIR_IN], DMA_FROM_DEVICE); @@ -779,7 +778,7 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) } if (iser_task->dir[ISER_DIR_OUT]) { - iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + iser_unreg_mem_fastreg(iser_task, ISER_DIR_OUT); iser_dma_unmap_task_data(iser_task, &iser_task->data[ISER_DIR_OUT], DMA_TO_DEVICE); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 999ef7cdd05e..d4e057fac219 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -38,62 +38,13 @@ #include #include "iscsi_iser.h" -static -int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - struct iser_reg_resources *rsc, - struct iser_mem_reg *mem_reg); -static -int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - struct iser_reg_resources *rsc, - struct iser_mem_reg *mem_reg); - -static const struct iser_reg_ops fastreg_ops = { - .alloc_reg_res = iser_alloc_fastreg_pool, - .free_reg_res = iser_free_fastreg_pool, - .reg_mem = iser_fast_reg_mr, - .unreg_mem = iser_unreg_mem_fastreg, - .reg_desc_get = iser_reg_desc_get_fr, - .reg_desc_put = iser_reg_desc_put_fr, -}; - -static const struct iser_reg_ops fmr_ops = { - .alloc_reg_res = iser_alloc_fmr_pool, - .free_reg_res = iser_free_fmr_pool, - .reg_mem = iser_fast_reg_fmr, - .unreg_mem = iser_unreg_mem_fmr, - .reg_desc_get = iser_reg_desc_get_fmr, - .reg_desc_put = iser_reg_desc_put_fmr, -}; void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc) { iser_err_comp(wc, "memreg"); } -int iser_assign_reg_ops(struct iser_device *device) -{ - struct ib_device *ib_dev = device->ib_device; - - /* Assign function handles - based on FMR support */ - if (ib_dev->ops.alloc_fmr && ib_dev->ops.dealloc_fmr && - ib_dev->ops.map_phys_fmr && ib_dev->ops.unmap_fmr) { - iser_info("FMR supported, using FMR for registration\n"); - device->reg_ops = &fmr_ops; - } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - iser_info("FastReg supported, using FastReg for registration\n"); - device->reg_ops = &fastreg_ops; - device->remote_inv_sup = iser_always_reg; - } else { - iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); - return -1; - } - - return 0; -} - -struct iser_fr_desc * +static struct iser_fr_desc * iser_reg_desc_get_fr(struct ib_conn *ib_conn) { struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; @@ -109,7 +60,7 @@ iser_reg_desc_get_fr(struct ib_conn *ib_conn) return desc; } -void +static void iser_reg_desc_put_fr(struct ib_conn *ib_conn, struct iser_fr_desc *desc) { @@ -121,44 +72,6 @@ iser_reg_desc_put_fr(struct ib_conn *ib_conn, spin_unlock_irqrestore(&fr_pool->lock, flags); } -struct iser_fr_desc * -iser_reg_desc_get_fmr(struct ib_conn *ib_conn) -{ - struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; - - return list_first_entry(&fr_pool->list, - struct iser_fr_desc, list); -} - -void -iser_reg_desc_put_fmr(struct ib_conn *ib_conn, - struct iser_fr_desc *desc) -{ -} - -static void iser_data_buf_dump(struct iser_data_buf *data, - struct ib_device *ibdev) -{ - struct scatterlist *sg; - int i; - - for_each_sg(data->sg, sg, data->dma_nents, i) - iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p " - "off:0x%x sz:0x%x dma_len:0x%x\n", - i, (unsigned long)sg_dma_address(sg), - sg_page(sg), sg->offset, sg->length, sg_dma_len(sg)); -} - -static void iser_dump_page_vec(struct iser_page_vec *page_vec) -{ - int i; - - iser_err("page vec npages %d data length %lld\n", - page_vec->npages, page_vec->fake_mr.length); - for (i = 0; i < page_vec->npages; i++) - iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]); -} - int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, enum iser_data_dir iser_dir, @@ -213,84 +126,9 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, return 0; } -static int iser_set_page(struct ib_mr *mr, u64 addr) -{ - struct iser_page_vec *page_vec = - container_of(mr, struct iser_page_vec, fake_mr); - - page_vec->pages[page_vec->npages++] = addr; - - return 0; -} - -static -int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - struct iser_reg_resources *rsc, - struct iser_mem_reg *reg) -{ - struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; - struct iser_device *device = ib_conn->device; - struct iser_page_vec *page_vec = rsc->page_vec; - struct ib_fmr_pool *fmr_pool = rsc->fmr_pool; - struct ib_pool_fmr *fmr; - int ret, plen; - - page_vec->npages = 0; - page_vec->fake_mr.page_size = SZ_4K; - plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, - mem->dma_nents, NULL, iser_set_page); - if (unlikely(plen < mem->dma_nents)) { - iser_err("page vec too short to hold this SG\n"); - iser_data_buf_dump(mem, device->ib_device); - iser_dump_page_vec(page_vec); - return -EINVAL; - } - - fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages, - page_vec->npages, page_vec->pages[0]); - if (IS_ERR(fmr)) { - ret = PTR_ERR(fmr); - iser_err("ib_fmr_pool_map_phys failed: %d\n", ret); - return ret; - } - - reg->sge.lkey = fmr->fmr->lkey; - reg->rkey = fmr->fmr->rkey; - reg->sge.addr = page_vec->fake_mr.iova; - reg->sge.length = page_vec->fake_mr.length; - reg->mem_h = fmr; - - iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx," - " length=0x%x\n", reg->sge.lkey, reg->rkey, - reg->sge.addr, reg->sge.length); - - return 0; -} - -/** - * Unregister (previosuly registered using FMR) memory. - * If memory is non-FMR does nothing. - */ -void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) -{ - struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; - - if (!reg->mem_h) - return; - - iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h); - - ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); - - reg->mem_h = NULL; -} - void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; struct iser_fr_desc *desc; struct ib_mr_status mr_status; @@ -312,7 +150,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); } - device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn, desc); + iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->mem_h); reg->mem_h = NULL; } @@ -509,15 +347,14 @@ iser_reg_data_sg(struct iscsi_iser_task *task, if (use_dma_key) return iser_reg_dma(device, mem, reg); - return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg); + return iser_fast_reg_mr(task, mem, &desc->rsc, reg); } -int iser_reg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir dir, - bool all_imm) +int iser_reg_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir dir, + bool all_imm) { struct ib_conn *ib_conn = &task->iser_conn->ib_conn; - struct iser_device *device = ib_conn->device; struct iser_data_buf *mem = &task->data[dir]; struct iser_mem_reg *reg = &task->rdma_reg[dir]; struct iser_fr_desc *desc = NULL; @@ -528,7 +365,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL; if (!use_dma_key) { - desc = device->reg_ops->reg_desc_get(ib_conn); + desc = iser_reg_desc_get_fr(ib_conn); reg->mem_h = desc; } @@ -549,15 +386,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, err_reg: if (desc) - device->reg_ops->reg_desc_put(ib_conn, desc); + iser_reg_desc_put_fr(ib_conn, desc); return err; } -void iser_unreg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir dir) -{ - struct iser_device *device = task->iser_conn->ib_conn.device; - - device->reg_ops->unreg_mem(task, dir); -} diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 127887c6c03f..c1f44c41f501 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -68,11 +68,12 @@ static void iser_event_handler(struct ib_event_handler *handler, static int iser_create_device_ib_res(struct iser_device *device) { struct ib_device *ib_dev = device->ib_device; - int ret, i, max_cqe; + int i, max_cqe; - ret = iser_assign_reg_ops(device); - if (ret) - return ret; + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { + iser_err("IB device does not support memory registrations\n"); + return -1; + } device->comps_used = min_t(int, num_online_cpus(), ib_dev->num_comp_vectors); @@ -147,96 +148,6 @@ static void iser_free_device_ib_res(struct iser_device *device) device->pd = NULL; } -/** - * iser_alloc_fmr_pool - Creates FMR pool and page_vector - * @ib_conn: connection RDMA resources - * @cmds_max: max number of SCSI commands for this connection - * @size: max number of pages per map request - * - * Return: 0 on success, or errno code on failure - */ -int iser_alloc_fmr_pool(struct ib_conn *ib_conn, - unsigned cmds_max, - unsigned int size) -{ - struct iser_device *device = ib_conn->device; - struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; - struct iser_page_vec *page_vec; - struct iser_fr_desc *desc; - struct ib_fmr_pool *fmr_pool; - struct ib_fmr_pool_param params; - int ret; - - INIT_LIST_HEAD(&fr_pool->list); - spin_lock_init(&fr_pool->lock); - - desc = kzalloc(sizeof(*desc), GFP_KERNEL); - if (!desc) - return -ENOMEM; - - page_vec = kmalloc(sizeof(*page_vec) + (sizeof(u64) * size), - GFP_KERNEL); - if (!page_vec) { - ret = -ENOMEM; - goto err_frpl; - } - - page_vec->pages = (u64 *)(page_vec + 1); - - params.page_shift = ilog2(SZ_4K); - params.max_pages_per_fmr = size; - /* make the pool size twice the max number of SCSI commands * - * the ML is expected to queue, watermark for unmap at 50% */ - params.pool_size = cmds_max * 2; - params.dirty_watermark = cmds_max; - params.cache = 0; - params.flush_function = NULL; - params.access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - - fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); - if (IS_ERR(fmr_pool)) { - ret = PTR_ERR(fmr_pool); - iser_err("FMR allocation failed, err %d\n", ret); - goto err_fmr; - } - - desc->rsc.page_vec = page_vec; - desc->rsc.fmr_pool = fmr_pool; - list_add(&desc->list, &fr_pool->list); - - return 0; - -err_fmr: - kfree(page_vec); -err_frpl: - kfree(desc); - - return ret; -} - -/** - * iser_free_fmr_pool - releases the FMR pool and page vec - * @ib_conn: connection RDMA resources - */ -void iser_free_fmr_pool(struct ib_conn *ib_conn) -{ - struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; - struct iser_fr_desc *desc; - - desc = list_first_entry(&fr_pool->list, - struct iser_fr_desc, list); - list_del(&desc->list); - - iser_info("freeing conn %p fmr pool %p\n", - ib_conn, desc->rsc.fmr_pool); - - ib_destroy_fmr_pool(desc->rsc.fmr_pool); - kfree(desc->rsc.page_vec); - kfree(desc); -} - static struct iser_fr_desc * iser_create_fastreg_desc(struct iser_device *device, struct ib_pd *pd, @@ -667,13 +578,12 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, u32 max_num_sg; /* - * FRs without SG_GAPS or FMRs can only map up to a (device) page per - * entry, but if the first entry is misaligned we'll end up using two - * entries (head and tail) for a single page worth data, so one - * additional entry is required. + * FRs without SG_GAPS can only map up to a (device) page per entry, + * but if the first entry is misaligned we'll end up using two entries + * (head and tail) for a single page worth data, so one additional + * entry is required. */ - if ((attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) && - (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG) reserved_mr_pages = 0; else reserved_mr_pages = 1; @@ -684,14 +594,8 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, max_num_sg = attr->max_fast_reg_page_list_len; sg_tablesize = DIV_ROUND_UP(max_sectors * SECTOR_SIZE, SZ_4K); - if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) - sup_sg_tablesize = - min_t( - uint, ISCSI_ISER_MAX_SG_TABLESIZE, - max_num_sg - reserved_mr_pages); - else - sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE; - + sup_sg_tablesize = min_t(uint, ISCSI_ISER_MAX_SG_TABLESIZE, + max_num_sg - reserved_mr_pages); iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize); iser_conn->pages_per_mr = iser_conn->scsi_sg_tablesize + reserved_mr_pages; @@ -755,7 +659,7 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) struct iser_cm_hdr req_hdr; struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; struct ib_conn *ib_conn = &iser_conn->ib_conn; - struct iser_device *device = ib_conn->device; + struct ib_device *ib_dev = ib_conn->device->ib_device; if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ @@ -766,14 +670,14 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) goto failure; memset(&conn_param, 0, sizeof conn_param); - conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom; + conn_param.responder_resources = ib_dev->attrs.max_qp_rd_atom; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; memset(&req_hdr, 0, sizeof(req_hdr)); req_hdr.flags = ISER_ZBVA_NOT_SUP; - if (!device->remote_inv_sup) + if (!iser_always_reg) req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP; conn_param.private_data = (void *)&req_hdr; conn_param.private_data_len = sizeof(struct iser_cm_hdr); From f273ad4f8d90bb87d2259fe37caee82e9aa7906c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:44 -0300 Subject: [PATCH 539/562] RDMA/srp: Remove support for FMR memory registration FMR is not supported on most recent RDMA devices (that use fast memory registration mechanism). Also, FMR was recently removed from NFS/RDMA ULP. Link: https://lore.kernel.org/r/2-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 222 ++-------------------------- drivers/infiniband/ulp/srp/ib_srp.h | 27 +--- 2 files changed, 22 insertions(+), 227 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 22fa8722a03a..d8fcd21ab472 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -71,7 +71,6 @@ static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; static unsigned int indirect_sg_entries; static bool allow_ext_sg; -static bool prefer_fr = true; static bool register_always = true; static bool never_register; static int topspin_workarounds = 1; @@ -95,10 +94,6 @@ module_param(topspin_workarounds, int, 0444); MODULE_PARM_DESC(topspin_workarounds, "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); -module_param(prefer_fr, bool, 0444); -MODULE_PARM_DESC(prefer_fr, -"Whether to use fast registration if both FMR and fast registration are supported"); - module_param(register_always, bool, 0444); MODULE_PARM_DESC(register_always, "Use memory registration even for contiguous memory regions"); @@ -388,24 +383,6 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch) srp_new_ib_cm_id(ch); } -static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) -{ - struct srp_device *dev = target->srp_host->srp_dev; - struct ib_fmr_pool_param fmr_param; - - memset(&fmr_param, 0, sizeof(fmr_param)); - fmr_param.pool_size = target->mr_pool_size; - fmr_param.dirty_watermark = fmr_param.pool_size / 4; - fmr_param.cache = 1; - fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; - fmr_param.page_shift = ilog2(dev->mr_page_size); - fmr_param.access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - - return ib_create_fmr_pool(dev->pd, &fmr_param); -} - /** * srp_destroy_fr_pool() - free the resources owned by a pool * @pool: Fast registration pool to be destroyed. @@ -556,7 +533,6 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) struct ib_qp_init_attr *init_attr; struct ib_cq *recv_cq, *send_cq; struct ib_qp *qp; - struct ib_fmr_pool *fmr_pool = NULL; struct srp_fr_pool *fr_pool = NULL; const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; int ret; @@ -619,14 +595,6 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) "FR pool allocation failed (%d)\n", ret); goto err_qp; } - } else if (dev->use_fmr) { - fmr_pool = srp_alloc_fmr_pool(target); - if (IS_ERR(fmr_pool)) { - ret = PTR_ERR(fmr_pool); - shost_printk(KERN_WARNING, target->scsi_host, PFX - "FMR pool allocation failed (%d)\n", ret); - goto err_qp; - } } if (ch->qp) @@ -644,10 +612,6 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) if (ch->fr_pool) srp_destroy_fr_pool(ch->fr_pool); ch->fr_pool = fr_pool; - } else if (dev->use_fmr) { - if (ch->fmr_pool) - ib_destroy_fmr_pool(ch->fmr_pool); - ch->fmr_pool = fmr_pool; } kfree(init_attr); @@ -702,9 +666,6 @@ static void srp_free_ch_ib(struct srp_target_port *target, if (dev->use_fast_reg) { if (ch->fr_pool) srp_destroy_fr_pool(ch->fr_pool); - } else if (dev->use_fmr) { - if (ch->fmr_pool) - ib_destroy_fmr_pool(ch->fmr_pool); } srp_destroy_qp(ch); @@ -1017,12 +978,8 @@ static void srp_free_req_data(struct srp_target_port *target, for (i = 0; i < target->req_ring_size; ++i) { req = &ch->req_ring[i]; - if (dev->use_fast_reg) { + if (dev->use_fast_reg) kfree(req->fr_list); - } else { - kfree(req->fmr_list); - kfree(req->map_page); - } if (req->indirect_dma_addr) { ib_dma_unmap_single(ibdev, req->indirect_dma_addr, target->indirect_size, @@ -1056,16 +1013,8 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch) GFP_KERNEL); if (!mr_list) goto out; - if (srp_dev->use_fast_reg) { + if (srp_dev->use_fast_reg) req->fr_list = mr_list; - } else { - req->fmr_list = mr_list; - req->map_page = kmalloc_array(srp_dev->max_pages_per_mr, - sizeof(void *), - GFP_KERNEL); - if (!req->map_page) - goto out; - } req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); if (!req->indirect_desc) goto out; @@ -1272,11 +1221,6 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, if (req->nmdesc) srp_fr_pool_put(ch->fr_pool, req->fr_list, req->nmdesc); - } else if (dev->use_fmr) { - struct ib_pool_fmr **pfmr; - - for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) - ib_fmr_pool_unmap(*pfmr); } ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), @@ -1472,50 +1416,6 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, state->ndesc++; } -static int srp_map_finish_fmr(struct srp_map_state *state, - struct srp_rdma_ch *ch) -{ - struct srp_target_port *target = ch->target; - struct srp_device *dev = target->srp_host->srp_dev; - struct ib_pool_fmr *fmr; - u64 io_addr = 0; - - if (state->fmr.next >= state->fmr.end) { - shost_printk(KERN_ERR, ch->target->scsi_host, - PFX "Out of MRs (mr_per_cmd = %d)\n", - ch->target->mr_per_cmd); - return -ENOMEM; - } - - WARN_ON_ONCE(!dev->use_fmr); - - if (state->npages == 0) - return 0; - - if (state->npages == 1 && target->global_rkey) { - srp_map_desc(state, state->base_dma_addr, state->dma_len, - target->global_rkey); - goto reset_state; - } - - fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, - state->npages, io_addr); - if (IS_ERR(fmr)) - return PTR_ERR(fmr); - - *state->fmr.next++ = fmr; - state->nmdesc++; - - srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, - state->dma_len, fmr->fmr->rkey); - -reset_state: - state->npages = 0; - state->dma_len = 0; - - return 0; -} - static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) { srp_handle_qp_err(cq, wc, "FAST REG"); @@ -1606,74 +1506,6 @@ static int srp_map_finish_fr(struct srp_map_state *state, return n; } -static int srp_map_sg_entry(struct srp_map_state *state, - struct srp_rdma_ch *ch, - struct scatterlist *sg) -{ - struct srp_target_port *target = ch->target; - struct srp_device *dev = target->srp_host->srp_dev; - dma_addr_t dma_addr = sg_dma_address(sg); - unsigned int dma_len = sg_dma_len(sg); - unsigned int len = 0; - int ret; - - WARN_ON_ONCE(!dma_len); - - while (dma_len) { - unsigned offset = dma_addr & ~dev->mr_page_mask; - - if (state->npages == dev->max_pages_per_mr || - (state->npages > 0 && offset != 0)) { - ret = srp_map_finish_fmr(state, ch); - if (ret) - return ret; - } - - len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); - - if (!state->npages) - state->base_dma_addr = dma_addr; - state->pages[state->npages++] = dma_addr & dev->mr_page_mask; - state->dma_len += len; - dma_addr += len; - dma_len -= len; - } - - /* - * If the end of the MR is not on a page boundary then we need to - * close it out and start a new one -- we can only merge at page - * boundaries. - */ - ret = 0; - if ((dma_addr & ~dev->mr_page_mask) != 0) - ret = srp_map_finish_fmr(state, ch); - return ret; -} - -static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, - struct srp_request *req, struct scatterlist *scat, - int count) -{ - struct scatterlist *sg; - int i, ret; - - state->pages = req->map_page; - state->fmr.next = req->fmr_list; - state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; - - for_each_sg(scat, sg, count, i) { - ret = srp_map_sg_entry(state, ch, sg); - if (ret) - return ret; - } - - ret = srp_map_finish_fmr(state, ch); - if (ret) - return ret; - - return 0; -} - static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, struct srp_request *req, struct scatterlist *scat, int count) @@ -1733,7 +1565,6 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, struct srp_device *dev = target->srp_host->srp_dev; struct srp_map_state state; struct srp_direct_buf idb_desc; - u64 idb_pages[1]; struct scatterlist idb_sg[1]; int ret; @@ -1756,14 +1587,6 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, if (ret < 0) return ret; WARN_ON_ONCE(ret < 1); - } else if (dev->use_fmr) { - state.pages = idb_pages; - state.pages[0] = (req->indirect_dma_addr & - dev->mr_page_mask); - state.npages = 1; - ret = srp_map_finish_fmr(&state, ch); - if (ret < 0) - return ret; } else { return -EINVAL; } @@ -1787,9 +1610,6 @@ static void srp_check_mapping(struct srp_map_state *state, if (dev->use_fast_reg) for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) mr_len += (*pfr)->mr->length; - else if (dev->use_fmr) - for (i = 0; i < state->nmdesc; i++) - mr_len += be32_to_cpu(req->indirect_desc[i].len); if (desc_len != scsi_bufflen(req->scmnd) || mr_len > scsi_bufflen(req->scmnd)) pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", @@ -1904,8 +1724,6 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, state.desc = req->indirect_desc; if (dev->use_fast_reg) ret = srp_map_sg_fr(&state, ch, req, scat, count); - else if (dev->use_fmr) - ret = srp_map_sg_fmr(&state, ch, req, scat, count); else ret = srp_map_sg_dma(&state, ch, req, scat, count); req->nmdesc = state.nmdesc; @@ -3874,13 +3692,13 @@ static ssize_t srp_create_target(struct device *dev, goto out; } - if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && + if (!srp_dev->has_fr && !target->allow_ext_sg && target->cmd_sg_cnt < target->sg_tablesize) { pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); target->sg_tablesize = target->cmd_sg_cnt; } - if (srp_dev->use_fast_reg || srp_dev->use_fmr) { + if (srp_dev->use_fast_reg) { bool gaps_reg = (ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG); @@ -3888,12 +3706,12 @@ static ssize_t srp_create_target(struct device *dev, (ilog2(srp_dev->mr_page_size) - 9); if (!gaps_reg) { /* - * FR and FMR can only map one HCA page per entry. If - * the start address is not aligned on a HCA page - * boundary two entries will be used for the head and - * the tail although these two entries combined - * contain at most one HCA page of data. Hence the "+ - * 1" in the calculation below. + * FR can only map one HCA page per entry. If the start + * address is not aligned on a HCA page boundary two + * entries will be used for the head and the tail + * although these two entries combined contain at most + * one HCA page of data. Hence the "+ 1" in the + * calculation below. * * The indirect data buffer descriptor is contiguous * so the memory for that buffer will only be @@ -4174,23 +3992,15 @@ static int srp_add_one(struct ib_device *device) srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, max_pages_per_mr); - srp_dev->has_fmr = (device->ops.alloc_fmr && - device->ops.dealloc_fmr && - device->ops.map_phys_fmr && - device->ops.unmap_fmr); srp_dev->has_fr = (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); - if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { - dev_warn(&device->dev, "neither FMR nor FR is supported\n"); - } else if (!never_register && - attr->max_mr_size >= 2 * srp_dev->mr_page_size) { - srp_dev->use_fast_reg = (srp_dev->has_fr && - (!srp_dev->has_fmr || prefer_fr)); - srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; - } + if (!never_register && !srp_dev->has_fr) + dev_warn(&device->dev, "FR is not supported\n"); + else if (!never_register && + attr->max_mr_size >= 2 * srp_dev->mr_page_size) + srp_dev->use_fast_reg = srp_dev->has_fr; - if (never_register || !register_always || - (!srp_dev->has_fmr && !srp_dev->has_fr)) + if (never_register || !register_always || !srp_dev->has_fr) flags |= IB_PD_UNSAFE_GLOBAL_RKEY; if (srp_dev->use_fast_reg) { diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 6fabcc2faf1f..6818cac0a3b7 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -44,7 +44,6 @@ #include #include #include -#include #include enum { @@ -95,8 +94,7 @@ enum srp_iu_type { /* * @mr_page_mask: HCA memory registration page mask. * @mr_page_size: HCA memory registration page size. - * @mr_max_size: Maximum size in bytes of a single FMR / FR registration - * request. + * @mr_max_size: Maximum size in bytes of a single FR registration request. */ struct srp_device { struct list_head dev_list; @@ -107,9 +105,7 @@ struct srp_device { int mr_page_size; int mr_max_size; int max_pages_per_mr; - bool has_fmr; bool has_fr; - bool use_fmr; bool use_fast_reg; }; @@ -127,11 +123,7 @@ struct srp_host { struct srp_request { struct scsi_cmnd *scmnd; struct srp_iu *cmd; - union { - struct ib_pool_fmr **fmr_list; - struct srp_fr_desc **fr_list; - }; - u64 *map_page; + struct srp_fr_desc **fr_list; struct srp_direct_buf *indirect_desc; dma_addr_t indirect_dma_addr; short nmdesc; @@ -155,10 +147,7 @@ struct srp_rdma_ch { struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_qp *qp; - union { - struct ib_fmr_pool *fmr_pool; - struct srp_fr_pool *fr_pool; - }; + struct srp_fr_pool *fr_pool; uint32_t max_it_iu_len; uint32_t max_ti_iu_len; u8 max_imm_sge; @@ -319,19 +308,15 @@ struct srp_fr_pool { * @pages: Array with DMA addresses of pages being considered for * memory registration. * @base_dma_addr: DMA address of the first page that has not yet been mapped. - * @dma_len: Number of bytes that will be registered with the next - * FMR or FR memory registration call. + * @dma_len: Number of bytes that will be registered with the next FR + * memory registration call. * @total_len: Total number of bytes in the sg-list being mapped. * @npages: Number of page addresses in the pages[] array. - * @nmdesc: Number of FMR or FR memory descriptors used for mapping. + * @nmdesc: Number of FR memory descriptors used for mapping. * @ndesc: Number of SRP buffer descriptors that have been filled in. */ struct srp_map_state { union { - struct { - struct ib_pool_fmr **next; - struct ib_pool_fmr **end; - } fmr; struct { struct srp_fr_desc **next; struct srp_fr_desc **end; From 07549ee21ce5247143ffb069bf838025d86b908c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:45 -0300 Subject: [PATCH 540/562] RDMA/rds: Remove FMR support for memory registration Use FRWR method for memory registration by default and remove the ancient and unsafe FMR method. Link: https://lore.kernel.org/r/3-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- net/rds/Makefile | 2 +- net/rds/ib.c | 20 ++-- net/rds/ib.h | 2 - net/rds/ib_cm.c | 4 +- net/rds/ib_fmr.c | 269 ---------------------------------------------- net/rds/ib_frmr.c | 4 +- net/rds/ib_mr.h | 14 +-- net/rds/ib_rdma.c | 28 ++--- 8 files changed, 21 insertions(+), 322 deletions(-) delete mode 100644 net/rds/ib_fmr.c diff --git a/net/rds/Makefile b/net/rds/Makefile index e647f9de104a..8fdc118e2927 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -7,7 +7,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ obj-$(CONFIG_RDS_RDMA) += rds_rdma.o rds_rdma-y := rdma_transport.o \ ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ - ib_sysctl.o ib_rdma.o ib_fmr.o ib_frmr.o + ib_sysctl.o ib_rdma.o ib_frmr.o obj-$(CONFIG_RDS_TCP) += rds_tcp.o diff --git a/net/rds/ib.c b/net/rds/ib.c index 90212ed3edf1..6c43b3e4c736 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -130,13 +130,16 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) static int rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; - bool has_fr, has_fmr; int ret; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) return -EOPNOTSUPP; + /* Device must support FRWR */ + if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) + return -EOPNOTSUPP; + rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ibdev_to_node(device)); if (!rds_ibdev) @@ -152,11 +155,6 @@ static int rds_ib_add_one(struct ib_device *device) rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); - has_fr = (device->attrs.device_cap_flags & - IB_DEVICE_MEM_MGT_EXTENSIONS); - has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr && - device->ops.map_phys_fmr && device->ops.unmap_fmr); - rds_ibdev->use_fastreg = (has_fr && !has_fmr); rds_ibdev->odp_capable = !!(device->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) && @@ -165,7 +163,6 @@ static int rds_ib_add_one(struct ib_device *device) !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_READ); - rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; rds_ibdev->max_1m_mrs = device->attrs.max_mr ? min_t(unsigned int, (device->attrs.max_mr / 2), rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; @@ -219,14 +216,11 @@ static int rds_ib_add_one(struct ib_device *device) goto put_dev; } - rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", + rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, - rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs, - rds_ibdev->max_8k_mrs); + rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); - pr_info("RDS/IB: %s: %s supported and preferred\n", - device->name, - rds_ibdev->use_fastreg ? "FRMR" : "FMR"); + pr_info("RDS/IB: %s: added\n", device->name); down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); diff --git a/net/rds/ib.h b/net/rds/ib.h index 0296f1f7acda..5ae069d39eab 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -247,13 +247,11 @@ struct rds_ib_device { struct ib_device *dev; struct ib_pd *pd; struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ - u8 use_fastreg:1; u8 odp_capable:1; unsigned int max_mrs; struct rds_ib_mr_pool *mr_1m_pool; struct rds_ib_mr_pool *mr_8k_pool; - unsigned int fmr_max_remaps; unsigned int max_8k_mrs; unsigned int max_1m_mrs; int max_sge; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0fec4171564e..c3319ff3ee11 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -527,10 +527,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn) return -EOPNOTSUPP; /* The fr_queue_space is currently set to 512, to add extra space on - * completion queue and send queue. This extra space is used for FRMR + * completion queue and send queue. This extra space is used for FRWR * registration and invalidation work requests */ - fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0); + fr_queue_space = RDS_IB_DEFAULT_FR_WR; /* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c deleted file mode 100644 index 93c0437e6a5f..000000000000 --- a/net/rds/ib_fmr.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2016 Oracle. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ib_mr.h" - -struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) -{ - struct rds_ib_mr_pool *pool; - struct rds_ib_mr *ibmr = NULL; - struct rds_ib_fmr *fmr; - int err = 0; - - if (npages <= RDS_MR_8K_MSG_SIZE) - pool = rds_ibdev->mr_8k_pool; - else - pool = rds_ibdev->mr_1m_pool; - - if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); - - /* Switch pools if one of the pool is reaching upper limit */ - if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { - if (pool->pool_type == RDS_IB_MR_8K_POOL) - pool = rds_ibdev->mr_1m_pool; - else - pool = rds_ibdev->mr_8k_pool; - } - - ibmr = rds_ib_try_reuse_ibmr(pool); - if (ibmr) - return ibmr; - - ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, - rdsibdev_to_node(rds_ibdev)); - if (!ibmr) { - err = -ENOMEM; - goto out_no_cigar; - } - - fmr = &ibmr->u.fmr; - fmr->fmr = ib_alloc_fmr(rds_ibdev->pd, - (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_ATOMIC), - &pool->fmr_attr); - if (IS_ERR(fmr->fmr)) { - err = PTR_ERR(fmr->fmr); - fmr->fmr = NULL; - pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err); - goto out_no_cigar; - } - - ibmr->pool = pool; - if (pool->pool_type == RDS_IB_MR_8K_POOL) - rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc); - else - rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc); - - return ibmr; - -out_no_cigar: - kfree(ibmr); - atomic_dec(&pool->item_count); - - return ERR_PTR(err); -} - -static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, - struct rds_ib_mr *ibmr, struct scatterlist *sg, - unsigned int nents) -{ - struct ib_device *dev = rds_ibdev->dev; - struct rds_ib_fmr *fmr = &ibmr->u.fmr; - struct scatterlist *scat = sg; - u64 io_addr = 0; - u64 *dma_pages; - u32 len; - int page_cnt, sg_dma_len; - int i, j; - int ret; - - sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - if (unlikely(!sg_dma_len)) { - pr_warn("RDS/IB: %s failed!\n", __func__); - return -EBUSY; - } - - len = 0; - page_cnt = 0; - - for (i = 0; i < sg_dma_len; ++i) { - unsigned int dma_len = sg_dma_len(&scat[i]); - u64 dma_addr = sg_dma_address(&scat[i]); - - if (dma_addr & ~PAGE_MASK) { - if (i > 0) { - ib_dma_unmap_sg(dev, sg, nents, - DMA_BIDIRECTIONAL); - return -EINVAL; - } else { - ++page_cnt; - } - } - if ((dma_addr + dma_len) & ~PAGE_MASK) { - if (i < sg_dma_len - 1) { - ib_dma_unmap_sg(dev, sg, nents, - DMA_BIDIRECTIONAL); - return -EINVAL; - } else { - ++page_cnt; - } - } - - len += dma_len; - } - - page_cnt += len >> PAGE_SHIFT; - if (page_cnt > ibmr->pool->fmr_attr.max_pages) { - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - return -EINVAL; - } - - dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC, - rdsibdev_to_node(rds_ibdev)); - if (!dma_pages) { - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - return -ENOMEM; - } - - page_cnt = 0; - for (i = 0; i < sg_dma_len; ++i) { - unsigned int dma_len = sg_dma_len(&scat[i]); - u64 dma_addr = sg_dma_address(&scat[i]); - - for (j = 0; j < dma_len; j += PAGE_SIZE) - dma_pages[page_cnt++] = - (dma_addr & PAGE_MASK) + j; - } - - ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr); - if (ret) { - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - goto out; - } - - /* Success - we successfully remapped the MR, so we can - * safely tear down the old mapping. - */ - rds_ib_teardown_mr(ibmr); - - ibmr->sg = scat; - ibmr->sg_len = nents; - ibmr->sg_dma_len = sg_dma_len; - ibmr->remap_count++; - - if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) - rds_ib_stats_inc(s_ib_rdma_mr_8k_used); - else - rds_ib_stats_inc(s_ib_rdma_mr_1m_used); - ret = 0; - -out: - kfree(dma_pages); - - return ret; -} - -struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev, - struct scatterlist *sg, - unsigned long nents, - u32 *key) -{ - struct rds_ib_mr *ibmr = NULL; - struct rds_ib_fmr *fmr; - int ret; - - ibmr = rds_ib_alloc_fmr(rds_ibdev, nents); - if (IS_ERR(ibmr)) - return ibmr; - - ibmr->device = rds_ibdev; - fmr = &ibmr->u.fmr; - ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); - if (ret == 0) - *key = fmr->fmr->rkey; - else - rds_ib_free_mr(ibmr, 0); - - return ibmr; -} - -void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed, - unsigned long *unpinned, unsigned int goal) -{ - struct rds_ib_mr *ibmr, *next; - struct rds_ib_fmr *fmr; - LIST_HEAD(fmr_list); - int ret = 0; - unsigned int freed = *nfreed; - - /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ - list_for_each_entry(ibmr, list, unmap_list) { - fmr = &ibmr->u.fmr; - list_add(&fmr->fmr->list, &fmr_list); - } - - ret = ib_unmap_fmr(&fmr_list); - if (ret) - pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret); - - /* Now we can destroy the DMA mapping and unpin any pages */ - list_for_each_entry_safe(ibmr, next, list, unmap_list) { - fmr = &ibmr->u.fmr; - *unpinned += ibmr->sg_len; - __rds_ib_teardown_mr(ibmr); - if (freed < goal || - ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) { - if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) - rds_ib_stats_inc(s_ib_rdma_mr_8k_free); - else - rds_ib_stats_inc(s_ib_rdma_mr_1m_free); - list_del(&ibmr->unmap_list); - ib_dealloc_fmr(fmr->fmr); - kfree(ibmr); - freed++; - } - } - *nfreed = freed; -} - -void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr) -{ - struct rds_ib_mr_pool *pool = ibmr->pool; - - if (ibmr->remap_count >= pool->fmr_attr.max_maps) - llist_add(&ibmr->llnode, &pool->drop_list); - else - llist_add(&ibmr->llnode, &pool->free_list); -} diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 06ecf9d2d4bf..9b6ffff72f2d 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -76,7 +76,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, frmr = &ibmr->u.frmr; frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG, - pool->fmr_attr.max_pages); + pool->max_pages); if (IS_ERR(frmr->mr)) { pr_warn("RDS/IB: %s failed to allocate MR", __func__); err = PTR_ERR(frmr->mr); @@ -240,7 +240,7 @@ static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev, } frmr->dma_npages += len >> PAGE_SHIFT; - if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) { + if (frmr->dma_npages > ibmr->pool->max_pages) { ret = -EMSGSIZE; goto out_unmap; } diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 0c8252d7fe2b..ea5e9aee4959 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -43,10 +43,6 @@ #define RDS_MR_8K_SCALE (256 / (RDS_MR_8K_MSG_SIZE + 1)) #define RDS_MR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2)) -struct rds_ib_fmr { - struct ib_fmr *fmr; -}; - enum rds_ib_fr_state { FRMR_IS_FREE, /* mr invalidated & ready for use */ FRMR_IS_INUSE, /* mr is in use or used & can be invalidated */ @@ -84,7 +80,6 @@ struct rds_ib_mr { u8 odp:1; union { - struct rds_ib_fmr fmr; struct rds_ib_frmr frmr; struct ib_mr *mr; } u; @@ -109,8 +104,7 @@ struct rds_ib_mr_pool { unsigned long max_items; unsigned long max_items_soft; unsigned long max_free_pinned; - struct ib_fmr_attr fmr_attr; - bool use_fastreg; + unsigned int max_pages; }; extern struct workqueue_struct *rds_ib_mr_wq; @@ -136,15 +130,9 @@ u32 rds_ib_get_lkey(void *trans_private); void __rds_ib_teardown_mr(struct rds_ib_mr *); void rds_ib_teardown_mr(struct rds_ib_mr *); -struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int); struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *); int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **); -struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *, - unsigned long, u32 *); struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *); -void rds_ib_unreg_fmr(struct list_head *, unsigned int *, - unsigned long *, unsigned int); -void rds_ib_free_fmr_list(struct rds_ib_mr *); struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, struct rds_ib_connection *ic, struct scatterlist *sg, diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index b34b24e237f8..8f070ee7e742 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -181,7 +181,7 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo->rdma_mr_max = pool_1m->max_items; - iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; + iinfo->rdma_mr_size = pool_1m->max_pages; } #if IS_ENABLED(CONFIG_IPV6) @@ -191,7 +191,7 @@ void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo6->rdma_mr_max = pool_1m->max_items; - iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages; + iinfo6->rdma_mr_size = pool_1m->max_pages; } #endif @@ -406,10 +406,7 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, if (list_empty(&unmap_list)) goto out; - if (pool->use_fastreg) - rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); - else - rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal); + rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); if (!list_empty(&unmap_list)) { unsigned long flags; @@ -503,10 +500,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) } /* Return it to the pool's free list */ - if (rds_ibdev->use_fastreg) - rds_ib_free_frmr_list(ibmr); - else - rds_ib_free_fmr_list(ibmr); + rds_ib_free_frmr_list(ibmr); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); @@ -622,10 +616,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, goto out; } - if (rds_ibdev->use_fastreg) - ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); - else - ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret); + ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); if (IS_ERR(ibmr)) { ret = PTR_ERR(ibmr); pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); @@ -669,19 +660,16 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, if (pool_type == RDS_IB_MR_1M_POOL) { /* +1 allows for unaligned MRs */ - pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1; + pool->max_pages = RDS_MR_1M_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_1m_mrs; } else { /* pool_type == RDS_IB_MR_8K_POOL */ - pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1; + pool->max_pages = RDS_MR_8K_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_8k_mrs; } - pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; - pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; - pool->fmr_attr.page_shift = PAGE_SHIFT; + pool->max_free_pinned = pool->max_items * pool->max_pages / 4; pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4; - pool->use_fastreg = rds_ibdev->use_fastreg; return pool; } From 4e373d5417ecbb4f438a8500f0379a2fc29c2643 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:46 -0300 Subject: [PATCH 541/562] RDMA/core: Remove FMR pool API This ancient and unsafe method for memory registration is no longer used by any RDMA based ULP. Remove the FMR pool API from the core driver. Link: https://lore.kernel.org/r/4-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- Documentation/driver-api/infiniband.rst | 3 - drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/fmr_pool.c | 494 ------------------------ include/rdma/ib_fmr_pool.h | 93 ----- 4 files changed, 1 insertion(+), 591 deletions(-) delete mode 100644 drivers/infiniband/core/fmr_pool.c delete mode 100644 include/rdma/ib_fmr_pool.h diff --git a/Documentation/driver-api/infiniband.rst b/Documentation/driver-api/infiniband.rst index 1a3116f32ff0..30e142ccbee9 100644 --- a/Documentation/driver-api/infiniband.rst +++ b/Documentation/driver-api/infiniband.rst @@ -37,9 +37,6 @@ InfiniBand core interfaces .. kernel-doc:: drivers/infiniband/core/ud_header.c :export: -.. kernel-doc:: drivers/infiniband/core/fmr_pool.c - :export: - .. kernel-doc:: drivers/infiniband/core/umem.c :export: diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 63c1591223ac..24cb71a16a28 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ - device.o fmr_pool.o cache.o netlink.o \ + device.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ nldev.o restrack.o counters.o ib_core_uverbs.o \ diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c deleted file mode 100644 index e08aec427027..000000000000 --- a/drivers/infiniband/core/fmr_pool.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "core_priv.h" - -#define PFX "fmr_pool: " - -enum { - IB_FMR_MAX_REMAPS = 32, - - IB_FMR_HASH_BITS = 8, - IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS, - IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1 -}; - -/* - * If an FMR is not in use, then the list member will point to either - * its pool's free_list (if the FMR can be mapped again; that is, - * remap_count < pool->max_remaps) or its pool's dirty_list (if the - * FMR needs to be unmapped before being remapped). In either of - * these cases it is a bug if the ref_count is not 0. In other words, - * if ref_count is > 0, then the list member must not be linked into - * either free_list or dirty_list. - * - * The cache_node member is used to link the FMR into a cache bucket - * (if caching is enabled). This is independent of the reference - * count of the FMR. When a valid FMR is released, its ref_count is - * decremented, and if ref_count reaches 0, the FMR is placed in - * either free_list or dirty_list as appropriate. However, it is not - * removed from the cache and may be "revived" if a call to - * ib_fmr_register_physical() occurs before the FMR is remapped. In - * this case we just increment the ref_count and remove the FMR from - * free_list/dirty_list. - * - * Before we remap an FMR from free_list, we remove it from the cache - * (to prevent another user from obtaining a stale FMR). When an FMR - * is released, we add it to the tail of the free list, so that our - * cache eviction policy is "least recently used." - * - * All manipulation of ref_count, list and cache_node is protected by - * pool_lock to maintain consistency. - */ - -struct ib_fmr_pool { - spinlock_t pool_lock; - - int pool_size; - int max_pages; - int max_remaps; - int dirty_watermark; - int dirty_len; - struct list_head free_list; - struct list_head dirty_list; - struct hlist_head *cache_bucket; - - void (*flush_function)(struct ib_fmr_pool *pool, - void * arg); - void *flush_arg; - - struct kthread_worker *worker; - struct kthread_work work; - - atomic_t req_ser; - atomic_t flush_ser; - - wait_queue_head_t force_wait; -}; - -static inline u32 ib_fmr_hash(u64 first_page) -{ - return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) & - (IB_FMR_HASH_SIZE - 1); -} - -/* Caller must hold pool_lock */ -static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, - u64 *page_list, - int page_list_len, - u64 io_virtual_address) -{ - struct hlist_head *bucket; - struct ib_pool_fmr *fmr; - - if (!pool->cache_bucket) - return NULL; - - bucket = pool->cache_bucket + ib_fmr_hash(*page_list); - - hlist_for_each_entry(fmr, bucket, cache_node) - if (io_virtual_address == fmr->io_virtual_address && - page_list_len == fmr->page_list_len && - !memcmp(page_list, fmr->page_list, - page_list_len * sizeof *page_list)) - return fmr; - - return NULL; -} - -static void ib_fmr_batch_release(struct ib_fmr_pool *pool) -{ - int ret; - struct ib_pool_fmr *fmr; - LIST_HEAD(unmap_list); - LIST_HEAD(fmr_list); - - spin_lock_irq(&pool->pool_lock); - - list_for_each_entry(fmr, &pool->dirty_list, list) { - hlist_del_init(&fmr->cache_node); - fmr->remap_count = 0; - list_add_tail(&fmr->fmr->list, &fmr_list); - } - - list_splice_init(&pool->dirty_list, &unmap_list); - pool->dirty_len = 0; - - spin_unlock_irq(&pool->pool_lock); - - if (list_empty(&unmap_list)) { - return; - } - - ret = ib_unmap_fmr(&fmr_list); - if (ret) - pr_warn(PFX "ib_unmap_fmr returned %d\n", ret); - - spin_lock_irq(&pool->pool_lock); - list_splice(&unmap_list, &pool->free_list); - spin_unlock_irq(&pool->pool_lock); -} - -static void ib_fmr_cleanup_func(struct kthread_work *work) -{ - struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work); - - ib_fmr_batch_release(pool); - atomic_inc(&pool->flush_ser); - wake_up_interruptible(&pool->force_wait); - - if (pool->flush_function) - pool->flush_function(pool, pool->flush_arg); - - if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) - kthread_queue_work(pool->worker, &pool->work); -} - -/** - * ib_create_fmr_pool - Create an FMR pool - * @pd:Protection domain for FMRs - * @params:FMR pool parameters - * - * Create a pool of FMRs. Return value is pointer to new pool or - * error code if creation failed. - */ -struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, - struct ib_fmr_pool_param *params) -{ - struct ib_device *device; - struct ib_fmr_pool *pool; - int i; - int ret; - int max_remaps; - - if (!params) - return ERR_PTR(-EINVAL); - - device = pd->device; - if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr || - !device->ops.map_phys_fmr || !device->ops.unmap_fmr) { - dev_info(&device->dev, "Device does not support FMRs\n"); - return ERR_PTR(-ENOSYS); - } - - if (!device->attrs.max_map_per_fmr) - max_remaps = IB_FMR_MAX_REMAPS; - else - max_remaps = device->attrs.max_map_per_fmr; - - pool = kmalloc(sizeof *pool, GFP_KERNEL); - if (!pool) - return ERR_PTR(-ENOMEM); - - pool->cache_bucket = NULL; - pool->flush_function = params->flush_function; - pool->flush_arg = params->flush_arg; - - INIT_LIST_HEAD(&pool->free_list); - INIT_LIST_HEAD(&pool->dirty_list); - - if (params->cache) { - pool->cache_bucket = - kmalloc_array(IB_FMR_HASH_SIZE, - sizeof(*pool->cache_bucket), - GFP_KERNEL); - if (!pool->cache_bucket) { - ret = -ENOMEM; - goto out_free_pool; - } - - for (i = 0; i < IB_FMR_HASH_SIZE; ++i) - INIT_HLIST_HEAD(pool->cache_bucket + i); - } - - pool->pool_size = 0; - pool->max_pages = params->max_pages_per_fmr; - pool->max_remaps = max_remaps; - pool->dirty_watermark = params->dirty_watermark; - pool->dirty_len = 0; - spin_lock_init(&pool->pool_lock); - atomic_set(&pool->req_ser, 0); - atomic_set(&pool->flush_ser, 0); - init_waitqueue_head(&pool->force_wait); - - pool->worker = - kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev)); - if (IS_ERR(pool->worker)) { - pr_warn(PFX "couldn't start cleanup kthread worker\n"); - ret = PTR_ERR(pool->worker); - goto out_free_pool; - } - kthread_init_work(&pool->work, ib_fmr_cleanup_func); - - { - struct ib_pool_fmr *fmr; - struct ib_fmr_attr fmr_attr = { - .max_pages = params->max_pages_per_fmr, - .max_maps = pool->max_remaps, - .page_shift = params->page_shift - }; - int bytes_per_fmr = sizeof *fmr; - - if (pool->cache_bucket) - bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64); - - for (i = 0; i < params->pool_size; ++i) { - fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); - if (!fmr) - goto out_fail; - - fmr->pool = pool; - fmr->remap_count = 0; - fmr->ref_count = 0; - INIT_HLIST_NODE(&fmr->cache_node); - - fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); - if (IS_ERR(fmr->fmr)) { - pr_warn(PFX "fmr_create failed for FMR %d\n", - i); - kfree(fmr); - goto out_fail; - } - - list_add_tail(&fmr->list, &pool->free_list); - ++pool->pool_size; - } - } - - return pool; - - out_free_pool: - kfree(pool->cache_bucket); - kfree(pool); - - return ERR_PTR(ret); - - out_fail: - ib_destroy_fmr_pool(pool); - - return ERR_PTR(-ENOMEM); -} -EXPORT_SYMBOL(ib_create_fmr_pool); - -/** - * ib_destroy_fmr_pool - Free FMR pool - * @pool:FMR pool to free - * - * Destroy an FMR pool and free all associated resources. - */ -void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) -{ - struct ib_pool_fmr *fmr; - struct ib_pool_fmr *tmp; - LIST_HEAD(fmr_list); - int i; - - kthread_destroy_worker(pool->worker); - ib_fmr_batch_release(pool); - - i = 0; - list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { - if (fmr->remap_count) { - INIT_LIST_HEAD(&fmr_list); - list_add_tail(&fmr->fmr->list, &fmr_list); - ib_unmap_fmr(&fmr_list); - } - ib_dealloc_fmr(fmr->fmr); - list_del(&fmr->list); - kfree(fmr); - ++i; - } - - if (i < pool->pool_size) - pr_warn(PFX "pool still has %d regions registered\n", - pool->pool_size - i); - - kfree(pool->cache_bucket); - kfree(pool); -} -EXPORT_SYMBOL(ib_destroy_fmr_pool); - -/** - * ib_flush_fmr_pool - Invalidate all unmapped FMRs - * @pool:FMR pool to flush - * - * Ensure that all unmapped FMRs are fully invalidated. - */ -int ib_flush_fmr_pool(struct ib_fmr_pool *pool) -{ - int serial; - struct ib_pool_fmr *fmr, *next; - - /* - * The free_list holds FMRs that may have been used - * but have not been remapped enough times to be dirty. - * Put them on the dirty list now so that the cleanup - * thread will reap them too. - */ - spin_lock_irq(&pool->pool_lock); - list_for_each_entry_safe(fmr, next, &pool->free_list, list) { - if (fmr->remap_count > 0) - list_move(&fmr->list, &pool->dirty_list); - } - spin_unlock_irq(&pool->pool_lock); - - serial = atomic_inc_return(&pool->req_ser); - kthread_queue_work(pool->worker, &pool->work); - - if (wait_event_interruptible(pool->force_wait, - atomic_read(&pool->flush_ser) - serial >= 0)) - return -EINTR; - - return 0; -} -EXPORT_SYMBOL(ib_flush_fmr_pool); - -/** - * ib_fmr_pool_map_phys - Map an FMR from an FMR pool. - * @pool_handle: FMR pool to allocate FMR from - * @page_list: List of pages to map - * @list_len: Number of pages in @page_list - * @io_virtual_address: I/O virtual address for new FMR - */ -struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, - u64 *page_list, - int list_len, - u64 io_virtual_address) -{ - struct ib_fmr_pool *pool = pool_handle; - struct ib_pool_fmr *fmr; - unsigned long flags; - int result; - - if (list_len < 1 || list_len > pool->max_pages) - return ERR_PTR(-EINVAL); - - spin_lock_irqsave(&pool->pool_lock, flags); - fmr = ib_fmr_cache_lookup(pool, - page_list, - list_len, - io_virtual_address); - if (fmr) { - /* found in cache */ - ++fmr->ref_count; - if (fmr->ref_count == 1) { - list_del(&fmr->list); - } - - spin_unlock_irqrestore(&pool->pool_lock, flags); - - return fmr; - } - - if (list_empty(&pool->free_list)) { - spin_unlock_irqrestore(&pool->pool_lock, flags); - return ERR_PTR(-EAGAIN); - } - - fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list); - list_del(&fmr->list); - hlist_del_init(&fmr->cache_node); - spin_unlock_irqrestore(&pool->pool_lock, flags); - - result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, - io_virtual_address); - - if (result) { - spin_lock_irqsave(&pool->pool_lock, flags); - list_add(&fmr->list, &pool->free_list); - spin_unlock_irqrestore(&pool->pool_lock, flags); - - pr_warn(PFX "fmr_map returns %d\n", result); - - return ERR_PTR(result); - } - - ++fmr->remap_count; - fmr->ref_count = 1; - - if (pool->cache_bucket) { - fmr->io_virtual_address = io_virtual_address; - fmr->page_list_len = list_len; - memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); - - spin_lock_irqsave(&pool->pool_lock, flags); - hlist_add_head(&fmr->cache_node, - pool->cache_bucket + ib_fmr_hash(fmr->page_list[0])); - spin_unlock_irqrestore(&pool->pool_lock, flags); - } - - return fmr; -} -EXPORT_SYMBOL(ib_fmr_pool_map_phys); - -/** - * ib_fmr_pool_unmap - Unmap FMR - * @fmr:FMR to unmap - * - * Unmap an FMR. The FMR mapping may remain valid until the FMR is - * reused (or until ib_flush_fmr_pool() is called). - */ -void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) -{ - struct ib_fmr_pool *pool; - unsigned long flags; - - pool = fmr->pool; - - spin_lock_irqsave(&pool->pool_lock, flags); - - --fmr->ref_count; - if (!fmr->ref_count) { - if (fmr->remap_count < pool->max_remaps) { - list_add_tail(&fmr->list, &pool->free_list); - } else { - list_add_tail(&fmr->list, &pool->dirty_list); - if (++pool->dirty_len >= pool->dirty_watermark) { - atomic_inc(&pool->req_ser); - kthread_queue_work(pool->worker, &pool->work); - } - } - } - - spin_unlock_irqrestore(&pool->pool_lock, flags); -} -EXPORT_SYMBOL(ib_fmr_pool_unmap); diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h deleted file mode 100644 index 2fd9bfb6d648..000000000000 --- a/include/rdma/ib_fmr_pool.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if !defined(IB_FMR_POOL_H) -#define IB_FMR_POOL_H - -#include - -struct ib_fmr_pool; - -/** - * struct ib_fmr_pool_param - Parameters for creating FMR pool - * @max_pages_per_fmr:Maximum number of pages per map request. - * @page_shift: Log2 of sizeof "pages" mapped by this fmr - * @access:Access flags for FMRs in pool. - * @pool_size:Number of FMRs to allocate for pool. - * @dirty_watermark:Flush is triggered when @dirty_watermark dirty - * FMRs are present. - * @flush_function:Callback called when unmapped FMRs are flushed and - * more FMRs are possibly available for mapping - * @flush_arg:Context passed to user's flush function. - * @cache:If set, FMRs may be reused after unmapping for identical map - * requests. - */ -struct ib_fmr_pool_param { - int max_pages_per_fmr; - int page_shift; - enum ib_access_flags access; - int pool_size; - int dirty_watermark; - void (*flush_function)(struct ib_fmr_pool *pool, - void *arg); - void *flush_arg; - unsigned cache:1; -}; - -struct ib_pool_fmr { - struct ib_fmr *fmr; - struct ib_fmr_pool *pool; - struct list_head list; - struct hlist_node cache_node; - int ref_count; - int remap_count; - u64 io_virtual_address; - int page_list_len; - u64 page_list[]; -}; - -struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, - struct ib_fmr_pool_param *params); - -void ib_destroy_fmr_pool(struct ib_fmr_pool *pool); - -int ib_flush_fmr_pool(struct ib_fmr_pool *pool); - -struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, - u64 *page_list, - int list_len, - u64 io_virtual_address); - -void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); - -#endif /* IB_FMR_POOL_H */ From d29d58e772ecde84df15f547dc06f0b7afc7ae5c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 28 May 2020 16:45:47 -0300 Subject: [PATCH 542/562] RDMA/mlx5: Remove FMR leftovers Remove a few leftovers from FMR functionality which are no longer used. Link: https://lore.kernel.org/r/5-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Gal Pressman Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2a702fa9e943..5dbe3eb0d9cb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -675,12 +675,6 @@ struct umr_common { struct semaphore sem; }; -enum { - MLX5_FMR_INVALID, - MLX5_FMR_VALID, - MLX5_FMR_BUSY, -}; - struct mlx5_cache_ent { struct list_head head; /* sync access to the cahce entry @@ -1253,8 +1247,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); -void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, unsigned long max_page_shift, int *count, int *shift, From 7c08bc195665201c207fb3fd91a75a8f77c3b3b0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 28 May 2020 16:45:48 -0300 Subject: [PATCH 543/562] RDMA/bnxt_re: Remove FMR leftovers The bnxt_re_fmr struct is never referenced and the max_fmr items in bnxt_qplib_dev_attr are never read. Link: https://lore.kernel.org/r/6-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 --- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 6 ------ drivers/infiniband/hw/bnxt_re/qplib_sp.c | 3 --- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2 -- 4 files changed, 14 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 5a7c090204c5..8b6ad5cddfce 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -177,9 +177,6 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->max_total_mcast_qp_attach = 0; ib_attr->max_ah = dev_attr->max_ah; - ib_attr->max_fmr = 0; - ib_attr->max_map_per_fmr = 0; - ib_attr->max_srq = dev_attr->max_srq; ib_attr->max_srq_wr = dev_attr->max_srq_wqes; ib_attr->max_srq_sge = dev_attr->max_srq_sges; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 204c0849ba28..e5fbbeba6d28 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -122,12 +122,6 @@ struct bnxt_re_frpl { u64 *page_list; }; -struct bnxt_re_fmr { - struct bnxt_re_dev *rdev; - struct ib_fmr ib_fmr; - struct bnxt_qplib_mrw qplib_fmr; -}; - struct bnxt_re_mw { struct bnxt_re_dev *rdev; struct ib_mw ib_mw; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 66954ff6a2f2..4cd475ea97a2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -132,9 +132,6 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp); attr->max_ah = le32_to_cpu(sb->max_ah); - attr->max_fmr = le32_to_cpu(sb->max_fmr); - attr->max_map_per_fmr = sb->max_map_per_fmr; - attr->max_srq = le16_to_cpu(sb->max_srq); attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1; attr->max_srq_sges = sb->max_srq_sge; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 13d9432d5ce2..6404f0da1051 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -64,8 +64,6 @@ struct bnxt_qplib_dev_attr { u32 max_mw; u32 max_raw_ethy_qp; u32 max_ah; - u32 max_fmr; - u32 max_map_per_fmr; u32 max_srq; u32 max_srq_wqes; u32 max_srq_sges; From f0c73c70db99e30f790572b97531aa569ec1ba60 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 28 May 2020 16:45:49 -0300 Subject: [PATCH 544/562] RDMA/i40iw: Remove FMR leftovers The ibfmr member is never referenced, remove it. Link: https://lore.kernel.org/r/7-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw.h | 9 --------- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 1 - 2 files changed, 10 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 3c62c9327a9c..49d92638e0db 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -381,15 +381,6 @@ static inline struct i40iw_mr *to_iwmr(struct ib_mr *ibmr) return container_of(ibmr, struct i40iw_mr, ibmr); } -/** - * to_iwmr_from_ibfmr - get device memory region - * @ibfmr: ib fmr - **/ -static inline struct i40iw_mr *to_iwmr_from_ibfmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct i40iw_mr, ibfmr); -} - /** * to_iwmw - get device memory window * @ibmw: ib memory window diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 3a413752ccc3..331bc21cbcc7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -89,7 +89,6 @@ struct i40iw_mr { union { struct ib_mr ibmr; struct ib_mw ibmw; - struct ib_fmr ibfmr; }; struct ib_umem *region; u16 type; From 1f55b7ab907d373581e9abf3fc4b24ed19cf831f Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:50 -0300 Subject: [PATCH 545/562] RDMA/mlx4: Remove FMR support for memory registration HCA's that are driven by mlx4 driver support FRWR method to register memory. Remove the ancient and unsafe FMR method. Link: https://lore.kernel.org/r/8-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 11 -- drivers/infiniband/hw/mlx4/mlx4_ib.h | 16 -- drivers/infiniband/hw/mlx4/mr.c | 93 ----------- drivers/net/ethernet/mellanox/mlx4/main.c | 2 - drivers/net/ethernet/mellanox/mlx4/mr.c | 183 ---------------------- include/linux/mlx4/device.h | 22 +-- 6 files changed, 2 insertions(+), 325 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 275722cec8c6..816d28854a8e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -558,7 +558,6 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; props->timestamp_mask = 0xFFFFFFFFFFFFULL; props->max_ah = INT_MAX; @@ -2600,13 +2599,6 @@ static const struct ib_device_ops mlx4_ib_dev_wq_ops = { .modify_wq = mlx4_ib_modify_wq, }; -static const struct ib_device_ops mlx4_ib_dev_fmr_ops = { - .alloc_fmr = mlx4_ib_fmr_alloc, - .dealloc_fmr = mlx4_ib_fmr_dealloc, - .map_phys_fmr = mlx4_ib_map_phys_fmr, - .unmap_fmr = mlx4_ib_unmap_fmr, -}; - static const struct ib_device_ops mlx4_ib_dev_mw_ops = { .alloc_mw = mlx4_ib_alloc_mw, .dealloc_mw = mlx4_ib_dealloc_mw, @@ -2724,9 +2716,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); } - if (!mlx4_is_slave(ibdev->dev)) - ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops); - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { ibdev->ib_dev.uverbs_cmd_mask |= diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 182a237b87f7..6f4ea1067095 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -146,11 +146,6 @@ struct mlx4_ib_mw { struct mlx4_mw mmw; }; -struct mlx4_ib_fmr { - struct ib_fmr ibfmr; - struct mlx4_fmr mfmr; -}; - #define MAX_REGS_PER_FLOW 2 struct mlx4_flow_reg_id { @@ -679,11 +674,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) return container_of(ibmw, struct mlx4_ib_mw, ibmw); } -static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); -} - static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow) { return container_of(ibflow, struct mlx4_ib_flow, ibflow); @@ -794,12 +784,6 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); -struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); -int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, - u64 iova); -int mlx4_ib_unmap_fmr(struct list_head *fmr_list); -int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view); int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index b0121c90c561..e2fb71b23c80 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -698,99 +698,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return ERR_PTR(err); } -struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, - struct ib_fmr_attr *fmr_attr) -{ - struct mlx4_ib_dev *dev = to_mdev(pd->device); - struct mlx4_ib_fmr *fmr; - int err = -ENOMEM; - - fmr = kmalloc(sizeof *fmr, GFP_KERNEL); - if (!fmr) - return ERR_PTR(-ENOMEM); - - err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc), - fmr_attr->max_pages, fmr_attr->max_maps, - fmr_attr->page_shift, &fmr->mfmr); - if (err) - goto err_free; - - err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr); - if (err) - goto err_mr; - - fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key; - - return &fmr->ibfmr; - -err_mr: - (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); - -err_free: - kfree(fmr); - - return ERR_PTR(err); -} - -int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int npages, u64 iova) -{ - struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); - struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device); - - return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova, - &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); -} - -int mlx4_ib_unmap_fmr(struct list_head *fmr_list) -{ - struct ib_fmr *ibfmr; - int err; - struct mlx4_dev *mdev = NULL; - - list_for_each_entry(ibfmr, fmr_list, list) { - if (mdev && to_mdev(ibfmr->device)->dev != mdev) - return -EINVAL; - mdev = to_mdev(ibfmr->device)->dev; - } - - if (!mdev) - return 0; - - list_for_each_entry(ibfmr, fmr_list, list) { - struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); - - mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); - } - - /* - * Make sure all MPT status updates are visible before issuing - * SYNC_TPT firmware command. - */ - wmb(); - - err = mlx4_SYNC_TPT(mdev); - if (err) - pr_warn("SYNC_TPT error %d when " - "unmapping FMRs\n", err); - - return 0; -} - -int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) -{ - struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); - struct mlx4_ib_dev *dev = to_mdev(ibfmr->device); - int err; - - err = mlx4_fmr_free(dev->dev, &ifmr->mfmr); - - if (!err) - kfree(ifmr); - - return err; -} - static int mlx4_set_page(struct ib_mr *ibmr, u64 addr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index c72c4e1ea383..3d9aa7da95e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2345,8 +2345,6 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto out_free; } - dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; - if (enable_4k_uar || !dev->persist->num_vfs) { init_hca->log_uar_sz = ilog2(dev->caps.num_uars) + PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 1a11bc0e1612..d2986f1f2db0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -966,189 +966,6 @@ void mlx4_cleanup_mr_table(struct mlx4_dev *dev) mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); } -static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list, - int npages, u64 iova) -{ - int i, page_mask; - - if (npages > fmr->max_pages) - return -EINVAL; - - page_mask = (1 << fmr->page_shift) - 1; - - /* We are getting page lists, so va must be page aligned. */ - if (iova & page_mask) - return -EINVAL; - - /* Trust the user not to pass misaligned data in page_list */ - if (0) - for (i = 0; i < npages; ++i) { - if (page_list[i] & ~page_mask) - return -EINVAL; - } - - if (fmr->maps >= fmr->max_maps) - return -EINVAL; - - return 0; -} - -int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, - int npages, u64 iova, u32 *lkey, u32 *rkey) -{ - u32 key; - int i, err; - - err = mlx4_check_fmr(fmr, page_list, npages, iova); - if (err) - return err; - - ++fmr->maps; - - key = key_to_hw_index(fmr->mr.key); - key += dev->caps.num_mpts; - *lkey = *rkey = fmr->mr.key = hw_index_to_key(key); - - *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW; - - /* Make sure MPT status is visible before writing MTT entries */ - wmb(); - - dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle, - npages * sizeof(u64), DMA_TO_DEVICE); - - for (i = 0; i < npages; ++i) - fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - - dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle, - npages * sizeof(u64), DMA_TO_DEVICE); - - fmr->mpt->key = cpu_to_be32(key); - fmr->mpt->lkey = cpu_to_be32(key); - fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift)); - fmr->mpt->start = cpu_to_be64(iova); - - /* Make MTT entries are visible before setting MPT status */ - wmb(); - - *(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW; - - /* Make sure MPT status is visible before consumer can use FMR */ - wmb(); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr); - -int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, - int max_maps, u8 page_shift, struct mlx4_fmr *fmr) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - int err = -ENOMEM; - - if (max_maps > dev->caps.max_fmr_maps) - return -EINVAL; - - if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32) - return -EINVAL; - - /* All MTTs must fit in the same page */ - if (max_pages * sizeof(*fmr->mtts) > PAGE_SIZE) - return -EINVAL; - - fmr->page_shift = page_shift; - fmr->max_pages = max_pages; - fmr->max_maps = max_maps; - fmr->maps = 0; - - err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages, - page_shift, &fmr->mr); - if (err) - return err; - - fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table, - fmr->mr.mtt.offset, - &fmr->dma_handle); - - if (!fmr->mtts) { - err = -ENOMEM; - goto err_free; - } - - return 0; - -err_free: - (void) mlx4_mr_free(dev, &fmr->mr); - return err; -} -EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); - -int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - int err; - - err = mlx4_mr_enable(dev, &fmr->mr); - if (err) - return err; - - fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table, - key_to_hw_index(fmr->mr.key), NULL); - if (!fmr->mpt) - return -ENOMEM; - - return 0; -} -EXPORT_SYMBOL_GPL(mlx4_fmr_enable); - -void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, - u32 *lkey, u32 *rkey) -{ - if (!fmr->maps) - return; - - /* To unmap: it is sufficient to take back ownership from HW */ - *(u8 *)fmr->mpt = MLX4_MPT_STATUS_SW; - - /* Make sure MPT status is visible */ - wmb(); - - fmr->maps = 0; -} -EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); - -int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) -{ - int ret; - - if (fmr->maps) - return -EBUSY; - if (fmr->mr.enabled == MLX4_MPT_EN_HW) { - /* In case of FMR was enabled and unmapped - * make sure to give ownership of MPT back to HW - * so HW2SW_MPT command will success. - */ - *(u8 *)fmr->mpt = MLX4_MPT_STATUS_SW; - /* Make sure MPT status is visible before changing MPT fields */ - wmb(); - fmr->mpt->length = 0; - fmr->mpt->start = 0; - /* Make sure MPT data is visible after changing MPT status */ - wmb(); - *(u8 *)fmr->mpt = MLX4_MPT_STATUS_HW; - /* make sure MPT status is visible */ - wmb(); - } - - ret = mlx4_mr_free(dev, &fmr->mr); - if (ret) - return ret; - fmr->mr.enabled = MLX4_MPT_DISABLED; - - return 0; -} -EXPORT_SYMBOL_GPL(mlx4_fmr_free); - int mlx4_SYNC_TPT(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 20372de0b587..06e066e04a4b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -573,7 +573,6 @@ struct mlx4_caps { int reserved_eqs; int num_comp_vectors; int num_mpts; - int max_fmr_maps; int num_mtts; int fmr_reserved_mtts; int reserved_mtts; @@ -707,17 +706,6 @@ struct mlx4_mw { int enabled; }; -struct mlx4_fmr { - struct mlx4_mr mr; - struct mlx4_mpt_entry *mpt; - __be64 *mtts; - dma_addr_t dma_handle; - int max_pages; - int max_maps; - int maps; - u8 page_shift; -}; - struct mlx4_uar { unsigned long pfn; int index; @@ -1412,14 +1400,6 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); -int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, - int npages, u64 iova, u32 *lkey, u32 *rkey); -int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, - int max_maps, u8 page_shift, struct mlx4_fmr *fmr); -int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr); -void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, - u32 *lkey, u32 *rkey); -int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupt(struct mlx4_dev *dev, int vector); int mlx4_test_async(struct mlx4_dev *dev); @@ -1522,6 +1502,8 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, int enable); + +struct mlx4_mpt_entry; int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, struct mlx4_mpt_entry ***mpt_entry); int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, From d6747b3715561ddc14e805e7ad4dfab2c9f245bb Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:51 -0300 Subject: [PATCH 546/562] RDMA/mthca: Remove FMR support for memory registration Remove the ancient and unsafe FMR method. Link: https://lore.kernel.org/r/9-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mthca/mthca_dev.h | 10 - drivers/infiniband/hw/mthca/mthca_mr.c | 262 +------------------ drivers/infiniband/hw/mthca/mthca_provider.c | 86 ------ drivers/infiniband/hw/mthca/mthca_provider.h | 23 -- 4 files changed, 1 insertion(+), 380 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 599794c5a78f..7550e9d03dec 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -478,16 +478,6 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, u32 access, struct mthca_mr *mr); void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr); -int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_fmr *fmr); -int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova); -void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr); -int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova); -void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr); -int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr); - int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt); void mthca_unmap_eq_icm(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 4250b2c18c64..ce0e0867e488 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -541,7 +541,7 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, return err; } -/* Free mr or fmr */ +/* Free mr */ static void mthca_free_region(struct mthca_dev *dev, u32 lkey) { mthca_table_put(dev, dev->mr_table.mpt_table, @@ -564,266 +564,6 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) mthca_free_mtt(dev, mr->mtt); } -int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_fmr *mr) -{ - struct mthca_mpt_entry *mpt_entry; - struct mthca_mailbox *mailbox; - u64 mtt_seg; - u32 key, idx; - int list_len = mr->attr.max_pages; - int err = -ENOMEM; - int i; - - if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32) - return -EINVAL; - - /* For Arbel, all MTTs must fit in the same page. */ - if (mthca_is_memfree(dev) && - mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE) - return -EINVAL; - - mr->maps = 0; - - key = mthca_alloc(&dev->mr_table.mpt_alloc); - if (key == -1) - return -ENOMEM; - key = adjust_key(dev, key); - - idx = key & (dev->limits.num_mpts - 1); - mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); - - if (mthca_is_memfree(dev)) { - err = mthca_table_get(dev, dev->mr_table.mpt_table, key); - if (err) - goto err_out_mpt_free; - - mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL); - BUG_ON(!mr->mem.arbel.mpt); - } else - mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + - sizeof *(mr->mem.tavor.mpt) * idx; - - mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); - if (IS_ERR(mr->mtt)) { - err = PTR_ERR(mr->mtt); - goto err_out_table; - } - - mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size; - - if (mthca_is_memfree(dev)) { - mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, - mr->mtt->first_seg, - &mr->mem.arbel.dma_handle); - BUG_ON(!mr->mem.arbel.mtts); - } else - mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; - - mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); - if (IS_ERR(mailbox)) { - err = PTR_ERR(mailbox); - goto err_out_free_mtt; - } - - mpt_entry = mailbox->buf; - - mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | - MTHCA_MPT_FLAG_MIO | - MTHCA_MPT_FLAG_REGION | - access); - - mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12); - mpt_entry->key = cpu_to_be32(key); - mpt_entry->pd = cpu_to_be32(pd); - memset(&mpt_entry->start, 0, - sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start)); - mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg); - - if (0) { - mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); - for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { - if (i % 4 == 0) - printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); - if ((i + 1) % 4 == 0) - printk("\n"); - } - } - - err = mthca_SW2HW_MPT(dev, mailbox, - key & (dev->limits.num_mpts - 1)); - if (err) { - mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); - goto err_out_mailbox_free; - } - - mthca_free_mailbox(dev, mailbox); - return 0; - -err_out_mailbox_free: - mthca_free_mailbox(dev, mailbox); - -err_out_free_mtt: - mthca_free_mtt(dev, mr->mtt); - -err_out_table: - mthca_table_put(dev, dev->mr_table.mpt_table, key); - -err_out_mpt_free: - mthca_free(&dev->mr_table.mpt_alloc, key); - return err; -} - -int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr) -{ - if (fmr->maps) - return -EBUSY; - - mthca_free_region(dev, fmr->ibmr.lkey); - mthca_free_mtt(dev, fmr->mtt); - - return 0; -} - -static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list, - int list_len, u64 iova) -{ - int i, page_mask; - - if (list_len > fmr->attr.max_pages) - return -EINVAL; - - page_mask = (1 << fmr->attr.page_shift) - 1; - - /* We are getting page lists, so va must be page aligned. */ - if (iova & page_mask) - return -EINVAL; - - /* Trust the user not to pass misaligned data in page_list */ - if (0) - for (i = 0; i < list_len; ++i) { - if (page_list[i] & ~page_mask) - return -EINVAL; - } - - if (fmr->maps >= fmr->attr.max_maps) - return -EINVAL; - - return 0; -} - - -int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) -{ - struct mthca_fmr *fmr = to_mfmr(ibfmr); - struct mthca_dev *dev = to_mdev(ibfmr->device); - struct mthca_mpt_entry mpt_entry; - u32 key; - int i, err; - - err = mthca_check_fmr(fmr, page_list, list_len, iova); - if (err) - return err; - - ++fmr->maps; - - key = tavor_key_to_hw_index(fmr->ibmr.lkey); - key += dev->limits.num_mpts; - fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key); - - writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); - - for (i = 0; i < list_len; ++i) { - __be64 mtt_entry = cpu_to_be64(page_list[i] | - MTHCA_MTT_FLAG_PRESENT); - mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i); - } - - mpt_entry.lkey = cpu_to_be32(key); - mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); - mpt_entry.start = cpu_to_be64(iova); - - __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); - memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, - offsetof(struct mthca_mpt_entry, window_count) - - offsetof(struct mthca_mpt_entry, start)); - - writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt); - - return 0; -} - -int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) -{ - struct mthca_fmr *fmr = to_mfmr(ibfmr); - struct mthca_dev *dev = to_mdev(ibfmr->device); - u32 key; - int i, err; - - err = mthca_check_fmr(fmr, page_list, list_len, iova); - if (err) - return err; - - ++fmr->maps; - - key = arbel_key_to_hw_index(fmr->ibmr.lkey); - if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) - key += SINAI_FMR_KEY_INC; - else - key += dev->limits.num_mpts; - fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); - - *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; - - wmb(); - - dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle, - list_len * sizeof(u64), DMA_TO_DEVICE); - - for (i = 0; i < list_len; ++i) - fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] | - MTHCA_MTT_FLAG_PRESENT); - - dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle, - list_len * sizeof(u64), DMA_TO_DEVICE); - - fmr->mem.arbel.mpt->key = cpu_to_be32(key); - fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); - fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); - fmr->mem.arbel.mpt->start = cpu_to_be64(iova); - - wmb(); - - *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW; - - wmb(); - - return 0; -} - -void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) -{ - if (!fmr->maps) - return; - - fmr->maps = 0; - - writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); -} - -void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) -{ - if (!fmr->maps) - return; - - fmr->maps = 0; - - *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; -} - int mthca_init_mr_table(struct mthca_dev *dev) { phys_addr_t addr; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index bc3e3d741ca3..de2124a8ee2b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -958,69 +958,6 @@ static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata) return 0; } -static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct mthca_fmr *fmr; - int err; - - fmr = kmalloc(sizeof *fmr, GFP_KERNEL); - if (!fmr) - return ERR_PTR(-ENOMEM); - - memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr); - err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num, - convert_access(mr_access_flags), fmr); - - if (err) { - kfree(fmr); - return ERR_PTR(err); - } - - return &fmr->ibmr; -} - -static int mthca_dealloc_fmr(struct ib_fmr *fmr) -{ - struct mthca_fmr *mfmr = to_mfmr(fmr); - int err; - - err = mthca_free_fmr(to_mdev(fmr->device), mfmr); - if (err) - return err; - - kfree(mfmr); - return 0; -} - -static int mthca_unmap_fmr(struct list_head *fmr_list) -{ - struct ib_fmr *fmr; - int err; - struct mthca_dev *mdev = NULL; - - list_for_each_entry(fmr, fmr_list, list) { - if (mdev && to_mdev(fmr->device) != mdev) - return -EINVAL; - mdev = to_mdev(fmr->device); - } - - if (!mdev) - return 0; - - if (mthca_is_memfree(mdev)) { - list_for_each_entry(fmr, fmr_list, list) - mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr)); - - wmb(); - } else - list_for_each_entry(fmr, fmr_list, list) - mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr)); - - err = mthca_SYNC_TPT(mdev); - return err; -} - static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { @@ -1204,20 +1141,6 @@ static const struct ib_device_ops mthca_dev_tavor_srq_ops = { INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq), }; -static const struct ib_device_ops mthca_dev_arbel_fmr_ops = { - .alloc_fmr = mthca_alloc_fmr, - .dealloc_fmr = mthca_dealloc_fmr, - .map_phys_fmr = mthca_arbel_map_phys_fmr, - .unmap_fmr = mthca_unmap_fmr, -}; - -static const struct ib_device_ops mthca_dev_tavor_fmr_ops = { - .alloc_fmr = mthca_alloc_fmr, - .dealloc_fmr = mthca_dealloc_fmr, - .map_phys_fmr = mthca_tavor_map_phys_fmr, - .unmap_fmr = mthca_unmap_fmr, -}; - static const struct ib_device_ops mthca_dev_arbel_ops = { .post_recv = mthca_arbel_post_receive, .post_send = mthca_arbel_post_send, @@ -1276,15 +1199,6 @@ int mthca_register_device(struct mthca_dev *dev) &mthca_dev_tavor_srq_ops); } - if (dev->mthca_flags & MTHCA_FLAG_FMR) { - if (mthca_is_memfree(dev)) - ib_set_device_ops(&dev->ib_dev, - &mthca_dev_arbel_fmr_ops); - else - ib_set_device_ops(&dev->ib_dev, - &mthca_dev_tavor_fmr_ops); - } - ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops); if (mthca_is_memfree(dev)) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 596acc45569b..84c64bff0d92 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -76,24 +76,6 @@ struct mthca_mr { struct mthca_mtt *mtt; }; -struct mthca_fmr { - struct ib_fmr ibmr; - struct ib_fmr_attr attr; - struct mthca_mtt *mtt; - int maps; - union { - struct { - struct mthca_mpt_entry __iomem *mpt; - u64 __iomem *mtts; - } tavor; - struct { - struct mthca_mpt_entry *mpt; - __be64 *mtts; - dma_addr_t dma_handle; - } arbel; - } mem; -}; - struct mthca_pd { struct ib_pd ibpd; u32 pd_num; @@ -301,11 +283,6 @@ static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext return container_of(ibucontext, struct mthca_ucontext, ibucontext); } -static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) -{ - return container_of(ibmr, struct mthca_fmr, ibmr); -} - static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr) { return container_of(ibmr, struct mthca_mr, ibmr); From 22c9cc2408b734d2e5b193d287572cd2c7011183 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:52 -0300 Subject: [PATCH 547/562] RDMA/rdmavt: Remove FMR memory registration Use FRWR method to register memory by default and remove the ancient and unsafe FMR method. Link: https://lore.kernel.org/r/10-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Max Gurtovoy Tested-by: Dennis Dalessandro Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mr.c | 154 ------------------------------ drivers/infiniband/sw/rdmavt/mr.h | 15 --- drivers/infiniband/sw/rdmavt/vt.c | 4 - 3 files changed, 173 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 72f6534fbb52..ddb0c0d771c2 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -713,160 +713,6 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey) } EXPORT_SYMBOL(rvt_invalidate_rkey); -/** - * rvt_alloc_fmr - allocate a fast memory region - * @pd: the protection domain for this memory region - * @mr_access_flags: access flags for this memory region - * @fmr_attr: fast memory region attributes - * - * Return: the memory region on success, otherwise returns an errno. - */ -struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct rvt_fmr *fmr; - int m; - struct ib_fmr *ret; - int rval = -ENOMEM; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ; - fmr = kzalloc(struct_size(fmr, mr.map, m), GFP_KERNEL); - if (!fmr) - goto bail; - - rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages, - PERCPU_REF_INIT_ATOMIC); - if (rval) - goto bail; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ - rval = rvt_alloc_lkey(&fmr->mr, 0); - if (rval) - goto bail_mregion; - fmr->ibfmr.rkey = fmr->mr.lkey; - fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). - */ - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->mr.page_shift = fmr_attr->page_shift; - - ret = &fmr->ibfmr; -done: - return ret; - -bail_mregion: - rvt_deinit_mregion(&fmr->mr); -bail: - kfree(fmr); - ret = ERR_PTR(rval); - goto done; -} - -/** - * rvt_map_phys_fmr - set up a fast memory region - * @ibfmr: the fast memory region to set up - * @page_list: the list of pages to associate with the fast memory region - * @list_len: the number of pages to associate with the fast memory region - * @iova: the virtual address of the start of the fast memory region - * - * This may be called from interrupt context. - * - * Return: 0 on success - */ - -int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) -{ - struct rvt_fmr *fmr = to_ifmr(ibfmr); - struct rvt_lkey_table *rkt; - unsigned long flags; - int m, n; - unsigned long i; - u32 ps; - struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); - - i = atomic_long_read(&fmr->mr.refcount.count); - if (i > 2) - return -EBUSY; - - if (list_len > fmr->mr.max_segs) - return -EINVAL; - - rkt = &rdi->lkey_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = iova; - fmr->mr.iova = iova; - ps = 1 << fmr->mr.page_shift; - fmr->mr.length = list_len * ps; - m = 0; - n = 0; - for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; - fmr->mr.map[m]->segs[n].length = ps; - trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); - if (++n == RVT_SEGSZ) { - m++; - n = 0; - } - } - spin_unlock_irqrestore(&rkt->lock, flags); - return 0; -} - -/** - * rvt_unmap_fmr - unmap fast memory regions - * @fmr_list: the list of fast memory regions to unmap - * - * Return: 0 on success. - */ -int rvt_unmap_fmr(struct list_head *fmr_list) -{ - struct rvt_fmr *fmr; - struct rvt_lkey_table *rkt; - unsigned long flags; - struct rvt_dev_info *rdi; - - list_for_each_entry(fmr, fmr_list, ibfmr.list) { - rdi = ib_to_rvt(fmr->ibfmr.device); - rkt = &rdi->lkey_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - spin_unlock_irqrestore(&rkt->lock, flags); - } - return 0; -} - -/** - * rvt_dealloc_fmr - deallocate a fast memory region - * @ibfmr: the fast memory region to deallocate - * - * Return: 0 on success. - */ -int rvt_dealloc_fmr(struct ib_fmr *ibfmr) -{ - struct rvt_fmr *fmr = to_ifmr(ibfmr); - int ret = 0; - - rvt_free_lkey(&fmr->mr); - rvt_put_mr(&fmr->mr); /* will set completion if last */ - ret = rvt_check_refs(&fmr->mr, __func__); - if (ret) - goto out; - rvt_deinit_mregion(&fmr->mr); - kfree(fmr); -out: - return ret; -} - /** * rvt_sge_adjacent - is isge compressible * @last_sge: last outgoing SGE written diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 2c8d0752e8e3..780fc63af98b 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -49,10 +49,6 @@ */ #include -struct rvt_fmr { - struct ib_fmr ibfmr; - struct rvt_mregion mr; /* must be last */ -}; struct rvt_mr { struct ib_mr ibmr; @@ -60,11 +56,6 @@ struct rvt_mr { struct rvt_mregion mr; /* must be last */ }; -static inline struct rvt_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct rvt_fmr, ibfmr); -} - static inline struct rvt_mr *to_imr(struct ib_mr *ibmr) { return container_of(ibmr, struct rvt_mr, ibmr); @@ -83,11 +74,5 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); -int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova); -int rvt_unmap_fmr(struct list_head *fmr_list); -int rvt_dealloc_fmr(struct ib_fmr *ibfmr); #endif /* DEF_RVTMR_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 72b031ab7092..f904bb34477a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -378,7 +378,6 @@ enum { static const struct ib_device_ops rvt_dev_ops = { .uverbs_abi_ver = RVT_UVERBS_ABI_VERSION, - .alloc_fmr = rvt_alloc_fmr, .alloc_mr = rvt_alloc_mr, .alloc_pd = rvt_alloc_pd, .alloc_ucontext = rvt_alloc_ucontext, @@ -387,7 +386,6 @@ static const struct ib_device_ops rvt_dev_ops = { .create_cq = rvt_create_cq, .create_qp = rvt_create_qp, .create_srq = rvt_create_srq, - .dealloc_fmr = rvt_dealloc_fmr, .dealloc_pd = rvt_dealloc_pd, .dealloc_ucontext = rvt_dealloc_ucontext, .dereg_mr = rvt_dereg_mr, @@ -399,7 +397,6 @@ static const struct ib_device_ops rvt_dev_ops = { .get_dma_mr = rvt_get_dma_mr, .get_port_immutable = rvt_get_port_immutable, .map_mr_sg = rvt_map_mr_sg, - .map_phys_fmr = rvt_map_phys_fmr, .mmap = rvt_mmap, .modify_ah = rvt_modify_ah, .modify_device = rvt_modify_device, @@ -420,7 +417,6 @@ static const struct ib_device_ops rvt_dev_ops = { .reg_user_mr = rvt_reg_user_mr, .req_notify_cq = rvt_req_notify_cq, .resize_cq = rvt_resize_cq, - .unmap_fmr = rvt_unmap_fmr, INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq), From 3a578152a9208bbcd196210be2f5396744cda302 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:53 -0300 Subject: [PATCH 548/562] RDMA/core: Remove FMR device ops After removing FMR support from all the RDMA ULPs and providers, there is no need to keep FMR operation for IB devices. Link: https://lore.kernel.org/r/11-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- Documentation/infiniband/core_locking.rst | 2 - drivers/infiniband/core/device.c | 4 -- drivers/infiniband/core/verbs.c | 48 ------------------ include/rdma/ib_verbs.h | 59 ----------------------- 4 files changed, 113 deletions(-) diff --git a/Documentation/infiniband/core_locking.rst b/Documentation/infiniband/core_locking.rst index 8f76a8a5a38f..efd5e7603014 100644 --- a/Documentation/infiniband/core_locking.rst +++ b/Documentation/infiniband/core_locking.rst @@ -22,7 +22,6 @@ Sleeping and interrupt context - post_recv - poll_cq - req_notify_cq - - map_phys_fmr which may not sleep and must be callable from any context. @@ -36,7 +35,6 @@ Sleeping and interrupt context - ib_post_send - ib_post_recv - ib_req_notify_cq - - ib_map_phys_fmr are therefore safe to call from any context. diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 53f541f41ff3..905a2beaf885 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2571,7 +2571,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, add_gid); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); - SET_DEVICE_OP(dev_ops, alloc_fmr); SET_DEVICE_OP(dev_ops, alloc_hw_stats); SET_DEVICE_OP(dev_ops, alloc_mr); SET_DEVICE_OP(dev_ops, alloc_mr_integrity); @@ -2598,7 +2597,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); SET_DEVICE_OP(dev_ops, dealloc_driver); - SET_DEVICE_OP(dev_ops, dealloc_fmr); SET_DEVICE_OP(dev_ops, dealloc_mw); SET_DEVICE_OP(dev_ops, dealloc_pd); SET_DEVICE_OP(dev_ops, dealloc_ucontext); @@ -2642,7 +2640,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); SET_DEVICE_OP(dev_ops, map_mr_sg_pi); - SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); SET_DEVICE_OP(dev_ops, mmap_free); SET_DEVICE_OP(dev_ops, modify_ah); @@ -2676,7 +2673,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, resize_cq); SET_DEVICE_OP(dev_ops, set_vf_guid); SET_DEVICE_OP(dev_ops, set_vf_link_state); - SET_DEVICE_OP(dev_ops, unmap_fmr); SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_cq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 21815e125e98..53d6505c0c7b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2212,54 +2212,6 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, } EXPORT_SYMBOL(ib_alloc_mr_integrity); -/* "Fast" memory regions */ - -struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ib_fmr *fmr; - - if (!pd->device->ops.alloc_fmr) - return ERR_PTR(-EOPNOTSUPP); - - fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr); - if (!IS_ERR(fmr)) { - fmr->device = pd->device; - fmr->pd = pd; - atomic_inc(&pd->usecnt); - } - - return fmr; -} -EXPORT_SYMBOL(ib_alloc_fmr); - -int ib_unmap_fmr(struct list_head *fmr_list) -{ - struct ib_fmr *fmr; - - if (list_empty(fmr_list)) - return 0; - - fmr = list_entry(fmr_list->next, struct ib_fmr, list); - return fmr->device->ops.unmap_fmr(fmr_list); -} -EXPORT_SYMBOL(ib_unmap_fmr); - -int ib_dealloc_fmr(struct ib_fmr *fmr) -{ - struct ib_pd *pd; - int ret; - - pd = fmr->pd; - ret = fmr->device->ops.dealloc_fmr(fmr); - if (!ret) - atomic_dec(&pd->usecnt); - - return ret; -} -EXPORT_SYMBOL(ib_dealloc_fmr); - /* Multicast groups */ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 19864da78649..ff6a8053ec52 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1475,12 +1475,6 @@ enum ib_mr_rereg_flags { IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1) }; -struct ib_fmr_attr { - int max_pages; - int max_maps; - u8 page_shift; -}; - struct ib_umem; enum rdma_remove_reason { @@ -1855,14 +1849,6 @@ struct ib_mw { enum ib_mw_type type; }; -struct ib_fmr { - struct ib_device *device; - struct ib_pd *pd; - struct list_head list; - u32 lkey; - u32 rkey; -}; - /* Supported steering options */ enum ib_flow_attr_type { /* steering according to rule specifications */ @@ -2505,12 +2491,6 @@ struct ib_device_ops { struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); - struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, - u64 iova); - int (*unmap_fmr)(struct list_head *fmr_list); - int (*dealloc_fmr)(struct ib_fmr *fmr); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, @@ -4319,45 +4299,6 @@ static inline u32 ib_inc_rkey(u32 rkey) return ((rkey + 1) & mask) | (rkey & ~mask); } -/** - * ib_alloc_fmr - Allocates a unmapped fast memory region. - * @pd: The protection domain associated with the unmapped region. - * @mr_access_flags: Specifies the memory access rights. - * @fmr_attr: Attributes of the unmapped region. - * - * A fast memory region must be mapped before it can be used as part of - * a work request. - */ -struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -/** - * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. - * @fmr: The fast memory region to associate with the pages. - * @page_list: An array of physical pages to map to the fast memory region. - * @list_len: The number of pages in page_list. - * @iova: The I/O virtual address to use with the mapped region. - */ -static inline int ib_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, int list_len, - u64 iova) -{ - return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova); -} - -/** - * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. - * @fmr_list: A linked list of fast memory regions to unmap. - */ -int ib_unmap_fmr(struct list_head *fmr_list); - -/** - * ib_dealloc_fmr - Deallocates a fast memory region. - * @fmr: The fast memory region to deallocate. - */ -int ib_dealloc_fmr(struct ib_fmr *fmr); - /** * ib_attach_mcast - Attaches the specified QP to a multicast group. * @qp: QP to attach to the multicast group. The QP must be type From 649392bf75a423287a9c4936b341677f12e8cf0b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 28 May 2020 16:45:54 -0300 Subject: [PATCH 549/562] RDMA: Remove 'max_fmr' Now that FMR support is gone, this attribute can be deleted from all places. Link: https://lore.kernel.org/r/12-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma.h | 1 - drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 - drivers/infiniband/hw/qedr/main.c | 1 - drivers/infiniband/hw/qedr/qedr.h | 1 - drivers/infiniband/hw/qedr/verbs.c | 1 - drivers/infiniband/sw/rdmavt/mr.c | 1 - drivers/infiniband/sw/siw/siw.h | 2 -- drivers/infiniband/sw/siw/siw_main.c | 1 - drivers/infiniband/sw/siw/siw_verbs.c | 1 - drivers/net/ethernet/qlogic/qed/qed_rdma.c | 1 - drivers/net/ethernet/qlogic/qed/qed_rdma.h | 1 - include/linux/qed/qed_rdma_if.h | 1 - include/rdma/ib_verbs.h | 1 - net/rds/ib.c | 2 +- 16 files changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2067a939788b..56d207405dbd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -356,7 +356,6 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; resp->max_ah = attr->max_ah; - resp->max_fmr = attr->max_fmr; resp->max_map_per_fmr = attr->max_map_per_fmr; resp->max_srq = attr->max_srq; resp->max_srq_wr = attr->max_srq_wr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 7baedc74e39d..fcfe0e82197a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -98,7 +98,6 @@ struct ocrdma_dev_attr { u64 max_mr_size; u32 max_num_mr_pbl; int max_mw; - int max_fmr; int max_map_per_fmr; int max_pages_per_frmr; u16 max_ord_per_qp; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index d82d3ec3649e..e07bf0b2209a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1190,7 +1190,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_mr = rsp->max_mr; attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) | rsp->max_mr_size_lo; - attr->max_fmr = 0; attr->max_pages_per_frmr = rsp->max_pages_per_frmr; attr->max_num_mr_pbl = rsp->max_num_mr_pbl; attr->max_cqe = rsp->max_cq_cqes_per_cq & diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 10e343894595..890e3fd41d21 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -99,7 +99,6 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; - attr->max_fmr = 0; attr->max_map_per_fmr = 0; attr->max_qp_rd_atom = min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index dcdc85a1ab25..ccaedfd53e49 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -632,7 +632,6 @@ static int qedr_set_device_attr(struct qedr_dev *dev) attr->max_mr_size = qed_attr->max_mr_size; attr->max_cqe = min_t(u64, qed_attr->max_cqe, QEDR_MAX_CQES); attr->max_mw = qed_attr->max_mw; - attr->max_fmr = qed_attr->max_fmr; attr->max_mr_mw_fmr_pbl = qed_attr->max_mr_mw_fmr_pbl; attr->max_mr_mw_fmr_size = qed_attr->max_mr_mw_fmr_size; attr->max_pd = qed_attr->max_pd; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 5488dbd59d3c..fdf90ecb2699 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -103,7 +103,6 @@ struct qedr_device_attr { u64 max_mr_size; u32 max_cqe; u32 max_mw; - u32 max_fmr; u32 max_mr_mw_fmr_pbl; u64 max_mr_mw_fmr_size; u32 max_pd; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d6b94a713573..ca88006eaa66 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -145,7 +145,6 @@ int qedr_query_device(struct ib_device *ibdev, attr->max_mw = qattr->max_mw; attr->max_pd = qattr->max_pd; attr->atomic_cap = dev->atomic_cap; - attr->max_fmr = qattr->max_fmr; attr->max_map_per_fmr = 16; attr->max_qp_init_rd_atom = 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index ddb0c0d771c2..60864e5ca7cb 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -97,7 +97,6 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; return 0; } diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 5a58a1cc7a7e..e9753831ac3f 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -30,7 +30,6 @@ #define SIW_MAX_MR (SIW_MAX_QP * 10) #define SIW_MAX_PD SIW_MAX_QP #define SIW_MAX_MW 0 /* to be set if MW's are supported */ -#define SIW_MAX_FMR SIW_MAX_MR #define SIW_MAX_SRQ SIW_MAX_QP #define SIW_MAX_SRQ_WR (SIW_MAX_QP_WR * 10) #define SIW_MAX_CONTEXT SIW_MAX_PD @@ -59,7 +58,6 @@ struct siw_dev_cap { int max_mr; int max_pd; int max_mw; - int max_fmr; int max_srq; int max_srq_wr; int max_srq_sge; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 5cd40fb9e20c..a0b8cc643c5c 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -413,7 +413,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->attrs.max_mr = SIW_MAX_MR; sdev->attrs.max_pd = SIW_MAX_PD; sdev->attrs.max_mw = SIW_MAX_MW; - sdev->attrs.max_fmr = SIW_MAX_FMR; sdev->attrs.max_srq = SIW_MAX_SRQ; sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; sdev->attrs.max_srq_sge = SIW_MAX_SGE; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index aeb842bc7a1e..987e2ba05dbc 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -136,7 +136,6 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, attr->max_cq = sdev->attrs.max_cq; attr->max_cqe = sdev->attrs.max_cqe; attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; - attr->max_fmr = sdev->attrs.max_fmr; attr->max_mr = sdev->attrs.max_mr; attr->max_mw = sdev->attrs.max_mw; attr->max_mr_size = ~0ull; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 38b1f402f7ed..5dc18a4bdda4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -499,7 +499,6 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, dev->max_cqe = QED_RDMA_MAX_CQE_16_BIT; dev->max_mw = 0; - dev->max_fmr = QED_RDMA_MAX_FMR; dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8); dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE; dev->max_pkey = QED_RDMA_MAX_P_KEY; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h index 3689fe3e5935..dfaa2f552627 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h @@ -45,7 +45,6 @@ #include "qed_iwarp.h" #include "qed_roce.h" -#define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) #define QED_RDMA_MAX_P_KEY (1) #define QED_RDMA_MAX_WQE (0x7FFF) #define QED_RDMA_MAX_SRQ_WQE_ELEM (0x7FFF) diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 74efca15fde7..c90276cda5c1 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -91,7 +91,6 @@ struct qed_rdma_device { u64 max_mr_size; u32 max_cqe; u32 max_mw; - u32 max_fmr; u32 max_mr_mw_fmr_pbl; u64 max_mr_mw_fmr_size; u32 max_pd; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ff6a8053ec52..c4708b3243f9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -430,7 +430,6 @@ struct ib_device_attr { int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; - int max_fmr; int max_map_per_fmr; int max_srq; int max_srq_wr; diff --git a/net/rds/ib.c b/net/rds/ib.c index 6c43b3e4c736..deecbdcdae84 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -217,7 +217,7 @@ static int rds_ib_add_one(struct ib_device *device) } rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", - device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, + device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge, rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); pr_info("RDS/IB: %s: added\n", device->name); From 4d12c04caa88cd3115f25acd832a7cddb698981b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 28 May 2020 16:45:55 -0300 Subject: [PATCH 550/562] RDMA: Remove 'max_map_per_fmr' Now that FMR support is gone, this attribute can be deleted from all places. Link: https://lore.kernel.org/r/13-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/hw/hfi1/verbs.c | 1 - drivers/infiniband/hw/i40iw/i40iw_verbs.c | 1 - drivers/infiniband/hw/mlx5/main.c | 1 - drivers/infiniband/hw/mthca/mthca_provider.c | 10 ---------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 - drivers/infiniband/hw/qedr/verbs.c | 1 - drivers/infiniband/hw/qib/qib_verbs.c | 1 - drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 1 - include/rdma/ib_verbs.h | 1 - 10 files changed, 19 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 56d207405dbd..b48b3f6e632d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -356,7 +356,6 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; resp->max_ah = attr->max_ah; - resp->max_map_per_fmr = attr->max_map_per_fmr; resp->max_srq = attr->max_srq; resp->max_srq_wr = attr->max_srq_wr; resp->max_srq_sge = attr->max_srq_sge; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 43ddced15951..30865635b449 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1361,7 +1361,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; rdi->dparms.props.max_cqe = hfi1_max_cqes; - rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_pd = hfi1_max_pds; rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 1b6fb1380961..19af29a48c55 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -83,7 +83,6 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE; props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->atomic_cap = IB_ATOMIC_NONE; - props->max_map_per_fmr = 1; props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR; return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 49a1aff72715..343a8b8361e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -999,7 +999,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ props->max_ah = INT_MAX; props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index de2124a8ee2b..9fa2f9164a47 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -118,16 +118,6 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - /* - * If Sinai memory key optimization is being used, then only - * the 8-bit key portion will change. For other HCAs, the - * unused index bits will also be used for FMR remapping. - */ - if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) - props->max_map_per_fmr = 255; - else - props->max_map_per_fmr = - (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1; err = 0; out: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 890e3fd41d21..d11c74390a12 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -99,7 +99,6 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; - attr->max_map_per_fmr = 0; attr->max_qp_rd_atom = min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index ca88006eaa66..9b9e80266367 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -145,7 +145,6 @@ int qedr_query_device(struct ib_device *ibdev, attr->max_mw = qattr->max_mw; attr->max_pd = qattr->max_pd; attr->atomic_cap = dev->atomic_cap; - attr->max_map_per_fmr = 16; attr->max_qp_init_rd_atom = 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); attr->max_qp_rd_atom = diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 7508abb6a0fa..7acf9ba5358a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1460,7 +1460,6 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; rdi->dparms.props.max_ah = ib_qib_max_ahs; - rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; rdi->dparms.props.max_srq = ib_qib_max_srqs; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 71f82339446c..b8a77ce11590 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -322,7 +322,6 @@ int usnic_ib_query_device(struct ib_device *ibdev, props->max_mcast_grp = 0; props->max_mcast_qp_attach = 0; props->max_total_mcast_qp_attach = 0; - props->max_map_per_fmr = 0; /* Owned by Userspace * max_qp_wr, max_sge, max_sge_rd, max_cqe */ mutex_unlock(&us_ibdev->usdev_lock); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c4708b3243f9..033e7044f29c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -430,7 +430,6 @@ struct ib_device_attr { int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; - int max_map_per_fmr; int max_srq; int max_srq_wr; int max_srq_sge; From 24c567ff75d342ed2392cd470c3acd29a09e03ba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 May 2020 17:02:24 +0300 Subject: [PATCH 551/562] IB/hfi1: Fix hfi1_netdev_rx_init() error handling The hfi1_vnic_up() function doesn't check whether hfi1_netdev_rx_init() returns errors. In hfi1_vnic_init() we need to change the code to preserve the error code instead of returning success. Fixes: 2280740f01ae ("IB/hfi1: Virtual Network Interface Controller (VNIC) HW support") Fixes: 4730f4a6c6b2 ("IB/hfi1: Activate the dummy netdev") Link: https://lore.kernel.org/r/20200530140224.GA1330098@mwanda Signed-off-by: Dan Carpenter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/vnic_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b183c56b7b6a..a90824de0f57 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -457,13 +457,19 @@ static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) if (rc < 0) return rc; - hfi1_netdev_rx_init(dd); + rc = hfi1_netdev_rx_init(dd); + if (rc) + goto err_remove; netif_carrier_on(netdev); netif_tx_start_all_queues(netdev); set_bit(HFI1_VNIC_UP, &vinfo->flags); return 0; + +err_remove: + hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id)); + return rc; } static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) @@ -512,7 +518,8 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) goto txreq_fail; } - if (hfi1_netdev_rx_init(dd)) { + rc = hfi1_netdev_rx_init(dd); + if (rc) { dd_dev_err(dd, "Unable to initialize netdev contexts\n"); goto alloc_fail; } From 278f74b39e641e1315e1b7f11b26aa1f989a40fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 30 May 2020 13:52:58 -0400 Subject: [PATCH 552/562] RDMA/core: Move and rename trace_cm_id_create() The restrack ID for an rdma_cm_id is not assigned until it is associated with a device. Here's an example I captured while testing NFS/RDMA's support for DEVICE_REMOVAL. The new tracepoint name is "cm_id_attach". <...>-4261 [001] 366.581299: cm_event_handler: cm.id=0 src=0.0.0.0:45919 dst=192.168.2.55:20049 tos=0 ADDR_ERROR (1/-19) <...>-4261 [001] 366.581304: cm_event_done: cm.id=0 src=0.0.0.0:45919 dst=192.168.2.55:20049 tos=0 ADDR_ERROR consumer returns 0 <...>-1950 [000] 366.581309: cm_id_destroy: cm.id=0 src=0.0.0.0:45919 dst=192.168.2.55:20049 tos=0 <...>-7 [001] 369.589400: cm_event_handler: cm.id=0 src=0.0.0.0:49023 dst=192.168.2.55:20049 tos=0 ADDR_ERROR (1/-19) <...>-7 [001] 369.589404: cm_event_done: cm.id=0 src=0.0.0.0:49023 dst=192.168.2.55:20049 tos=0 ADDR_ERROR consumer returns 0 <...>-1950 [000] 369.589407: cm_id_destroy: cm.id=0 src=0.0.0.0:49023 dst=192.168.2.55:20049 tos=0 <...>-4261 [001] 372.597650: cm_id_attach: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 device=mlx4_0 <...>-4261 [001] 372.597652: cm_event_handler: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 ADDR_RESOLVED (0/0) <...>-4261 [001] 372.597654: cm_event_done: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 ADDR_RESOLVED consumer returns 0 <...>-4261 [001] 372.597738: cm_event_handler: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 ROUTE_RESOLVED (2/0) <...>-4261 [001] 372.597740: cm_event_done: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 ROUTE_RESOLVED consumer returns 0 <...>-4691 [007] 372.600101: cm_qp_create: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 pd.id=2 qp_type=RC send_wr=4091 recv_wr=256 qp_num=530 rc=0 <...>-4691 [007] 372.600207: cm_send_req: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 qp_num=530 <...>-185 [002] 372.601212: cm_send_mra: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 <...>-185 [002] 372.601362: cm_send_rtu: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 <...>-185 [002] 372.601372: cm_event_handler: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 ESTABLISHED (9/0) <...>-185 [002] 372.601379: cm_event_done: cm.id=0 src=192.168.2.51:47492 dst=192.168.2.55:20049 tos=0 ESTABLISHED consumer returns 0 Fixes: ed999f820a6c ("RDMA/cma: Add trace points in RDMA Connection Manager") Link: https://lore.kernel.org/r/20200530174934.21362.56754.stgit@manet.1015granger.net Signed-off-by: Chuck Lever Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 2 +- drivers/infiniband/core/cma_trace.h | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8026ee56546a..3d7cc9f0f3d4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -484,6 +484,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, rdma_restrack_kadd(&id_priv->res); else rdma_restrack_uadd(&id_priv->res); + trace_cm_id_attach(id_priv, cma_dev->device); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, @@ -888,7 +889,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; - trace_cm_id_create(id_priv); return &id_priv->id; } EXPORT_SYMBOL(__rdma_create_id); diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index 81e36bf13159..e6e20c36c538 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -103,23 +103,33 @@ DEFINE_CMA_FSM_EVENT(sent_drep); DEFINE_CMA_FSM_EVENT(sent_dreq); DEFINE_CMA_FSM_EVENT(id_destroy); -TRACE_EVENT(cm_id_create, +TRACE_EVENT(cm_id_attach, TP_PROTO( - const struct rdma_id_private *id_priv + const struct rdma_id_private *id_priv, + const struct ib_device *device ), - TP_ARGS(id_priv), + TP_ARGS(id_priv, device), TP_STRUCT__entry( __field(u32, cm_id) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + __string(devname, device->name) ), TP_fast_assign( __entry->cm_id = id_priv->res.id; + memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + __assign_str(devname, device->name); ), - TP_printk("cm.id=%u", - __entry->cm_id + TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s", + __entry->cm_id, __entry->srcaddr, __entry->dstaddr, + __get_str(devname) ) ); From 87d9e568496aeb519f1da61f54ac0dcb8d37561e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 May 2020 11:39:18 +0300 Subject: [PATCH 553/562] RDMA/hns: Uninitialized variable in modify_qp_init_to_rtr() The "dmac" variable is used before it is initialized. Fixes: 494c3b312255 ("RDMA/hns: Refactor the QP context filling process related to WQE buffer configure") Link: https://lore.kernel.org/r/20200529083918.GA1298465@mwanda Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 6454ac4ad06f..c597d7281629 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4030,6 +4030,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; smac = (u8 *)hr_dev->dev_addr[port]; + dmac = (u8 *)attr->ah_attr.roce.dmac; /* when dmac equals smac or loop_idc is 1, it should loopback */ if (ether_addr_equal_unaligned(dmac, smac) || hr_dev->loop_idc == 0x1) { @@ -4053,7 +4054,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - dmac = (u8 *)attr->ah_attr.roce.dmac; memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); From 193ba03141bb987c3af985f6479840030fec0534 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 2 Jun 2020 14:16:35 +0800 Subject: [PATCH 554/562] IB/hfi1: Use free_netdev() in hfi1_netdev_free() dummy_netdev shold be freed by free_netdev() instead of kfree(). Also remove unneeded variable 'priv' Fixes: 4730f4a6c6b2 ("IB/hfi1: Activate the dummy netdev") Link: https://lore.kernel.org/r/20200602061635.31224-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Reported-by: kbuild test robot Reported-by: Dan Carpenter Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/netdev_rx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 58af6a454761..63688e85e8da 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -371,12 +371,9 @@ int hfi1_netdev_alloc(struct hfi1_devdata *dd) void hfi1_netdev_free(struct hfi1_devdata *dd) { - struct hfi1_netdev_priv *priv; - if (dd->dummy_netdev) { - priv = hfi1_netdev_priv(dd->dummy_netdev); dd_dev_info(dd, "hfi1 netdev freed\n"); - kfree(dd->dummy_netdev); + free_netdev(dd->dummy_netdev); dd->dummy_netdev = NULL; } } From 6a45b0e2589fafd7fc501a5fbd0ab56689a08124 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 28 May 2020 16:58:43 +0200 Subject: [PATCH 555/562] gpiolib: Introduce gpiochip_irqchip_add_domain() The function connects an IRQ domain to a gpiochip and reuses gpiochip_to_irq() which is provided by gpiolib. gpiochip_irqchip_* and regmap_irq partially provide the same functionality. This function will help to connect just the minimal functionality of the gpiochip_irqchip which is needed to work together with regmap-irq. Signed-off-by: Michael Walle Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200528145845.31436-2-michael@walle.cc Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 20 ++++++++++++++++++++ include/linux/gpio/driver.h | 3 +++ 2 files changed, 23 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 167860684be6..0a54a759dd40 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2779,6 +2779,26 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gc, } EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); +/** + * gpiochip_irqchip_add_domain() - adds an irqdomain to a gpiochip + * @gc: the gpiochip to add the irqchip to + * @domain: the irqdomain to add to the gpiochip + * + * This function adds an IRQ domain to the gpiochip. + */ +int gpiochip_irqchip_add_domain(struct gpio_chip *gc, + struct irq_domain *domain) +{ + if (!domain) + return -EINVAL; + + gc->to_irq = gpiochip_to_irq; + gc->irq.domain = domain; + + return 0; +} +EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_domain); + #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gc, diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c8bcaf315d03..c4f272af7af5 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -612,6 +612,9 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gc, bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset); +int gpiochip_irqchip_add_domain(struct gpio_chip *gc, + struct irq_domain *domain); + #ifdef CONFIG_LOCKDEP /* From ebe363197e525ffbd279c729421f6f6c24d8d681 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 28 May 2020 16:58:44 +0200 Subject: [PATCH 556/562] gpio: add a reusable generic gpio_chip using regmap There are quite a lot simple GPIO controller which are using regmap to access the hardware. This driver tries to be a base to unify existing code into one place. This won't cover everything but it should be a good starting point. It does not implement its own irq_chip because there is already a generic one for regmap based devices. Instead, the irq_chip will be instantiated in the parent driver and its irq domain will be associate to this driver. For now it consists of the usual registers, like set (and an optional clear) data register, an input register and direction registers. Out-of-the-box, it supports consecutive register mappings and mappings where the registers have gaps between them with a linear mapping between GPIO offset and bit position. For weirder mappings the user can register its own .xlate(). Signed-off-by: Michael Walle Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200528145845.31436-3-michael@walle.cc Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 4 + drivers/gpio/Makefile | 1 + drivers/gpio/gpio-regmap.c | 349 ++++++++++++++++++++++++++++++++++++ include/linux/gpio/regmap.h | 86 +++++++++ 4 files changed, 440 insertions(+) create mode 100644 drivers/gpio/gpio-regmap.c create mode 100644 include/linux/gpio/regmap.h diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 7d077be10a0f..bcacd9c74aa8 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -73,6 +73,10 @@ config GPIO_GENERIC depends on HAS_IOMEM # Only for IOMEM drivers tristate +config GPIO_REGMAP + depends on REGMAP + tristate + # put drivers in the right section, in alphabetical order # This symbol is selected by both I2C and SPI expanders diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 65bf3940e33c..1e4894e0bf0f 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_GPIO_SYSFS) += gpiolib-sysfs.o obj-$(CONFIG_GPIO_ACPI) += gpiolib-acpi.o # Device drivers. Generally keep list sorted alphabetically +obj-$(CONFIG_GPIO_REGMAP) += gpio-regmap.o obj-$(CONFIG_GPIO_GENERIC) += gpio-generic.o # directly supported by gpio-generic diff --git a/drivers/gpio/gpio-regmap.c b/drivers/gpio/gpio-regmap.c new file mode 100644 index 000000000000..5412cb3b0b2a --- /dev/null +++ b/drivers/gpio/gpio-regmap.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * regmap based generic GPIO driver + * + * Copyright 2020 Michael Walle + */ + +#include +#include +#include +#include +#include + +struct gpio_regmap { + struct device *parent; + struct regmap *regmap; + struct gpio_chip gpio_chip; + + int reg_stride; + int ngpio_per_reg; + unsigned int reg_dat_base; + unsigned int reg_set_base; + unsigned int reg_clr_base; + unsigned int reg_dir_in_base; + unsigned int reg_dir_out_base; + + int (*reg_mask_xlate)(struct gpio_regmap *gpio, unsigned int base, + unsigned int offset, unsigned int *reg, + unsigned int *mask); + + void *driver_data; +}; + +static unsigned int gpio_regmap_addr(unsigned int addr) +{ + if (addr == GPIO_REGMAP_ADDR_ZERO) + return 0; + + return addr; +} + +static int gpio_regmap_simple_xlate(struct gpio_regmap *gpio, + unsigned int base, unsigned int offset, + unsigned int *reg, unsigned int *mask) +{ + unsigned int line = offset % gpio->ngpio_per_reg; + unsigned int stride = offset / gpio->ngpio_per_reg; + + *reg = base + stride * gpio->reg_stride; + *mask = BIT(line); + + return 0; +} + +static int gpio_regmap_get(struct gpio_chip *chip, unsigned int offset) +{ + struct gpio_regmap *gpio = gpiochip_get_data(chip); + unsigned int base, val, reg, mask; + int ret; + + /* we might not have an output register if we are input only */ + if (gpio->reg_dat_base) + base = gpio_regmap_addr(gpio->reg_dat_base); + else + base = gpio_regmap_addr(gpio->reg_set_base); + + ret = gpio->reg_mask_xlate(gpio, base, offset, ®, &mask); + if (ret) + return ret; + + ret = regmap_read(gpio->regmap, reg, &val); + if (ret) + return ret; + + return !!(val & mask); +} + +static void gpio_regmap_set(struct gpio_chip *chip, unsigned int offset, + int val) +{ + struct gpio_regmap *gpio = gpiochip_get_data(chip); + unsigned int base = gpio_regmap_addr(gpio->reg_set_base); + unsigned int reg, mask; + + gpio->reg_mask_xlate(gpio, base, offset, ®, &mask); + if (val) + regmap_update_bits(gpio->regmap, reg, mask, mask); + else + regmap_update_bits(gpio->regmap, reg, mask, 0); +} + +static void gpio_regmap_set_with_clear(struct gpio_chip *chip, + unsigned int offset, int val) +{ + struct gpio_regmap *gpio = gpiochip_get_data(chip); + unsigned int base, reg, mask; + + if (val) + base = gpio_regmap_addr(gpio->reg_set_base); + else + base = gpio_regmap_addr(gpio->reg_clr_base); + + gpio->reg_mask_xlate(gpio, base, offset, ®, &mask); + regmap_write(gpio->regmap, reg, mask); +} + +static int gpio_regmap_get_direction(struct gpio_chip *chip, + unsigned int offset) +{ + struct gpio_regmap *gpio = gpiochip_get_data(chip); + unsigned int base, val, reg, mask; + int invert, ret; + + if (gpio->reg_dir_out_base) { + base = gpio_regmap_addr(gpio->reg_dir_out_base); + invert = 0; + } else if (gpio->reg_dir_in_base) { + base = gpio_regmap_addr(gpio->reg_dir_in_base); + invert = 1; + } else { + return -EOPNOTSUPP; + } + + ret = gpio->reg_mask_xlate(gpio, base, offset, ®, &mask); + if (ret) + return ret; + + ret = regmap_read(gpio->regmap, reg, &val); + if (ret) + return ret; + + if (!!(val & mask) ^ invert) + return GPIO_LINE_DIRECTION_OUT; + else + return GPIO_LINE_DIRECTION_IN; +} + +static int gpio_regmap_set_direction(struct gpio_chip *chip, + unsigned int offset, bool output) +{ + struct gpio_regmap *gpio = gpiochip_get_data(chip); + unsigned int base, val, reg, mask; + int invert, ret; + + if (gpio->reg_dir_out_base) { + base = gpio_regmap_addr(gpio->reg_dir_out_base); + invert = 0; + } else if (gpio->reg_dir_in_base) { + base = gpio_regmap_addr(gpio->reg_dir_in_base); + invert = 1; + } else { + return -EOPNOTSUPP; + } + + ret = gpio->reg_mask_xlate(gpio, base, offset, ®, &mask); + if (ret) + return ret; + + if (invert) + val = output ? 0 : mask; + else + val = output ? mask : 0; + + return regmap_update_bits(gpio->regmap, reg, mask, val); +} + +static int gpio_regmap_direction_input(struct gpio_chip *chip, + unsigned int offset) +{ + return gpio_regmap_set_direction(chip, offset, false); +} + +static int gpio_regmap_direction_output(struct gpio_chip *chip, + unsigned int offset, int value) +{ + gpio_regmap_set(chip, offset, value); + + return gpio_regmap_set_direction(chip, offset, true); +} + +void gpio_regmap_set_drvdata(struct gpio_regmap *gpio, void *data) +{ + gpio->driver_data = data; +} +EXPORT_SYMBOL_GPL(gpio_regmap_set_drvdata); + +void *gpio_regmap_get_drvdata(struct gpio_regmap *gpio) +{ + return gpio->driver_data; +} +EXPORT_SYMBOL_GPL(gpio_regmap_get_drvdata); + +/** + * gpio_regmap_register() - Register a generic regmap GPIO controller + * @config: configuration for gpio_regmap + * + * Return: A pointer to the registered gpio_regmap or ERR_PTR error value. + */ +struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config) +{ + struct gpio_regmap *gpio; + struct gpio_chip *chip; + int ret; + + if (!config->parent) + return ERR_PTR(-EINVAL); + + if (!config->ngpio) + return ERR_PTR(-EINVAL); + + /* we need at least one */ + if (!config->reg_dat_base && !config->reg_set_base) + return ERR_PTR(-EINVAL); + + /* if we have a direction register we need both input and output */ + if ((config->reg_dir_out_base || config->reg_dir_in_base) && + (!config->reg_dat_base || !config->reg_set_base)) + return ERR_PTR(-EINVAL); + + /* we don't support having both registers simultaneously for now */ + if (config->reg_dir_out_base && config->reg_dir_in_base) + return ERR_PTR(-EINVAL); + + gpio = kzalloc(sizeof(*gpio), GFP_KERNEL); + if (!gpio) + return ERR_PTR(-ENOMEM); + + gpio->parent = config->parent; + gpio->regmap = config->regmap; + gpio->ngpio_per_reg = config->ngpio_per_reg; + gpio->reg_stride = config->reg_stride; + gpio->reg_mask_xlate = config->reg_mask_xlate; + gpio->reg_dat_base = config->reg_dat_base; + gpio->reg_set_base = config->reg_set_base; + gpio->reg_clr_base = config->reg_clr_base; + gpio->reg_dir_in_base = config->reg_dir_in_base; + gpio->reg_dir_out_base = config->reg_dir_out_base; + + /* if not set, assume there is only one register */ + if (!gpio->ngpio_per_reg) + gpio->ngpio_per_reg = config->ngpio; + + /* if not set, assume they are consecutive */ + if (!gpio->reg_stride) + gpio->reg_stride = 1; + + if (!gpio->reg_mask_xlate) + gpio->reg_mask_xlate = gpio_regmap_simple_xlate; + + chip = &gpio->gpio_chip; + chip->parent = config->parent; + chip->base = -1; + chip->ngpio = config->ngpio; + chip->names = config->names; + chip->label = config->label ?: dev_name(config->parent); + + /* + * If our regmap is fast_io we should probably set can_sleep to false. + * Right now, the regmap doesn't save this property, nor is there any + * access function for it. + * The only regmap type which uses fast_io is regmap-mmio. For now, + * assume a safe default of true here. + */ + chip->can_sleep = true; + + chip->get = gpio_regmap_get; + if (gpio->reg_set_base && gpio->reg_clr_base) + chip->set = gpio_regmap_set_with_clear; + else if (gpio->reg_set_base) + chip->set = gpio_regmap_set; + + if (gpio->reg_dir_in_base || gpio->reg_dir_out_base) { + chip->get_direction = gpio_regmap_get_direction; + chip->direction_input = gpio_regmap_direction_input; + chip->direction_output = gpio_regmap_direction_output; + } + + ret = gpiochip_add_data(chip, gpio); + if (ret < 0) + goto err_free_gpio; + + if (config->irq_domain) { + ret = gpiochip_irqchip_add_domain(chip, config->irq_domain); + if (ret) + goto err_remove_gpiochip; + } + + return gpio; + +err_remove_gpiochip: + gpiochip_remove(chip); +err_free_gpio: + kfree(gpio); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(gpio_regmap_register); + +/** + * gpio_regmap_unregister() - Unregister a generic regmap GPIO controller + * @gpio: gpio_regmap device to unregister + */ +void gpio_regmap_unregister(struct gpio_regmap *gpio) +{ + gpiochip_remove(&gpio->gpio_chip); + kfree(gpio); +} +EXPORT_SYMBOL_GPL(gpio_regmap_unregister); + +static void devm_gpio_regmap_unregister(struct device *dev, void *res) +{ + gpio_regmap_unregister(*(struct gpio_regmap **)res); +} + +/** + * devm_gpio_regmap_register() - resource managed gpio_regmap_register() + * @dev: device that is registering this GPIO device + * @config: configuration for gpio_regmap + * + * Managed gpio_regmap_register(). For generic regmap GPIO device registered by + * this function, gpio_regmap_unregister() is automatically called on driver + * detach. See gpio_regmap_register() for more information. + * + * Return: A pointer to the registered gpio_regmap or ERR_PTR error value. + */ +struct gpio_regmap *devm_gpio_regmap_register(struct device *dev, + const struct gpio_regmap_config *config) +{ + struct gpio_regmap **ptr, *gpio; + + ptr = devres_alloc(devm_gpio_regmap_unregister, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + gpio = gpio_regmap_register(config); + if (!IS_ERR(gpio)) { + *ptr = gpio; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return gpio; +} +EXPORT_SYMBOL_GPL(devm_gpio_regmap_register); + +MODULE_AUTHOR("Michael Walle "); +MODULE_DESCRIPTION("GPIO generic regmap driver core"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h new file mode 100644 index 000000000000..4c1e6b34e824 --- /dev/null +++ b/include/linux/gpio/regmap.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _LINUX_GPIO_REGMAP_H +#define _LINUX_GPIO_REGMAP_H + +struct device; +struct gpio_regmap; +struct irq_domain; +struct regmap; + +#define GPIO_REGMAP_ADDR_ZERO ((unsigned long)(-1)) +#define GPIO_REGMAP_ADDR(addr) ((addr) ? : GPIO_REGMAP_ADDR_ZERO) + +/** + * struct gpio_regmap_config - Description of a generic regmap gpio_chip. + * @parent: The parent device + * @regmap: The regmap used to access the registers + * given, the name of the device is used + * @label: (Optional) Descriptive name for GPIO controller. + * If not given, the name of the device is used. + * @ngpio: Number of GPIOs + * @names: (Optional) Array of names for gpios + * @reg_dat_base: (Optional) (in) register base address + * @reg_set_base: (Optional) set register base address + * @reg_clr_base: (Optional) clear register base address + * @reg_dir_in_base: (Optional) in setting register base address + * @reg_dir_out_base: (Optional) out setting register base address + * @reg_stride: (Optional) May be set if the registers (of the + * same type, dat, set, etc) are not consecutive. + * @ngpio_per_reg: Number of GPIOs per register + * @irq_domain: (Optional) IRQ domain if the controller is + * interrupt-capable + * @reg_mask_xlate: (Optional) Translates base address and GPIO + * offset to a register/bitmask pair. If not + * given the default gpio_regmap_simple_xlate() + * is used. + * + * The ->reg_mask_xlate translates a given base address and GPIO offset to + * register and mask pair. The base address is one of the given register + * base addresses in this structure. + * + * Although all register base addresses are marked as optional, there are + * several rules: + * 1. if you only have @reg_dat_base set, then it is input-only + * 2. if you only have @reg_set_base set, then it is output-only + * 3. if you have either @reg_dir_in_base or @reg_dir_out_base set, then + * you have to set both @reg_dat_base and @reg_set_base + * 4. if you have @reg_set_base set, you may also set @reg_clr_base to have + * two different registers for setting and clearing the output. This is + * also valid for the output-only case. + * 5. @reg_dir_in_base and @reg_dir_out_base are exclusive; is there really + * hardware which has redundant registers? + * + * Note: All base addresses may have the special value %GPIO_REGMAP_ADDR_ZERO + * which forces the address to the value 0. + */ +struct gpio_regmap_config { + struct device *parent; + struct regmap *regmap; + + const char *label; + int ngpio; + const char *const *names; + + unsigned int reg_dat_base; + unsigned int reg_set_base; + unsigned int reg_clr_base; + unsigned int reg_dir_in_base; + unsigned int reg_dir_out_base; + int reg_stride; + int ngpio_per_reg; + struct irq_domain *irq_domain; + + int (*reg_mask_xlate)(struct gpio_regmap *gpio, unsigned int base, + unsigned int offset, unsigned int *reg, + unsigned int *mask); +}; + +struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config); +void gpio_regmap_unregister(struct gpio_regmap *gpio); +struct gpio_regmap *devm_gpio_regmap_register(struct device *dev, + const struct gpio_regmap_config *config); +void gpio_regmap_set_drvdata(struct gpio_regmap *gpio, void *data); +void *gpio_regmap_get_drvdata(struct gpio_regmap *gpio); + +#endif /* _LINUX_GPIO_REGMAP_H */ From 76bbea9a24df1feaed1f731ea4481dec2e39c239 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 28 May 2020 16:58:45 +0200 Subject: [PATCH 557/562] MAINTAINERS: Add gpio regmap section Add myself as a reviewer for the gpio regmap. Signed-off-by: Michael Walle Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200528145845.31436-4-michael@walle.cc Signed-off-by: Linus Walleij --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index abe8b329f9b9..9d4571cb8262 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7249,6 +7249,12 @@ S: Maintained F: drivers/gpio/gpio-mockup.c F: tools/testing/selftests/gpio/ +GPIO REGMAP +R: Michael Walle +S: Maintained +F: drivers/gpio/gpio-regmap.c +F: include/linux/gpio/regmap.h + GPIO SUBSYSTEM M: Linus Walleij M: Bartosz Golaszewski From 74910e15ab25f95f162bc4d4a634d029186543ce Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 May 2020 00:19:14 +0300 Subject: [PATCH 558/562] gpio: pca953x: Drop unneeded ACPI_PTR() ACPI_PTR() becomes a no-op when !CONFIG_ACPI. This is not needed since we always have ID table enabled. Moreover, in the mentioned case compiler will complain about defined but not used variable. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200520211916.25727-3-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 4bb3d3524bc7..1fca8dd7824f 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1176,7 +1176,7 @@ static struct i2c_driver pca953x_driver = { .name = "pca953x", .pm = &pca953x_pm_ops, .of_match_table = pca953x_dt_ids, - .acpi_match_table = ACPI_PTR(pca953x_acpi_ids), + .acpi_match_table = pca953x_acpi_ids, }, .probe = pca953x_probe, .remove = pca953x_remove, From 6512f11d386c7cf83a48e71cfd7c7c1b0003c151 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Jun 2020 15:55:46 +0300 Subject: [PATCH 559/562] RDMA/mlx5: Return an error if copy_to_user fails In theoretical event, the ib_copy_to_udata() can fail, so return -EFAULT error to the user, so he will destroy the QP. Fixes: 50aec2c3135e ("RDMA/mlx5: Return ECE data after modify QP") Link: https://lore.kernel.org/r/20200602125548.172654-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9364a7a76ac2..9f0b7f1908da 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4305,12 +4305,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /* resp.response_length is set in ECE supported flows only */ if (!err && resp.response_length && udata->outlen >= resp.response_length) - /* - * We don't check return value of the function below - * on purpose, because it is unclear how to unwind the - * error flow after QP was modified to the new state. - */ - ib_copy_to_udata(udata, &resp, resp.response_length); + /* Return -EFAULT to the user and expect him to destroy QP. */ + err = ib_copy_to_udata(udata, &resp, resp.response_length); out: mutex_unlock(&qp->mutex); From 92cd667c0e8a67de024134be0a6f0bdb320606a8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Jun 2020 15:55:47 +0300 Subject: [PATCH 560/562] RDMA/mlx5: Don't rely on FW to set zeros in ECE response The FW returns zeros in case feature is not enabled, but it is better to have the capability check and ensure that returned result is cleared. Fixes: 3e09a427ae7a ("RDMA/mlx5: Get ECE options from FW during create QP") Link: https://lore.kernel.org/r/20200602125548.172654-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9f0b7f1908da..18135f908971 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1906,7 +1906,8 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; - params->resp.ece_options = MLX5_GET(create_qp_out, out, ece); + if (MLX5_CAP_GEN(mdev, ece_support)) + params->resp.ece_options = MLX5_GET(create_qp_out, out, ece); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); list_add_tail(&qp->qps_list, &dev->qp_list); @@ -2082,7 +2083,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; - params->resp.ece_options = MLX5_GET(create_qp_out, out, ece); + if (MLX5_CAP_GEN(mdev, ece_support)) + params->resp.ece_options = MLX5_GET(create_qp_out, out, ece); get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq, &send_cq, &recv_cq); From a645a89d9a780a8fbb6e283f84fc91ad538c2edc Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Jun 2020 15:55:48 +0300 Subject: [PATCH 561/562] RDMA/mlx5: Return ECE DC support The DC QPs are many-to-one QP types that means that first connection will establish ECE options that coming connections should follow. Due to this property, the ECE code was removed between first [1] and second [2] ECE submissions. This patch returns the dropped code, because ECE is a property of a connection and like any other connection users are needed to manage this data. Allow them to set ECE parameter for DC too and avoid need of having compatibility flag for the DC ECE. [1] https://lore.kernel.org/linux-rdma/20200523132243.817936-1-leon@kernel.org/ [2] https://lore.kernel.org/linux-rdma/20200525174401.71152-1-leon@kernel.org/ Link: https://lore.kernel.org/r/20200602125548.172654-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 45 +++++++++++++++++++++++---------- include/linux/mlx5/mlx5_ifc.h | 5 ++-- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 18135f908971..81bf6b975e0e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2404,7 +2404,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, destroy_qp(dev, qp, base, udata); } -static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, +static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, struct mlx5_create_qp_params *params) { struct ib_qp_init_attr *attr = params->attr; @@ -2423,6 +2424,8 @@ static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn); MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); MLX5_SET(dctc, dctc, user_index, uidx); + if (MLX5_CAP_GEN(dev->mdev, ece_support)) + MLX5_SET(dctc, dctc, ece, ucmd->ece_options); if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) { int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq); @@ -2768,7 +2771,7 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (qp->type == MLX5_IB_QPT_DCT) { - err = create_dct(pd, qp, params); + err = create_dct(dev, pd, qp, params); goto out; } @@ -2882,9 +2885,8 @@ static int check_ucmd_data(struct mlx5_ib_dev *dev, */ last = sizeof(struct mlx5_ib_create_qp_rss); else - /* IB_QPT_RAW_PACKET and IB_QPT_DRIVER don't have ECE data */ + /* IB_QPT_RAW_PACKET doesn't have ECE data */ switch (attr->qp_type) { - case IB_QPT_DRIVER: case IB_QPT_RAW_PACKET: last = offsetof(struct mlx5_ib_create_qp, ece_options); break; @@ -4095,7 +4097,8 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new * Other transitions and attributes are illegal */ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) + int attr_mask, struct mlx5_ib_modify_qp *ucmd, + struct ib_udata *udata) { struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4111,6 +4114,15 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr->qp_state; dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); + if (MLX5_CAP_GEN(dev->mdev, ece_support) && ucmd->ece_options) + /* + * DCT doesn't initialize QP till modify command is executed, + * so we need to overwrite previously set ECE field if user + * provided any value except zero, which means not set/not + * valid. + */ + MLX5_SET(dctc, dctc, ece, ucmd->ece_options); + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u16 set_id; @@ -4145,14 +4157,21 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, counter_set_id, set_id); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; - u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; - u32 min_resp_len = offsetof(typeof(resp), dctn) + - sizeof(resp.dctn); + u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {}; + u32 min_resp_len = offsetofend(typeof(resp), dctn); if (udata->outlen < min_resp_len) return -EINVAL; resp.response_length = min_resp_len; + /* + * If we don't have enough space for the ECE options, + * simply indicate it with resp.response_length. + */ + resp.response_length = (udata->outlen < sizeof(resp)) ? + min_resp_len : + sizeof(resp); + required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU; if (!is_valid_mask(attr_mask, required, 0)) return -EINVAL; @@ -4169,6 +4188,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (err) return err; resp.dctn = qp->dct.mdct.mqp.qpn; + if (MLX5_CAP_GEN(dev->mdev, ece_support)) + resp.ece_options = MLX5_GET(create_dct_out, out, ece); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) { mlx5_core_destroy_dct(dev, &qp->dct.mdct); @@ -4226,12 +4247,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI : qp->type; - if (qp_type == MLX5_IB_QPT_DCT) { - if (memchr_inv(&ucmd.ece_options, 0, sizeof(ucmd.ece_options))) - return -EOPNOTSUPP; - - return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata); - } + if (qp_type == MLX5_IB_QPT_DCT) + return mlx5_ib_modify_dct(ibqp, attr, attr_mask, &ucmd, udata); mutex_lock(&qp->mutex); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4d2e36dd2c6b..116bd9bb347f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3690,7 +3690,8 @@ struct mlx5_ifc_dctc_bits { u8 ecn[0x2]; u8 dscp[0x6]; - u8 reserved_at_1c0[0x40]; + u8 reserved_at_1c0[0x20]; + u8 ece[0x20]; }; enum { @@ -7940,7 +7941,7 @@ struct mlx5_ifc_create_dct_out_bits { u8 reserved_at_40[0x8]; u8 dctn[0x18]; - u8 reserved_at_60[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_create_dct_in_bits { From fba97dc7fc76b2c9a909fa0b3786d30a9899f5cf Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Wed, 3 Jun 2020 06:33:38 -0700 Subject: [PATCH 562/562] RDMA/cm: Spurious WARNING triggered in cm_destroy_id() If the cm_id state is IB_CM_REP_SENT when cm_destroy_id() is called, it calls cm_send_rej_locked(). In cm_send_rej_locked(), it calls cm_enter_timewait() and the state is changed to IB_CM_TIMEWAIT. Now back to cm_destroy_id(), it breaks from the switch statement, and the next call is WARN_ON(cm_id->state != IB_CM_IDLE). This triggers a spurious warning. Instead, the code should goto retest after returning from cm_send_rej_locked() to move the state to IDLE. Fixes: 67b3c8dceac6 ("RDMA/cm: Make sure the cm_id is in the IB_CM_IDLE state in destroy") Link: https://lore.kernel.org/r/1591191218-9446-1-git-send-email-ka-cheong.poon@oracle.com Signed-off-by: Ka-Cheong Poon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 085c146fe400..9ce787e37e22 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1111,7 +1111,9 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - /* Fall through */ + cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL, + 0, NULL, 0); + goto retest; case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: