diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a201c5871a77..19b51205dc8c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -471,7 +471,6 @@ static void free_transport(struct smb_direct_transport *t) if (sc->ib.qp) { ib_drain_qp(sc->ib.qp); - ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs); sc->ib.qp = NULL; rdma_destroy_qp(sc->rdma.cm_id); } @@ -1871,20 +1870,11 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) return ret; } -static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc) -{ - return min_t(unsigned int, - sc->ib.dev->attrs.max_fast_reg_page_list_len, - 256); -} - -static int smb_direct_init_params(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static int smb_direct_init_params(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; - struct ib_device *device = sc->ib.dev; - int max_send_sges, max_rw_wrs, max_send_wrs; - unsigned int max_sge_per_wr, wrs_per_credit; + int max_send_sges; + unsigned int maxpages; /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, * SMB2 response could be mapped. @@ -1895,67 +1885,18 @@ static int smb_direct_init_params(struct smbdirect_socket *sc, return -EINVAL; } - /* Calculate the number of work requests for RDMA R/W. - * The maximum number of pages which can be registered - * with one Memory region can be transferred with one - * R/W credit. And at least 4 work requests for each credit - * are needed for MR registration, RDMA R/W, local & remote - * MR invalidation. - */ - sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc); - sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size, - (sc->rw_io.credits.num_pages - 1) * - PAGE_SIZE); - - max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, - device->attrs.max_sge_rd); - max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, - max_send_sges); - wrs_per_credit = max_t(unsigned int, 4, - DIV_ROUND_UP(sc->rw_io.credits.num_pages, - max_sge_per_wr) + 1); - max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit; - - max_send_wrs = sp->send_credit_target + max_rw_wrs; - if (max_send_wrs > device->attrs.max_cqe || - max_send_wrs > device->attrs.max_qp_wr) { - pr_err("consider lowering send_credit_target = %d\n", - sp->send_credit_target); - pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } - - if (sp->recv_credit_max > device->attrs.max_cqe || - sp->recv_credit_max > device->attrs.max_qp_wr) { - pr_err("consider lowering receive_credit_max = %d\n", - sp->recv_credit_max); - pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } - - if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) { - pr_err("warning: device max_send_sge = %d too small\n", - device->attrs.max_send_sge); - return -EINVAL; - } - if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { - pr_err("warning: device max_recv_sge = %d too small\n", - device->attrs.max_recv_sge); - return -EINVAL; - } + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); + sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, + sc->rdma.cm_id->port_num, + maxpages); + sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max); + /* add one extra in order to handle unaligned pages */ + sc->rw_io.credits.max += 1; sc->recv_io.credits.target = 1; atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); - cap->max_send_wr = max_send_wrs; - cap->max_recv_wr = sp->recv_credit_max; - cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - cap->max_inline_data = 0; - cap->max_rdma_ctxs = sc->rw_io.credits.max; return 0; } @@ -2029,13 +1970,129 @@ static int smb_direct_create_pools(struct smbdirect_socket *sc) return -ENOMEM; } -static int smb_direct_create_qpair(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) +{ + /* + * This could be split out of rdma_rw_init_qp() + * and be a helper function next to rdma_rw_mr_factor() + * + * We can't check unlikely(rdma_rw_force_mr) here, + * but that is most likely 0 anyway. + */ + u32 factor; + + WARN_ON_ONCE(attr->port_num == 0); + + /* + * Each context needs at least one RDMA READ or WRITE WR. + * + * For some hardware we might need more, eventually we should ask the + * HCA driver for a multiplier here. + */ + factor = 1; + + /* + * If the device needs MRs to perform RDMA READ or WRITE operations, + * we'll need two additional MRs for the registrations and the + * invalidation. + */ + if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) + factor += 2; /* inv + reg */ + + return factor * attr->cap.max_rdma_ctxs; +} + +static int smb_direct_create_qpair(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; + struct ib_qp_cap qp_cap; struct ib_qp_init_attr qp_attr; - int pages_per_rw; + u32 max_send_wr; + u32 rdma_send_wr; + + /* + * Note that {rdma,ib}_create_qp() will call + * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0. + * It will adjust cap->max_send_wr to the required + * number of additional WRs for the RDMA RW operations. + * It will cap cap->max_send_wr to the device limit. + * + * +1 for ib_drain_qp + */ + qp_cap.max_send_wr = sp->send_credit_target + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + qp_cap.max_inline_data = 0; + qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; + + /* + * Find out the number of max_send_wr + * after rdma_rw_init_qp() adjusted it. + * + * We only do it on a temporary variable, + * as rdma_create_qp() will trigger + * rdma_rw_init_qp() again. + */ + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.cap = qp_cap; + qp_attr.port_num = sc->rdma.cm_id->port_num; + rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); + max_send_wr = qp_cap.max_send_wr + rdma_send_wr; + + if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_send_wr %d\n", + qp_cap.max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d\n", + sp->send_credit_target); + return -EINVAL; + } + + if (qp_cap.max_rdma_ctxs && + (max_send_wr >= sc->ib.dev->attrs.max_cqe || + max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { + pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", + rdma_send_wr, qp_cap.max_send_wr, max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", + sp->send_credit_target, qp_cap.max_rdma_ctxs); + return -EINVAL; + } + + if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_recv_wr %d\n", + qp_cap.max_recv_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering receive_credit_max = %d\n", + sp->recv_credit_max); + return -EINVAL; + } + + if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || + qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { + pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + sc->ib.dev->attrs.max_recv_sge); + return -EINVAL; + } sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); if (IS_ERR(sc->ib.pd)) { @@ -2046,8 +2103,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->send_credit_target + - cap->max_rdma_ctxs, + max_send_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.send_cq)) { pr_err("Can't create RDMA send CQ\n"); @@ -2057,7 +2113,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->recv_credit_max, + qp_cap.max_recv_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.recv_cq)) { pr_err("Can't create RDMA recv CQ\n"); @@ -2066,10 +2122,18 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, goto err; } + /* + * We reset completely here! + * As the above use was just temporary + * to calc max_send_wr and rdma_send_wr. + * + * rdma_create_qp() will trigger rdma_rw_init_qp() + * again if max_rdma_ctxs is not 0. + */ memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smb_direct_qpair_handler; qp_attr.qp_context = sc; - qp_attr.cap = *cap; + qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = sc->ib.send_cq; @@ -2085,18 +2149,6 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, sc->ib.qp = sc->rdma.cm_id->qp; sc->rdma.cm_id->event_handler = smb_direct_cm_handler; - pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1; - if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) { - ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs, - sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG, - sc->rw_io.credits.num_pages, 0); - if (ret) { - pr_err("failed to init mr pool count %zu pages %zu\n", - sc->rw_io.credits.max, sc->rw_io.credits.num_pages); - goto err; - } - } - return 0; err: if (sc->ib.qp) { @@ -2183,10 +2235,9 @@ static int smb_direct_prepare(struct ksmbd_transport *t) static int smb_direct_connect(struct smbdirect_socket *sc) { - struct ib_qp_cap qp_cap; int ret; - ret = smb_direct_init_params(sc, &qp_cap); + ret = smb_direct_init_params(sc); if (ret) { pr_err("Can't configure RDMA parameters\n"); return ret; @@ -2198,7 +2249,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc) return ret; } - ret = smb_direct_create_qpair(sc, &qp_cap); + ret = smb_direct_create_qpair(sc); if (ret) { pr_err("Can't accept RDMA client: %d\n", ret); return ret;