RDMA/mana_ib: polling of CQs for GSI/UD

Add polling for the kernel CQs.
Process completion events for UD/GSI QPs.

Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Link: https://patch.msgid.link/1737394039-28772-13-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Shiraz Saleem <shirazsaleem@microsoft.com>
Reviewed-by: Long Li <longli@microsoft.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
Konstantin Taranov 2025-01-20 09:27:18 -08:00 committed by Leon Romanovsky
parent 8001e9257e
commit cfef452592
5 changed files with 202 additions and 0 deletions

View File

@ -90,6 +90,10 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
}
spin_lock_init(&cq->cq_lock);
INIT_LIST_HEAD(&cq->list_send_qp);
INIT_LIST_HEAD(&cq->list_recv_qp);
return 0;
err_remove_cq_cb:
@ -180,3 +184,134 @@ int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
return 0;
}
static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
{
struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
struct ud_sq_shadow_wqe *shadow_wqe;
shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
if (!shadow_wqe)
return;
shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
wq->tail += shadow_wqe->header.posted_wqe_size;
shadow_queue_advance_next_to_complete(&qp->shadow_sq);
}
static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
{
struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
struct ud_rq_shadow_wqe *shadow_wqe;
shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
if (!shadow_wqe)
return;
shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
shadow_wqe->header.error_code = IB_WC_SUCCESS;
wq->tail += shadow_wqe->header.posted_wqe_size;
shadow_queue_advance_next_to_complete(&qp->shadow_rq);
}
static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
{
struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
if (!qp)
return;
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
if (cqe->is_sq)
handle_ud_sq_cqe(qp, cqe);
else
handle_ud_rq_cqe(qp, cqe);
}
mana_put_qp_ref(qp);
}
static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
const struct shadow_wqe_header *shadow_wqe)
{
const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
wc->wr_id = shadow_wqe->wr_id;
wc->status = shadow_wqe->error_code;
wc->opcode = shadow_wqe->opcode;
wc->vendor_err = shadow_wqe->error_code;
wc->wc_flags = 0;
wc->qp = &qp->ibqp;
wc->pkey_index = 0;
if (shadow_wqe->opcode == IB_WC_RECV) {
wc->byte_len = ud_wqe->byte_len;
wc->src_qp = ud_wqe->src_qpn;
wc->wc_flags |= IB_WC_GRH;
}
}
static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
{
struct shadow_wqe_header *shadow_wqe;
struct mana_ib_qp *qp;
int wc_index = 0;
/* process send shadow queue completions */
list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
!= NULL) {
if (wc_index >= nwc)
goto out;
fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
shadow_queue_advance_consumer(&qp->shadow_sq);
wc_index++;
}
}
/* process recv shadow queue completions */
list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
!= NULL) {
if (wc_index >= nwc)
goto out;
fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
shadow_queue_advance_consumer(&qp->shadow_rq);
wc_index++;
}
}
out:
return wc_index;
}
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
struct gdma_queue *queue = cq->queue.kmem;
struct gdma_comp gdma_cqe;
unsigned long flags;
int num_polled = 0;
int comp_read, i;
spin_lock_irqsave(&cq->cq_lock, flags);
for (i = 0; i < num_entries; i++) {
comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
if (comp_read < 1)
break;
mana_handle_cqe(mdev, &gdma_cqe);
}
num_polled = mana_process_completions(cq, num_entries, wc);
spin_unlock_irqrestore(&cq->cq_lock, flags);
return num_polled;
}

View File

@ -40,6 +40,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
.poll_cq = mana_ib_poll_cq,
.post_recv = mana_ib_post_recv,
.post_send = mana_ib_post_send,
.query_device = mana_ib_query_device,

View File

@ -127,6 +127,10 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
struct mana_ib_queue queue;
/* protects CQ polling */
spinlock_t cq_lock;
struct list_head list_send_qp;
struct list_head list_recv_qp;
int cqe;
u32 comp_vector;
mana_handle_t cq_handle;
@ -169,6 +173,8 @@ struct mana_ib_qp {
/* The port on the IB device, starting with 1 */
u32 port;
struct list_head cq_send_list;
struct list_head cq_recv_list;
struct shadow_queue shadow_rq;
struct shadow_queue shadow_sq;
@ -435,6 +441,31 @@ struct rdma_send_oob {
};
}; /* HW DATA */
struct mana_rdma_cqe {
union {
struct {
u8 cqe_type;
u8 data[GDMA_COMP_DATA_SIZE - 1];
};
struct {
u32 cqe_type : 8;
u32 vendor_error : 9;
u32 reserved1 : 15;
u32 sge_offset : 5;
u32 tx_wqe_offset : 27;
} ud_send;
struct {
u32 cqe_type : 8;
u32 reserved1 : 24;
u32 msg_len;
u32 src_qpn : 24;
u32 reserved2 : 8;
u32 imm_data;
u32 rx_wqe_offset;
} ud_recv;
};
}; /* HW DATA */
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
return mdev->gdma_dev->gdma_context;
@ -602,5 +633,6 @@ int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
#endif

View File

@ -600,6 +600,36 @@ static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
return err;
}
static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
{
struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
unsigned long flags;
spin_lock_irqsave(&send_cq->cq_lock, flags);
list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
spin_unlock_irqrestore(&send_cq->cq_lock, flags);
spin_lock_irqsave(&recv_cq->cq_lock, flags);
list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
}
static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
{
struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
unsigned long flags;
spin_lock_irqsave(&send_cq->cq_lock, flags);
list_del(&qp->cq_send_list);
spin_unlock_irqrestore(&send_cq->cq_lock, flags);
spin_lock_irqsave(&recv_cq->cq_lock, flags);
list_del(&qp->cq_recv_list);
spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
}
static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
struct ib_qp_init_attr *attr, struct ib_udata *udata)
{
@ -654,6 +684,8 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
if (err)
goto destroy_qp;
mana_add_qp_to_cqs(qp);
return 0;
destroy_qp:
@ -840,6 +872,7 @@ static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
int i;
mana_remove_qp_from_cqs(qp);
mana_table_remove_qp(mdev, qp);
destroy_shadow_queue(&qp->shadow_rq);

View File

@ -1222,6 +1222,7 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe)
return cqe_idx;
}
EXPORT_SYMBOL_NS(mana_gd_poll_cq, "NET_MANA");
static irqreturn_t mana_gd_intr(int irq, void *arg)
{