RDMA/mlx4: Introduce a modern CQ creation interface

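The uverbs CQ creation UAPI allows users to supply their own umem when
creating a CQ. Update mlx4 to support this model by adding a dedicated
.create_user_cq callback that uses the umem already attached to
ibcq->umem, while preserving compatibility with the legacy interface by
allocating the umem internally when none was supplied. The existing
.create_cq callback is reduced to the kernel-only CQ path.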

Link: https://patch.msgid.link/20260213-refactor-umem-v1-13-f3be85847922@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
This commit is contained in:
Leon Romanovsky 2026-02-13 12:57:49 +02:00
parent 0e4b9841f4
commit f45f195af5
3 changed files with 116 additions and 90 deletions

View File

@ -136,8 +136,9 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
}
#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs)
int mlx4_ib_create_user_cq(struct ib_cq *ibcq,
const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs)
{
struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
@ -145,13 +146,16 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
int vector = attr->comp_vector;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_cq *cq = to_mcq(ibcq);
struct mlx4_uar *uar;
struct mlx4_ib_create_cq ucmd;
int cqe_size = dev->dev->caps.cqe_size;
void *buf_addr;
int shift;
int n;
int err;
struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx4_ib_ucontext, ibucontext);
if (entries < 1 || entries > dev->dev->caps.max_cqes)
if (attr->cqe > dev->dev->caps.max_cqes)
return -EINVAL;
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
@ -161,95 +165,63 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
cq->create_flags = attr->flags;
INIT_LIST_HEAD(&cq->send_qp_list);
INIT_LIST_HEAD(&cq->recv_qp_list);
if (udata) {
struct mlx4_ib_create_cq ucmd;
int cqe_size = dev->dev->caps.cqe_size;
int shift;
int n;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
goto err_cq;
}
buf_addr = (void *)(unsigned long)ucmd.buf_addr;
cq->umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
entries * cqe_size,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->umem)) {
err = PTR_ERR(cq->umem);
goto err_cq;
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->umem, 0, &n);
if (shift < 0) {
err = shift;
goto err_umem;
}
err = mlx4_mtt_init(dev->dev, n, shift, &cq->buf.mtt);
if (err)
goto err_umem;
err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
if (err)
goto err_mtt;
err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &cq->db);
if (err)
goto err_mtt;
uar = &context->uar;
cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
goto err_cq;
cq->mcq.set_ci_db = cq->db.db;
cq->mcq.arm_db = cq->db.db + 1;
*cq->mcq.set_ci_db = 0;
*cq->mcq.arm_db = 0;
err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
if (err)
goto err_db;
buf_addr = &cq->buf.buf;
uar = &dev->priv_uar;
cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
err = -EFAULT;
goto err_cq;
}
buf_addr = (void *)(unsigned long)ucmd.buf_addr;
if (!ibcq->umem)
ibcq->umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
entries * cqe_size,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(ibcq->umem)) {
err = PTR_ERR(ibcq->umem);
goto err_cq;
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->ibcq.umem, 0, &n);
if (shift < 0) {
err = shift;
goto err_cq;
}
err = mlx4_mtt_init(dev->dev, n, shift, &cq->buf.mtt);
if (err)
goto err_cq;
err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->ibcq.umem);
if (err)
goto err_mtt;
err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &cq->db);
if (err)
goto err_mtt;
if (dev->eq_table)
vector = dev->eq_table[vector % ibdev->num_comp_vectors];
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma,
&cq->mcq, vector, 0,
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, &context->uar,
cq->db.dma, &cq->mcq, vector, 0,
!!(cq->create_flags &
IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION),
buf_addr, !!udata);
buf_addr, true);
if (err)
goto err_dbmap;
if (udata)
cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp;
else
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;
cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
if (udata)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
err = -EFAULT;
goto err_cq_free;
}
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
err = -EFAULT;
goto err_cq_free;
}
return 0;
@ -257,21 +229,72 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
mlx4_cq_free(dev->dev, &cq->mcq);
err_dbmap:
if (udata)
mlx4_ib_db_unmap_user(context, &cq->db);
mlx4_ib_db_unmap_user(context, &cq->db);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
/* UMEM is released by ib_core */
err_umem:
ib_umem_release(cq->umem);
if (!udata)
mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_cq:
return err;
}
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs)
{
struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_cq *cq = to_mcq(ibcq);
void *buf_addr;
int err;
if (attr->cqe > dev->dev->caps.max_cqes)
return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
spin_lock_init(&cq->lock);
INIT_LIST_HEAD(&cq->send_qp_list);
INIT_LIST_HEAD(&cq->recv_qp_list);
err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
return err;
cq->mcq.set_ci_db = cq->db.db;
cq->mcq.arm_db = cq->db.db + 1;
*cq->mcq.set_ci_db = 0;
*cq->mcq.arm_db = 0;
err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
if (err)
goto err_db;
buf_addr = &cq->buf.buf;
if (dev->eq_table)
vector = dev->eq_table[vector % ibdev->num_comp_vectors];
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, &dev->priv_uar,
cq->db.dma, &cq->mcq, vector, 0, 0,
buf_addr, false);
if (err)
goto err_buf;
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;
cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
return 0;
err_buf:
mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
if (!udata)
mlx4_db_free(dev->dev, &cq->db);
err_cq:
mlx4_db_free(dev->dev, &cq->db);
return err;
}
@ -445,8 +468,8 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
if (ibcq->uobject) {
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
ib_umem_release(cq->umem);
cq->umem = cq->resize_umem;
ib_umem_release(cq->ibcq.umem);
cq->ibcq.umem = cq->resize_umem;
kfree(cq->resize_buf);
cq->resize_buf = NULL;
@ -506,11 +529,11 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
struct mlx4_ib_ucontext,
ibucontext),
&mcq->db);
/* UMEM is released by ib_core */
} else {
mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
mlx4_db_free(dev->dev, &mcq->db);
}
ib_umem_release(mcq->umem);
return 0;
}

View File

@ -2525,6 +2525,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
.attach_mcast = mlx4_ib_mcg_attach,
.create_ah = mlx4_ib_create_ah,
.create_cq = mlx4_ib_create_cq,
.create_user_cq = mlx4_ib_create_user_cq,
.create_qp = mlx4_ib_create_qp,
.create_srq = mlx4_ib_create_srq,
.dealloc_pd = mlx4_ib_dealloc_pd,

View File

@ -121,7 +121,6 @@ struct mlx4_ib_cq {
struct mlx4_db db;
spinlock_t lock;
struct mutex resize_mutex;
struct ib_umem *umem;
struct ib_umem *resize_umem;
int create_flags;
/* List of qps that it serves.*/
@ -772,6 +771,9 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
int mlx4_ib_create_user_cq(struct ib_cq *ibcq,
const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);