RDMA/rxe: Add support for the traditional Atomic operations with ODP

Enable the 'fetch and add' and 'compare and swap' operations to be used with
ODP. The implementation consists of the following steps:
 1. Check the driver page table (umem_odp->dma_list) to see if the target
    page is both readable and writable.
 2. If it is not, trigger a page fault to map the page.
 3. Convert the user space address to a kernel logical address using the
    PFNs in the driver page table (umem_odp->pfn_list).
 4. Execute the operation.

Link: https://patch.msgid.link/r/20241220100936.2193541-6-matsuda-daisuke@fujitsu.com
Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Daisuke Matsuda 2024-12-20 19:09:36 +09:00 committed by Jason Gunthorpe
parent 2fae67ab63
commit b55e9d29ec
5 changed files with 86 additions and 2 deletions

View File

@ -107,6 +107,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
}
}

View File

@ -81,6 +81,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
/* defined in rxe_mr.c; used in rxe_mr.c and rxe_odp.c */
extern spinlock_t atomic_ops_lock;
/* rxe_mw.c */
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
@ -189,6 +192,8 @@ int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
u64 iova, int access_flags, struct rxe_mr *mr);
int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
enum rxe_mr_copy_dir dir);
/*
 * Execute an atomic operation (compare-and-swap or fetch-and-add) on the
 * 8-byte word at @iova in an ODP MR. The value read before the operation
 * is stored in *@orig_val.
 */
int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
u64 compare, u64 swap_add, u64 *orig_val);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline int
rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
@ -201,6 +206,12 @@ static inline int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
{
return -EOPNOTSUPP;
}
/*
 * Stub used when CONFIG_INFINIBAND_ON_DEMAND_PAGING is disabled: atomic
 * operations on ODP MRs cannot be served, so the request is always rejected
 * with RESPST_ERR_UNSUPPORTED_OPCODE and *orig_val is left untouched.
 */
static inline int
rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
u64 compare, u64 swap_add, u64 *orig_val)
{
return RESPST_ERR_UNSUPPORTED_OPCODE;
}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
#endif /* RXE_LOC_H */

View File

@ -469,7 +469,7 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
}
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
DEFINE_SPINLOCK(atomic_ops_lock);
int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
u64 compare, u64 swap_add, u64 *orig_val)

View File

@ -253,3 +253,72 @@ int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
return err;
}
/*
 * rxe_odp_do_atomic_op() - perform the atomic read-modify-write on an ODP MR
 * @mr:       memory region the target address belongs to
 * @iova:     address of the 8-byte target word
 * @opcode:   IB_OPCODE_RC_COMPARE_SWAP selects compare-and-swap; any other
 *            value is handled as fetch-and-add
 * @compare:  value the target must hold for compare-and-swap to store
 * @swap_add: value to store (compare-and-swap) or to add (fetch-and-add)
 * @orig_val: receives the value read from the target before modification
 *
 * The only caller visible here, rxe_odp_atomic_op(), invokes this between
 * rxe_odp_map_range_and_lock() and mutex_unlock(&umem_odp->umem_mutex).
 *
 * Return: 0 on success, otherwise a RESPST_ERR_* code.
 */
static int rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
				u64 compare, u64 swap_add, u64 *orig_val)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	unsigned int pfn_idx;
	unsigned int off;
	struct page *target;
	u64 *base;
	u64 *slot;
	u64 old;

	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
		rxe_dbg_mr(mr, "mr not in valid state\n");
		return RESPST_ERR_RKEY_VIOLATION;
	}

	/* The whole 8-byte operand must lie inside the MR. */
	if (mr_check_range(mr, iova, sizeof(u64))) {
		rxe_dbg_mr(mr, "iova out of range\n");
		return RESPST_ERR_RKEY_VIOLATION;
	}

	/* Locate the backing page through the driver's PFN table. */
	pfn_idx = (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
	off = iova & (BIT(umem_odp->page_shift) - 1);

	target = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
	if (!target)
		return RESPST_ERR_RKEY_VIOLATION;

	/* The operand has to be naturally (8-byte) aligned. */
	if (unlikely(off & 0x7)) {
		rxe_dbg_mr(mr, "iova not aligned\n");
		return RESPST_ERR_MISALIGNED_ATOMIC;
	}

	base = kmap_local_page(target);
	slot = base + (off >> 3);

	/* atomic_ops_lock serialises all atomic operations. */
	spin_lock_bh(&atomic_ops_lock);
	old = *slot;
	*orig_val = old;

	if (opcode != IB_OPCODE_RC_COMPARE_SWAP)
		*slot = old + swap_add;		/* fetch-and-add */
	else if (old == compare)
		*slot = swap_add;		/* compare-and-swap hit */
	spin_unlock_bh(&atomic_ops_lock);

	kunmap_local(base);

	return 0;
}
/*
 * rxe_odp_atomic_op() - execute an atomic operation on an ODP MR
 * @mr:       ODP memory region containing the target
 * @iova:     address of the 8-byte target word
 * @opcode:   atomic opcode taken from the request packet
 * @compare:  compare operand (used by compare-and-swap)
 * @swap_add: swap value or addend
 * @orig_val: receives the value read before the operation
 *
 * Makes sure the target page is mapped, then performs the operation and
 * drops umem_odp->umem_mutex, which rxe_odp_map_range_and_lock() is
 * expected to hold on successful return.
 *
 * Return: 0 on success, otherwise a RESPST_ERR_* responder state. The
 * responder consumes this value as a resp_states code, exactly like the
 * result of rxe_mr_do_atomic_op(), so a raw errno must never leak out.
 */
int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
		      u64 compare, u64 swap_add, u64 *orig_val)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	int err;

	/*
	 * One byte is enough to fault in the page holding @iova: a valid
	 * operand is 8-byte aligned and therefore cannot cross a page
	 * boundary; misaligned requests are rejected further down.
	 */
	err = rxe_odp_map_range_and_lock(mr, iova, sizeof(char), 0);
	if (err < 0) {
		/* Translate the errno into a responder state for the caller. */
		return RESPST_ERR_RKEY_VIOLATION;
	}

	err = rxe_odp_do_atomic_op(mr, iova, opcode, compare, swap_add,
				   orig_val);
	mutex_unlock(&umem_odp->umem_mutex);

	return err;
}

View File

@ -707,7 +707,10 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
u64 iova = qp->resp.va + qp->resp.offset;
if (mr->umem->is_odp)
err = RESPST_ERR_UNSUPPORTED_OPCODE;
err = rxe_odp_atomic_op(mr, iova, pkt->opcode,
atmeth_comp(pkt),
atmeth_swap_add(pkt),
&res->atomic.orig_val);
else
err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
atmeth_comp(pkt),