mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 02:24:24 +02:00
RDMA v6.16 merge window pull request
Usual collection of driver fixes:
- Small bug fixes and cleanups in hfi, hns, rxe, mlx5, mana, siw
- Further ODP functionality in rxe
- Remote access MRs in mana, along with more page sizes
- Improve CM scalability with a rwlock around the agent
- More trace points for hns
- ODP hmm conversion to the new two step dma API
- Support the ethernet HW device in mana as well as the RNIC
- Cleanups:
* Use secs_to_jiffies() when appropriate
* Use ERR_CAST() instead of naked casts
* Don't use %pK in printk
* Unused functions removed
* Allocation type matching
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCaDm95gAKCRCFwuHvBreF
YXJxAQCZ+p+mxt0rTeVI2j6YQ26thuvb/tH0Upu8epgdQ3T/ZgD/YOHBC6OrXWJa
Uz6BTiyz/xiyMtJLTD4kEiG2o74J1gE=
=DNQC
-----END PGP SIGNATURE-----
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Usual collection of driver fixes:
- Small bug fixes and cleanups in hfi, hns, rxe, mlx5, mana, siw
- Further ODP functionality in rxe
- Remote access MRs in mana, along with more page sizes
- Improve CM scalability with a rwlock around the agent
- More trace points for hns
- ODP hmm conversion to the new two step dma API
- Support the ethernet HW device in mana as well as the RNIC
- Cleanups:
- Use secs_to_jiffies() when appropriate
- Use ERR_CAST() instead of naked casts
- Don't use %pK in printk
- Unused functions removed
- Allocation type matching"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (57 commits)
RDMA/cma: Fix hang when cma_netevent_callback fails to queue_work
RDMA/bnxt_re: Support extended stats for Thor2 VF
RDMA/hns: Fix endian issue in trace events
RDMA/mlx5: Avoid flexible array warning
IB/cm: Remove dead code and adjust naming
RDMA/core: Avoid hmm_dma_map_alloc() for virtual DMA devices
RDMA/rxe: Break endless pagefault loop for RO pages
RDMA/bnxt_re: Fix return code of bnxt_re_configure_cc
RDMA/bnxt_re: Fix missing error handling for tx_queue
RDMA/bnxt_re: Fix incorrect display of inactivity_cp in debugfs output
RDMA/mlx5: Add support for 200Gbps per lane speeds
RDMA/mlx5: Remove the redundant MLX5_IB_STAGE_UAR stage
RDMA/iwcm: Fix use-after-free of work objects after cm_id destruction
net: mana: Add support for auxiliary device servicing events
RDMA/mana_ib: unify mana_ib functions to support any gdma device
RDMA/mana_ib: Add support of mana_ib for RNIC and ETH nic
net: mana: Probe rdma device in mana driver
RDMA/siw: replace redundant ternary operator with just rv
RDMA/umem: Separate implicit ODP initialization from explicit ODP
RDMA/core: Convert UMEM ODP DMA mapping to caching IOVA and page linkage
...
This commit is contained in:
commit
dd91b5e1d6
|
|
@ -36,6 +36,7 @@ MODULE_LICENSE("Dual BSD/GPL");
|
|||
|
||||
#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */
|
||||
#define CM_DIRECT_RETRY_CTX ((void *) 1UL)
|
||||
#define CM_MRA_SETTING 24 /* 4.096us * 2^24 = ~68.7 seconds */
|
||||
|
||||
static const char * const ibcm_rej_reason_strs[] = {
|
||||
[IB_CM_REJ_NO_QP] = "no QP",
|
||||
|
|
@ -167,7 +168,7 @@ struct cm_port {
|
|||
struct cm_device {
|
||||
struct kref kref;
|
||||
struct list_head list;
|
||||
spinlock_t mad_agent_lock;
|
||||
rwlock_t mad_agent_lock;
|
||||
struct ib_device *ib_device;
|
||||
u8 ack_delay;
|
||||
int going_down;
|
||||
|
|
@ -241,7 +242,6 @@ struct cm_id_private {
|
|||
u8 initiator_depth;
|
||||
u8 retry_count;
|
||||
u8 rnr_retry_count;
|
||||
u8 service_timeout;
|
||||
u8 target_ack_delay;
|
||||
|
||||
struct list_head work_list;
|
||||
|
|
@ -285,7 +285,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
|
|||
if (!cm_id_priv->av.port)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
mad_agent = cm_id_priv->av.port->mad_agent;
|
||||
if (!mad_agent) {
|
||||
m = ERR_PTR(-EINVAL);
|
||||
|
|
@ -311,7 +311,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
|
|||
m->ah = ah;
|
||||
|
||||
out:
|
||||
spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
return m;
|
||||
}
|
||||
|
||||
|
|
@ -1297,10 +1297,10 @@ static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
|
|||
if (!cm_id_priv->av.port)
|
||||
return cpu_to_be64(low_tid);
|
||||
|
||||
spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
if (cm_id_priv->av.port->mad_agent)
|
||||
hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
|
||||
spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
|
||||
return cpu_to_be64(hi_tid | low_tid);
|
||||
}
|
||||
|
||||
|
|
@ -1872,7 +1872,7 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
|
|||
|
||||
static void cm_format_mra(struct cm_mra_msg *mra_msg,
|
||||
struct cm_id_private *cm_id_priv,
|
||||
enum cm_msg_response msg_mraed, u8 service_timeout,
|
||||
enum cm_msg_response msg_mraed,
|
||||
const void *private_data, u8 private_data_len)
|
||||
{
|
||||
cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
|
||||
|
|
@ -1881,7 +1881,7 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg,
|
|||
be32_to_cpu(cm_id_priv->id.local_id));
|
||||
IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
|
||||
be32_to_cpu(cm_id_priv->id.remote_id));
|
||||
IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout);
|
||||
IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, CM_MRA_SETTING);
|
||||
|
||||
if (private_data && private_data_len)
|
||||
IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
|
||||
|
|
@ -1960,7 +1960,7 @@ static void cm_dup_req_handler(struct cm_work *work,
|
|||
switch (cm_id_priv->id.state) {
|
||||
case IB_CM_MRA_REQ_SENT:
|
||||
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
|
||||
CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
|
||||
CM_MSG_RESPONSE_REQ,
|
||||
cm_id_priv->private_data,
|
||||
cm_id_priv->private_data_len);
|
||||
break;
|
||||
|
|
@ -2454,7 +2454,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
|
|||
cm_id_priv->private_data_len);
|
||||
else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
|
||||
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
|
||||
CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
|
||||
CM_MSG_RESPONSE_REP,
|
||||
cm_id_priv->private_data,
|
||||
cm_id_priv->private_data_len);
|
||||
else
|
||||
|
|
@ -3094,26 +3094,13 @@ static int cm_rej_handler(struct cm_work *work)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
int ib_send_cm_mra(struct ib_cm_id *cm_id,
|
||||
u8 service_timeout,
|
||||
const void *private_data,
|
||||
u8 private_data_len)
|
||||
int ib_prepare_cm_mra(struct ib_cm_id *cm_id)
|
||||
{
|
||||
struct cm_id_private *cm_id_priv;
|
||||
struct ib_mad_send_buf *msg;
|
||||
enum ib_cm_state cm_state;
|
||||
enum ib_cm_lap_state lap_state;
|
||||
enum cm_msg_response msg_response;
|
||||
void *data;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
data = cm_copy_private_data(private_data, private_data_len);
|
||||
if (IS_ERR(data))
|
||||
return PTR_ERR(data);
|
||||
int ret = 0;
|
||||
|
||||
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
|
||||
|
||||
|
|
@ -3122,58 +3109,33 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
|
|||
case IB_CM_REQ_RCVD:
|
||||
cm_state = IB_CM_MRA_REQ_SENT;
|
||||
lap_state = cm_id->lap_state;
|
||||
msg_response = CM_MSG_RESPONSE_REQ;
|
||||
break;
|
||||
case IB_CM_REP_RCVD:
|
||||
cm_state = IB_CM_MRA_REP_SENT;
|
||||
lap_state = cm_id->lap_state;
|
||||
msg_response = CM_MSG_RESPONSE_REP;
|
||||
break;
|
||||
case IB_CM_ESTABLISHED:
|
||||
if (cm_id->lap_state == IB_CM_LAP_RCVD) {
|
||||
cm_state = cm_id->state;
|
||||
lap_state = IB_CM_MRA_LAP_SENT;
|
||||
msg_response = CM_MSG_RESPONSE_OTHER;
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
default:
|
||||
trace_icm_send_mra_unknown_err(&cm_id_priv->id);
|
||||
trace_icm_prepare_mra_unknown_err(&cm_id_priv->id);
|
||||
ret = -EINVAL;
|
||||
goto error_unlock;
|
||||
}
|
||||
|
||||
if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
|
||||
msg = cm_alloc_msg(cm_id_priv);
|
||||
if (IS_ERR(msg)) {
|
||||
ret = PTR_ERR(msg);
|
||||
goto error_unlock;
|
||||
}
|
||||
|
||||
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
|
||||
msg_response, service_timeout,
|
||||
private_data, private_data_len);
|
||||
trace_icm_send_mra(cm_id);
|
||||
ret = ib_post_send_mad(msg, NULL);
|
||||
if (ret)
|
||||
goto error_free_msg;
|
||||
}
|
||||
|
||||
cm_id->state = cm_state;
|
||||
cm_id->lap_state = lap_state;
|
||||
cm_id_priv->service_timeout = service_timeout;
|
||||
cm_set_private_data(cm_id_priv, data, private_data_len);
|
||||
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
return 0;
|
||||
cm_set_private_data(cm_id_priv, NULL, 0);
|
||||
|
||||
error_free_msg:
|
||||
cm_free_msg(msg);
|
||||
error_unlock:
|
||||
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
kfree(data);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_send_cm_mra);
|
||||
EXPORT_SYMBOL(ib_prepare_cm_mra);
|
||||
|
||||
static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
|
||||
{
|
||||
|
|
@ -3377,7 +3339,6 @@ static int cm_lap_handler(struct cm_work *work)
|
|||
|
||||
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
|
||||
CM_MSG_RESPONSE_OTHER,
|
||||
cm_id_priv->service_timeout,
|
||||
cm_id_priv->private_data,
|
||||
cm_id_priv->private_data_len);
|
||||
spin_unlock_irq(&cm_id_priv->lock);
|
||||
|
|
@ -3786,7 +3747,8 @@ static void cm_process_send_error(struct cm_id_private *cm_id_priv,
|
|||
spin_lock_irq(&cm_id_priv->lock);
|
||||
if (msg != cm_id_priv->msg) {
|
||||
spin_unlock_irq(&cm_id_priv->lock);
|
||||
cm_free_priv_msg(msg);
|
||||
cm_free_msg(msg);
|
||||
cm_deref_id(cm_id_priv);
|
||||
return;
|
||||
}
|
||||
cm_free_priv_msg(msg);
|
||||
|
|
@ -4378,7 +4340,7 @@ static int cm_add_one(struct ib_device *ib_device)
|
|||
return -ENOMEM;
|
||||
|
||||
kref_init(&cm_dev->kref);
|
||||
spin_lock_init(&cm_dev->mad_agent_lock);
|
||||
rwlock_init(&cm_dev->mad_agent_lock);
|
||||
cm_dev->ib_device = ib_device;
|
||||
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
|
||||
cm_dev->going_down = 0;
|
||||
|
|
@ -4494,9 +4456,9 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
|
|||
* The above ensures no call paths from the work are running,
|
||||
* the remaining paths all take the mad_agent_lock.
|
||||
*/
|
||||
spin_lock(&cm_dev->mad_agent_lock);
|
||||
write_lock(&cm_dev->mad_agent_lock);
|
||||
port->mad_agent = NULL;
|
||||
spin_unlock(&cm_dev->mad_agent_lock);
|
||||
write_unlock(&cm_dev->mad_agent_lock);
|
||||
ib_unregister_mad_agent(mad_agent);
|
||||
ib_port_unregister_client_groups(ib_device, i,
|
||||
cm_counter_groups);
|
||||
|
|
|
|||
|
|
@ -229,7 +229,7 @@ DEFINE_CM_ERR_EVENT(send_drep);
|
|||
DEFINE_CM_ERR_EVENT(dreq_unknown);
|
||||
DEFINE_CM_ERR_EVENT(send_unknown_rej);
|
||||
DEFINE_CM_ERR_EVENT(rej_unknown);
|
||||
DEFINE_CM_ERR_EVENT(send_mra_unknown);
|
||||
DEFINE_CM_ERR_EVENT(prepare_mra_unknown);
|
||||
DEFINE_CM_ERR_EVENT(mra_unknown);
|
||||
DEFINE_CM_ERR_EVENT(qp_init);
|
||||
DEFINE_CM_ERR_EVENT(qp_rtr);
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ MODULE_LICENSE("Dual BSD/GPL");
|
|||
|
||||
#define CMA_CM_RESPONSE_TIMEOUT 20
|
||||
#define CMA_MAX_CM_RETRIES 15
|
||||
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
|
||||
#define CMA_IBOE_PACKET_LIFETIME 16
|
||||
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
|
||||
|
||||
|
|
@ -146,19 +145,6 @@ struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
|
|||
}
|
||||
EXPORT_SYMBOL(rdma_iw_cm_id);
|
||||
|
||||
/**
|
||||
* rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
|
||||
* @res: rdma resource tracking entry pointer
|
||||
*/
|
||||
struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
|
||||
{
|
||||
struct rdma_id_private *id_priv =
|
||||
container_of(res, struct rdma_id_private, res);
|
||||
|
||||
return &id_priv->id;
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_res_to_id);
|
||||
|
||||
static int cma_add_one(struct ib_device *device);
|
||||
static void cma_remove_one(struct ib_device *device, void *client_data);
|
||||
|
||||
|
|
@ -2214,8 +2200,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
|
|||
case IB_CM_REP_RECEIVED:
|
||||
if (state == RDMA_CM_CONNECT &&
|
||||
(id_priv->id.qp_type != IB_QPT_UD)) {
|
||||
trace_cm_send_mra(id_priv);
|
||||
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
|
||||
trace_cm_prepare_mra(id_priv);
|
||||
ib_prepare_cm_mra(cm_id);
|
||||
}
|
||||
if (id_priv->id.qp) {
|
||||
event.status = cma_rep_recv(id_priv);
|
||||
|
|
@ -2476,8 +2462,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
|
|||
|
||||
if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
|
||||
conn_id->id.qp_type != IB_QPT_UD) {
|
||||
trace_cm_send_mra(cm_id->context);
|
||||
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
|
||||
trace_cm_prepare_mra(cm_id->context);
|
||||
ib_prepare_cm_mra(cm_id);
|
||||
}
|
||||
mutex_unlock(&conn_id->handler_mutex);
|
||||
|
||||
|
|
@ -5245,7 +5231,8 @@ static int cma_netevent_callback(struct notifier_block *self,
|
|||
neigh->ha, ETH_ALEN))
|
||||
continue;
|
||||
cma_id_get(current_id);
|
||||
queue_work(cma_wq, ¤t_id->id.net_work);
|
||||
if (!queue_work(cma_wq, ¤t_id->id.net_work))
|
||||
cma_id_put(current_id);
|
||||
}
|
||||
out:
|
||||
spin_unlock_irqrestore(&id_table_lock, flags);
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ DECLARE_EVENT_CLASS(cma_fsm_class,
|
|||
|
||||
DEFINE_CMA_FSM_EVENT(send_rtu);
|
||||
DEFINE_CMA_FSM_EVENT(send_rej);
|
||||
DEFINE_CMA_FSM_EVENT(send_mra);
|
||||
DEFINE_CMA_FSM_EVENT(prepare_mra);
|
||||
DEFINE_CMA_FSM_EVENT(send_sidr_req);
|
||||
DEFINE_CMA_FSM_EVENT(send_sidr_rep);
|
||||
DEFINE_CMA_FSM_EVENT(disconnect);
|
||||
|
|
|
|||
|
|
@ -368,12 +368,9 @@ EXPORT_SYMBOL(iw_cm_disconnect);
|
|||
/*
|
||||
* CM_ID <-- DESTROYING
|
||||
*
|
||||
* Clean up all resources associated with the connection and release
|
||||
* the initial reference taken by iw_create_cm_id.
|
||||
*
|
||||
* Returns true if and only if the last cm_id_priv reference has been dropped.
|
||||
* Clean up all resources associated with the connection.
|
||||
*/
|
||||
static bool destroy_cm_id(struct iw_cm_id *cm_id)
|
||||
static void destroy_cm_id(struct iw_cm_id *cm_id)
|
||||
{
|
||||
struct iwcm_id_private *cm_id_priv;
|
||||
struct ib_qp *qp;
|
||||
|
|
@ -442,20 +439,22 @@ static bool destroy_cm_id(struct iw_cm_id *cm_id)
|
|||
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
|
||||
iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
|
||||
}
|
||||
|
||||
return iwcm_deref_id(cm_id_priv);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is only called by the application thread and cannot
|
||||
* be called by the event thread. The function will wait for all
|
||||
* references to be released on the cm_id and then kfree the cm_id
|
||||
* object.
|
||||
* Destroy cm_id. If the cm_id still has other references, wait for all
|
||||
* references to be released on the cm_id and then release the initial
|
||||
* reference taken by iw_create_cm_id.
|
||||
*/
|
||||
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
|
||||
{
|
||||
if (!destroy_cm_id(cm_id))
|
||||
struct iwcm_id_private *cm_id_priv;
|
||||
|
||||
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
|
||||
destroy_cm_id(cm_id);
|
||||
if (refcount_read(&cm_id_priv->refcount) > 1)
|
||||
flush_workqueue(iwcm_wq);
|
||||
iwcm_deref_id(cm_id_priv);
|
||||
}
|
||||
EXPORT_SYMBOL(iw_destroy_cm_id);
|
||||
|
||||
|
|
@ -1035,8 +1034,10 @@ static void cm_work_handler(struct work_struct *_work)
|
|||
|
||||
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
|
||||
ret = process_event(cm_id_priv, &levent);
|
||||
if (ret)
|
||||
WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id));
|
||||
if (ret) {
|
||||
destroy_cm_id(&cm_id_priv->id);
|
||||
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
|
||||
}
|
||||
} else
|
||||
pr_debug("dropping event %d\n", levent.event);
|
||||
if (iwcm_deref_id(cm_id_priv))
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
|
|||
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
|
||||
recv_wc->recv_buf.grh, agent->port_num);
|
||||
if (IS_ERR(ah))
|
||||
return (void *) ah;
|
||||
return ERR_CAST(ah);
|
||||
|
||||
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
|
||||
msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
|
||||
|
|
|
|||
|
|
@ -41,67 +41,72 @@
|
|||
#include <linux/hugetlb.h>
|
||||
#include <linux/interval_tree.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/hmm-dma.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#include <rdma/ib_umem_odp.h>
|
||||
|
||||
#include "uverbs.h"
|
||||
|
||||
static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
|
||||
const struct mmu_interval_notifier_ops *ops)
|
||||
static void ib_init_umem_implicit_odp(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
int ret;
|
||||
umem_odp->is_implicit_odp = 1;
|
||||
umem_odp->umem.is_odp = 1;
|
||||
mutex_init(&umem_odp->umem_mutex);
|
||||
}
|
||||
|
||||
static int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
|
||||
const struct mmu_interval_notifier_ops *ops)
|
||||
{
|
||||
struct ib_device *dev = umem_odp->umem.ibdev;
|
||||
size_t page_size = 1UL << umem_odp->page_shift;
|
||||
struct hmm_dma_map *map;
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
size_t nr_entries;
|
||||
int ret = 0;
|
||||
|
||||
umem_odp->umem.is_odp = 1;
|
||||
mutex_init(&umem_odp->umem_mutex);
|
||||
|
||||
if (!umem_odp->is_implicit_odp) {
|
||||
size_t page_size = 1UL << umem_odp->page_shift;
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
size_t ndmas, npfns;
|
||||
start = ALIGN_DOWN(umem_odp->umem.address, page_size);
|
||||
if (check_add_overflow(umem_odp->umem.address,
|
||||
(unsigned long)umem_odp->umem.length, &end))
|
||||
return -EOVERFLOW;
|
||||
end = ALIGN(end, page_size);
|
||||
if (unlikely(end < page_size))
|
||||
return -EOVERFLOW;
|
||||
|
||||
start = ALIGN_DOWN(umem_odp->umem.address, page_size);
|
||||
if (check_add_overflow(umem_odp->umem.address,
|
||||
(unsigned long)umem_odp->umem.length,
|
||||
&end))
|
||||
return -EOVERFLOW;
|
||||
end = ALIGN(end, page_size);
|
||||
if (unlikely(end < page_size))
|
||||
return -EOVERFLOW;
|
||||
nr_entries = (end - start) >> PAGE_SHIFT;
|
||||
if (!(nr_entries * PAGE_SIZE / page_size))
|
||||
return -EINVAL;
|
||||
|
||||
ndmas = (end - start) >> umem_odp->page_shift;
|
||||
if (!ndmas)
|
||||
return -EINVAL;
|
||||
|
||||
npfns = (end - start) >> PAGE_SHIFT;
|
||||
umem_odp->pfn_list = kvcalloc(
|
||||
npfns, sizeof(*umem_odp->pfn_list),
|
||||
GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!umem_odp->pfn_list)
|
||||
return -ENOMEM;
|
||||
|
||||
umem_odp->dma_list = kvcalloc(
|
||||
ndmas, sizeof(*umem_odp->dma_list),
|
||||
GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!umem_odp->dma_list) {
|
||||
map = &umem_odp->map;
|
||||
if (ib_uses_virt_dma(dev)) {
|
||||
map->pfn_list = kvcalloc(nr_entries, sizeof(*map->pfn_list),
|
||||
GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!map->pfn_list)
|
||||
ret = -ENOMEM;
|
||||
goto out_pfn_list;
|
||||
}
|
||||
} else
|
||||
ret = hmm_dma_map_alloc(dev->dma_device, map,
|
||||
(end - start) >> PAGE_SHIFT,
|
||||
1 << umem_odp->page_shift);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = mmu_interval_notifier_insert(&umem_odp->notifier,
|
||||
umem_odp->umem.owning_mm,
|
||||
start, end - start, ops);
|
||||
if (ret)
|
||||
goto out_dma_list;
|
||||
}
|
||||
ret = mmu_interval_notifier_insert(&umem_odp->notifier,
|
||||
umem_odp->umem.owning_mm, start,
|
||||
end - start, ops);
|
||||
if (ret)
|
||||
goto out_free_map;
|
||||
|
||||
return 0;
|
||||
|
||||
out_dma_list:
|
||||
kvfree(umem_odp->dma_list);
|
||||
out_pfn_list:
|
||||
kvfree(umem_odp->pfn_list);
|
||||
out_free_map:
|
||||
if (ib_uses_virt_dma(dev))
|
||||
kfree(map->pfn_list);
|
||||
else
|
||||
hmm_dma_map_free(dev->dma_device, map);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +125,6 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
|
|||
{
|
||||
struct ib_umem *umem;
|
||||
struct ib_umem_odp *umem_odp;
|
||||
int ret;
|
||||
|
||||
if (access & IB_ACCESS_HUGETLB)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
|
@ -132,16 +136,10 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
|
|||
umem->ibdev = device;
|
||||
umem->writable = ib_access_writable(access);
|
||||
umem->owning_mm = current->mm;
|
||||
umem_odp->is_implicit_odp = 1;
|
||||
umem_odp->page_shift = PAGE_SHIFT;
|
||||
|
||||
umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
|
||||
ret = ib_init_umem_odp(umem_odp, NULL);
|
||||
if (ret) {
|
||||
put_pid(umem_odp->tgid);
|
||||
kfree(umem_odp);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
ib_init_umem_implicit_odp(umem_odp);
|
||||
return umem_odp;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
|
||||
|
|
@ -262,74 +260,41 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
|
|||
}
|
||||
EXPORT_SYMBOL(ib_umem_odp_get);
|
||||
|
||||
void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
|
||||
static void ib_umem_odp_free(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
struct ib_device *dev = umem_odp->umem.ibdev;
|
||||
|
||||
/*
|
||||
* Ensure that no more pages are mapped in the umem.
|
||||
*
|
||||
* It is the driver's responsibility to ensure, before calling us,
|
||||
* that the hardware will not attempt to access the MR any more.
|
||||
*/
|
||||
if (!umem_odp->is_implicit_odp) {
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
|
||||
ib_umem_end(umem_odp));
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
mmu_interval_notifier_remove(&umem_odp->notifier);
|
||||
kvfree(umem_odp->dma_list);
|
||||
kvfree(umem_odp->pfn_list);
|
||||
}
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
|
||||
ib_umem_end(umem_odp));
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
mmu_interval_notifier_remove(&umem_odp->notifier);
|
||||
if (ib_uses_virt_dma(dev))
|
||||
kfree(umem_odp->map.pfn_list);
|
||||
else
|
||||
hmm_dma_map_free(dev->dma_device, &umem_odp->map);
|
||||
}
|
||||
|
||||
void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
|
||||
{
|
||||
if (!umem_odp->is_implicit_odp)
|
||||
ib_umem_odp_free(umem_odp);
|
||||
|
||||
put_pid(umem_odp->tgid);
|
||||
kfree(umem_odp);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_odp_release);
|
||||
|
||||
/*
|
||||
* Map for DMA and insert a single page into the on-demand paging page tables.
|
||||
*
|
||||
* @umem: the umem to insert the page to.
|
||||
* @dma_index: index in the umem to add the dma to.
|
||||
* @page: the page struct to map and add.
|
||||
* @access_mask: access permissions needed for this page.
|
||||
*
|
||||
* The function returns -EFAULT if the DMA mapping operation fails.
|
||||
*
|
||||
*/
|
||||
static int ib_umem_odp_map_dma_single_page(
|
||||
struct ib_umem_odp *umem_odp,
|
||||
unsigned int dma_index,
|
||||
struct page *page,
|
||||
u64 access_mask)
|
||||
{
|
||||
struct ib_device *dev = umem_odp->umem.ibdev;
|
||||
dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
|
||||
|
||||
if (*dma_addr) {
|
||||
/*
|
||||
* If the page is already dma mapped it means it went through
|
||||
* a non-invalidating transition, like read-only to writable.
|
||||
* Resync the flags.
|
||||
*/
|
||||
*dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
|
||||
DMA_BIDIRECTIONAL);
|
||||
if (ib_dma_mapping_error(dev, *dma_addr)) {
|
||||
*dma_addr = 0;
|
||||
return -EFAULT;
|
||||
}
|
||||
umem_odp->npages++;
|
||||
*dma_addr |= access_mask;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
|
||||
*
|
||||
* Maps the range passed in the argument to DMA addresses.
|
||||
* The DMA addresses of the mapped pages is updated in umem_odp->dma_list.
|
||||
* Upon success the ODP MR will be locked to let caller complete its device
|
||||
* page table update.
|
||||
*
|
||||
|
|
@ -357,9 +322,6 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
|
|||
struct hmm_range range = {};
|
||||
unsigned long timeout;
|
||||
|
||||
if (access_mask == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (user_virt < ib_umem_start(umem_odp) ||
|
||||
user_virt + bcnt > ib_umem_end(umem_odp))
|
||||
return -EFAULT;
|
||||
|
|
@ -385,11 +347,11 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
|
|||
if (fault) {
|
||||
range.default_flags = HMM_PFN_REQ_FAULT;
|
||||
|
||||
if (access_mask & ODP_WRITE_ALLOWED_BIT)
|
||||
if (access_mask & HMM_PFN_WRITE)
|
||||
range.default_flags |= HMM_PFN_REQ_WRITE;
|
||||
}
|
||||
|
||||
range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
|
||||
range.hmm_pfns = &(umem_odp->map.pfn_list[pfn_start_idx]);
|
||||
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
|
||||
|
||||
retry:
|
||||
|
|
@ -417,22 +379,17 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
|
|||
for (pfn_index = 0; pfn_index < num_pfns;
|
||||
pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
|
||||
|
||||
if (fault) {
|
||||
/*
|
||||
* Since we asked for hmm_range_fault() to populate
|
||||
* pages it shouldn't return an error entry on success.
|
||||
*/
|
||||
WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
|
||||
WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
|
||||
} else {
|
||||
if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
|
||||
WARN_ON(umem_odp->dma_list[dma_index]);
|
||||
continue;
|
||||
}
|
||||
access_mask = ODP_READ_ALLOWED_BIT;
|
||||
if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
|
||||
access_mask |= ODP_WRITE_ALLOWED_BIT;
|
||||
}
|
||||
/*
|
||||
* Since we asked for hmm_range_fault() to populate
|
||||
* pages it shouldn't return an error entry on success.
|
||||
*/
|
||||
WARN_ON(fault && range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
|
||||
WARN_ON(fault && !(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
|
||||
if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID))
|
||||
continue;
|
||||
|
||||
if (range.hmm_pfns[pfn_index] & HMM_PFN_DMA_MAPPED)
|
||||
continue;
|
||||
|
||||
hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
|
||||
/* If a hugepage was detected and ODP wasn't set for, the umem
|
||||
|
|
@ -445,15 +402,6 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
|
|||
__func__, hmm_order, page_shift);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ib_umem_odp_map_dma_single_page(
|
||||
umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]),
|
||||
access_mask);
|
||||
if (ret < 0) {
|
||||
ibdev_dbg(umem_odp->umem.ibdev,
|
||||
"ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* upon success lock should stay on hold for the caller */
|
||||
if (!ret)
|
||||
|
|
@ -473,45 +421,38 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
|
|||
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
|
||||
u64 bound)
|
||||
{
|
||||
dma_addr_t dma_addr;
|
||||
dma_addr_t dma;
|
||||
int idx;
|
||||
u64 addr;
|
||||
struct ib_device *dev = umem_odp->umem.ibdev;
|
||||
u64 addr;
|
||||
|
||||
lockdep_assert_held(&umem_odp->umem_mutex);
|
||||
|
||||
virt = max_t(u64, virt, ib_umem_start(umem_odp));
|
||||
bound = min_t(u64, bound, ib_umem_end(umem_odp));
|
||||
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
|
||||
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
|
||||
dma = umem_odp->dma_list[idx];
|
||||
u64 offset = addr - ib_umem_start(umem_odp);
|
||||
size_t idx = offset >> umem_odp->page_shift;
|
||||
unsigned long pfn = umem_odp->map.pfn_list[idx];
|
||||
|
||||
/* The access flags guaranteed a valid DMA address in case was NULL */
|
||||
if (dma) {
|
||||
unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
|
||||
struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
|
||||
if (!hmm_dma_unmap_pfn(dev->dma_device, &umem_odp->map, idx))
|
||||
goto clear;
|
||||
|
||||
dma_addr = dma & ODP_DMA_ADDR_MASK;
|
||||
ib_dma_unmap_page(dev, dma_addr,
|
||||
BIT(umem_odp->page_shift),
|
||||
DMA_BIDIRECTIONAL);
|
||||
if (dma & ODP_WRITE_ALLOWED_BIT) {
|
||||
struct page *head_page = compound_head(page);
|
||||
/*
|
||||
* set_page_dirty prefers being called with
|
||||
* the page lock. However, MMU notifiers are
|
||||
* called sometimes with and sometimes without
|
||||
* the lock. We rely on the umem_mutex instead
|
||||
* to prevent other mmu notifiers from
|
||||
* continuing and allowing the page mapping to
|
||||
* be removed.
|
||||
*/
|
||||
set_page_dirty(head_page);
|
||||
}
|
||||
umem_odp->dma_list[idx] = 0;
|
||||
umem_odp->npages--;
|
||||
if (pfn & HMM_PFN_WRITE) {
|
||||
struct page *page = hmm_pfn_to_page(pfn);
|
||||
struct page *head_page = compound_head(page);
|
||||
/*
|
||||
* set_page_dirty prefers being called with
|
||||
* the page lock. However, MMU notifiers are
|
||||
* called sometimes with and sometimes without
|
||||
* the lock. We rely on the umem_mutex instead
|
||||
* to prevent other mmu notifiers from
|
||||
* continuing and allowing the page mapping to
|
||||
* be removed.
|
||||
*/
|
||||
set_page_dirty(head_page);
|
||||
}
|
||||
umem_odp->npages--;
|
||||
clear:
|
||||
umem_odp->map.pfn_list[idx] &= ~HMM_PFN_FLAGS;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
|
|||
fd, attrs);
|
||||
|
||||
if (IS_ERR(uobj))
|
||||
return (void *)uobj;
|
||||
return ERR_CAST(uobj);
|
||||
|
||||
uverbs_uobject_get(uobj);
|
||||
uobj_put_read(uobj);
|
||||
|
|
|
|||
|
|
@ -572,7 +572,7 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
|
|||
GFP_KERNEL : GFP_ATOMIC);
|
||||
if (IS_ERR(slave)) {
|
||||
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
|
||||
return (void *)slave;
|
||||
return ERR_CAST(slave);
|
||||
}
|
||||
ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
|
||||
rdma_lag_put_ah_roce_slave(slave);
|
||||
|
|
|
|||
|
|
@ -170,6 +170,9 @@ static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param
|
|||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
|
||||
*val = ccparam->tcp_cp;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
|
||||
*val = ccparam->inact_th;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
@ -203,7 +206,7 @@ static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer,
|
|||
return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc);
|
||||
}
|
||||
|
||||
static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
|
||||
static int bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
|
||||
{
|
||||
u32 modify_mask;
|
||||
|
||||
|
|
@ -247,7 +250,9 @@ static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offs
|
|||
ccparam->tcp_cp = val;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE:
|
||||
return -EOPNOTSUPP;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
|
||||
ccparam->inact_th = val;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE:
|
||||
ccparam->time_pph = val;
|
||||
|
|
@ -258,17 +263,20 @@ static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offs
|
|||
}
|
||||
|
||||
ccparam->mask = modify_mask;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val)
|
||||
{
|
||||
struct bnxt_qplib_cc_param ccparam = { };
|
||||
int rc;
|
||||
|
||||
/* Supporting only Gen 0 now */
|
||||
if (gen_ext == CC_CONFIG_GEN0_EXT0)
|
||||
bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
|
||||
else
|
||||
return -EINVAL;
|
||||
if (gen_ext != CC_CONFIG_GEN0_EXT0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -1113,7 +1113,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
|
|||
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
|
||||
if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
|
||||
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
|
||||
if (_is_ext_stats_supported(res->dattr->dev_cap_flags) && !res->is_vf)
|
||||
if (bnxt_ext_stats_supported(res->cctx, res->dattr->dev_cap_flags, res->is_vf))
|
||||
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED;
|
||||
|
||||
req.qp_flags = cpu_to_le32(qp_flags);
|
||||
|
|
|
|||
|
|
@ -846,7 +846,12 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
|
|||
|
||||
req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
|
||||
req.resp_addr = cpu_to_le64(sbuf.dma_addr);
|
||||
req.function_id = cpu_to_le32(fid);
|
||||
if (bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx) && rcfw->res->is_vf)
|
||||
req.function_id =
|
||||
cpu_to_le32(CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID |
|
||||
(fid << CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT));
|
||||
else
|
||||
req.function_id = cpu_to_le32(fid);
|
||||
req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID);
|
||||
|
||||
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
|
||||
|
|
|
|||
|
|
@ -124,7 +124,6 @@ struct opa_mad_notice_attr {
|
|||
} __packed ntc_2048;
|
||||
|
||||
};
|
||||
u8 class_data[];
|
||||
};
|
||||
|
||||
#define IB_VLARB_LOWPRI_0_31 1
|
||||
|
|
|
|||
|
|
@ -1361,16 +1361,6 @@ void sc_flush(struct send_context *sc)
|
|||
sc_wait_for_packet_egress(sc, 1);
|
||||
}
|
||||
|
||||
/* drop all packets on the context, no waiting until they are sent */
|
||||
void sc_drop(struct send_context *sc)
|
||||
{
|
||||
if (!sc)
|
||||
return;
|
||||
|
||||
dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
|
||||
__func__, sc->sw_index, sc->hw_context);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start the software reaction to a context halt or SPC freeze:
|
||||
* - mark the context as halted or frozen
|
||||
|
|
|
|||
|
|
@ -246,7 +246,6 @@ void sc_disable(struct send_context *sc);
|
|||
int sc_restart(struct send_context *sc);
|
||||
void sc_return_credits(struct send_context *sc);
|
||||
void sc_flush(struct send_context *sc);
|
||||
void sc_drop(struct send_context *sc);
|
||||
void sc_stop(struct send_context *sc, int bit);
|
||||
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
|
||||
pio_release_cb cb, void *arg);
|
||||
|
|
|
|||
|
|
@ -1520,24 +1520,6 @@ void sdma_all_running(struct hfi1_devdata *dd)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_all_idle() - called when the link goes down
|
||||
* @dd: hfi1_devdata
|
||||
*
|
||||
* This routine moves all engines to the idle state.
|
||||
*/
|
||||
void sdma_all_idle(struct hfi1_devdata *dd)
|
||||
{
|
||||
struct sdma_engine *sde;
|
||||
unsigned int i;
|
||||
|
||||
/* idle all engines */
|
||||
for (i = 0; i < dd->num_sdma; ++i) {
|
||||
sde = &dd->per_sdma[i];
|
||||
sdma_process_event(sde, sdma_event_e70_go_idle);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_start() - called to kick off state processing for all engines
|
||||
* @dd: hfi1_devdata
|
||||
|
|
|
|||
|
|
@ -373,7 +373,6 @@ void sdma_start(struct hfi1_devdata *dd);
|
|||
void sdma_exit(struct hfi1_devdata *dd);
|
||||
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines);
|
||||
void sdma_all_running(struct hfi1_devdata *dd);
|
||||
void sdma_all_idle(struct hfi1_devdata *dd);
|
||||
void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle);
|
||||
void sdma_freeze(struct hfi1_devdata *dd);
|
||||
void sdma_unfreeze(struct hfi1_devdata *dd);
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
|
|||
int ret = 0;
|
||||
|
||||
fd->entry_to_rb = kcalloc(uctxt->expected_count,
|
||||
sizeof(struct rb_node *),
|
||||
sizeof(*fd->entry_to_rb),
|
||||
GFP_KERNEL);
|
||||
if (!fd->entry_to_rb)
|
||||
return -ENOMEM;
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#
|
||||
|
||||
ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
|
||||
ccflags-y += -I $(src)
|
||||
|
||||
hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
|
||||
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@
|
|||
#include <linux/pci.h>
|
||||
#include <rdma/ib_addr.h>
|
||||
#include <rdma/ib_cache.h>
|
||||
#include "hnae3.h"
|
||||
#include "hns_roce_device.h"
|
||||
#include "hns_roce_hw_v2.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -1027,6 +1027,26 @@ struct hns_roce_dev {
|
|||
atomic64_t *dfx_cnt;
|
||||
};
|
||||
|
||||
enum hns_roce_trace_type {
|
||||
TRACE_SQ,
|
||||
TRACE_RQ,
|
||||
TRACE_SRQ,
|
||||
};
|
||||
|
||||
static inline const char *trace_type_to_str(enum hns_roce_trace_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case TRACE_SQ:
|
||||
return "SQ";
|
||||
case TRACE_RQ:
|
||||
return "RQ";
|
||||
case TRACE_SRQ:
|
||||
return "SRQ";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
|
||||
{
|
||||
return container_of(ib_dev, struct hns_roce_dev, ib_dev);
|
||||
|
|
|
|||
|
|
@ -43,13 +43,15 @@
|
|||
#include <rdma/ib_umem.h>
|
||||
#include <rdma/uverbs_ioctl.h>
|
||||
|
||||
#include "hnae3.h"
|
||||
#include "hns_roce_common.h"
|
||||
#include "hns_roce_device.h"
|
||||
#include "hns_roce_cmd.h"
|
||||
#include "hns_roce_hem.h"
|
||||
#include "hns_roce_hw_v2.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "hns_roce_trace.h"
|
||||
|
||||
enum {
|
||||
CMD_RST_PRC_OTHERS,
|
||||
CMD_RST_PRC_SUCCESS,
|
||||
|
|
@ -738,6 +740,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
|
|||
else
|
||||
ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
|
||||
|
||||
trace_hns_sq_wqe(qp->qpn, wqe_idx, wqe, 1 << qp->sq.wqe_shift,
|
||||
wr->wr_id, TRACE_SQ);
|
||||
if (unlikely(ret)) {
|
||||
*bad_wr = wr;
|
||||
goto out;
|
||||
|
|
@ -807,6 +811,9 @@ static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
|
|||
|
||||
wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
|
||||
fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
|
||||
|
||||
trace_hns_rq_wqe(hr_qp->qpn, wqe_idx, wqe, 1 << hr_qp->rq.wqe_shift,
|
||||
wr->wr_id, TRACE_RQ);
|
||||
}
|
||||
|
||||
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
|
||||
|
|
@ -943,7 +950,7 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
|
|||
static void update_srq_db(struct hns_roce_srq *srq)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
|
||||
struct hns_roce_v2_db db;
|
||||
struct hns_roce_v2_db db = {};
|
||||
|
||||
hr_reg_write(&db, DB_TAG, srq->srqn);
|
||||
hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
|
||||
|
|
@ -984,6 +991,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
|
|||
fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
|
||||
fill_wqe_idx(srq, wqe_idx);
|
||||
srq->wrid[wqe_idx] = wr->wr_id;
|
||||
|
||||
trace_hns_srq_wqe(srq->srqn, wqe_idx, wqe, 1 << srq->wqe_shift,
|
||||
wr->wr_id, TRACE_SRQ);
|
||||
}
|
||||
|
||||
if (likely(nreq)) {
|
||||
|
|
@ -1311,6 +1321,8 @@ static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
|
|||
tail = csq->head;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
trace_hns_cmdq_req(hr_dev, &desc[i]);
|
||||
|
||||
csq->desc[csq->head++] = desc[i];
|
||||
if (csq->head == csq->desc_num)
|
||||
csq->head = 0;
|
||||
|
|
@ -1325,6 +1337,8 @@ static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
|
|||
if (hns_roce_cmq_csq_done(hr_dev)) {
|
||||
ret = 0;
|
||||
for (i = 0; i < num; i++) {
|
||||
trace_hns_cmdq_resp(hr_dev, &csq->desc[tail]);
|
||||
|
||||
/* check the result of hardware write back */
|
||||
desc_ret = le16_to_cpu(csq->desc[tail++].retval);
|
||||
if (tail == csq->desc_num)
|
||||
|
|
@ -4302,8 +4316,7 @@ static inline int get_pdn(struct ib_pd *ib_pd)
|
|||
}
|
||||
|
||||
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
|
||||
struct hns_roce_v2_qp_context *context,
|
||||
struct hns_roce_v2_qp_context *qpc_mask)
|
||||
struct hns_roce_v2_qp_context *context)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
|
||||
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
|
||||
|
|
@ -5122,7 +5135,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
|
|||
|
||||
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
|
||||
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
|
||||
modify_qp_reset_to_init(ibqp, context, qpc_mask);
|
||||
modify_qp_reset_to_init(ibqp, context);
|
||||
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
|
||||
modify_qp_init_to_init(ibqp, context, qpc_mask);
|
||||
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
|
||||
|
|
@ -5313,6 +5326,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp,
|
|||
return;
|
||||
|
||||
spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
|
||||
trace_hns_sq_flush_cqe(hr_qp->qpn, hr_qp->sq.head, TRACE_SQ);
|
||||
hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head);
|
||||
hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX);
|
||||
hr_qp->state = IB_QPS_ERR;
|
||||
|
|
@ -5322,6 +5336,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp,
|
|||
return;
|
||||
|
||||
spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
|
||||
trace_hns_rq_flush_cqe(hr_qp->qpn, hr_qp->rq.head, TRACE_RQ);
|
||||
hr_reg_write(context, QPC_RQ_PRODUCER_IDX, hr_qp->rq.head);
|
||||
hr_reg_clear(qpc_mask, QPC_RQ_PRODUCER_IDX);
|
||||
spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
|
||||
|
|
@ -6248,6 +6263,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
|
|||
eq->sub_type = sub_type;
|
||||
++eq->cons_index;
|
||||
aeqe_found = IRQ_HANDLED;
|
||||
trace_hns_ae_info(event_type, aeqe, eq->eqe_size);
|
||||
|
||||
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
#define _HNS_ROCE_HW_V2_H
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include "hnae3.h"
|
||||
|
||||
#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
|
||||
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@
|
|||
#include <rdma/ib_smi.h>
|
||||
#include <rdma/ib_user_verbs.h>
|
||||
#include <rdma/ib_cache.h>
|
||||
#include "hnae3.h"
|
||||
#include "hns_roce_common.h"
|
||||
#include "hns_roce_device.h"
|
||||
#include "hns_roce_hem.h"
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "hns_roce_device.h"
|
||||
#include "hns_roce_cmd.h"
|
||||
#include "hns_roce_hem.h"
|
||||
#include "hns_roce_trace.h"
|
||||
|
||||
static u32 hw_index_to_key(int ind)
|
||||
{
|
||||
|
|
@ -159,6 +160,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
|
|||
if (IS_ERR(mailbox))
|
||||
return PTR_ERR(mailbox);
|
||||
|
||||
trace_hns_mr(mr);
|
||||
if (mr->type != MR_TYPE_FRMR)
|
||||
ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
|
||||
else
|
||||
|
|
@ -1146,6 +1148,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
|
|||
struct ib_device *ibdev = &hr_dev->ib_dev;
|
||||
int ret;
|
||||
|
||||
trace_hns_buf_attr(buf_attr);
|
||||
/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
|
||||
* to finish the MTT configuration.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
#include <rdma/rdma_cm.h>
|
||||
#include <rdma/restrack.h>
|
||||
#include <uapi/rdma/rdma_netlink.h>
|
||||
#include "hnae3.h"
|
||||
#include "hns_roce_common.h"
|
||||
#include "hns_roce_device.h"
|
||||
#include "hns_roce_hw_v2.h"
|
||||
|
|
|
|||
216
drivers/infiniband/hw/hns/hns_roce_trace.h
Normal file
216
drivers/infiniband/hw/hns/hns_roce_trace.h
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0+ */
|
||||
/*
|
||||
* Copyright (c) 2025 Hisilicon Limited.
|
||||
*/
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM hns_roce
|
||||
|
||||
#if !defined(__HNS_ROCE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define __HNS_ROCE_TRACE_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include <linux/string_choices.h>
|
||||
#include "hns_roce_device.h"
|
||||
#include "hns_roce_hw_v2.h"
|
||||
|
||||
DECLARE_EVENT_CLASS(flush_head_template,
|
||||
TP_PROTO(unsigned long qpn, u32 pi,
|
||||
enum hns_roce_trace_type type),
|
||||
TP_ARGS(qpn, pi, type),
|
||||
|
||||
TP_STRUCT__entry(__field(unsigned long, qpn)
|
||||
__field(u32, pi)
|
||||
__field(enum hns_roce_trace_type, type)
|
||||
),
|
||||
|
||||
TP_fast_assign(__entry->qpn = qpn;
|
||||
__entry->pi = pi;
|
||||
__entry->type = type;
|
||||
),
|
||||
|
||||
TP_printk("%s 0x%lx flush head 0x%x.",
|
||||
trace_type_to_str(__entry->type),
|
||||
__entry->qpn, __entry->pi)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(flush_head_template, hns_sq_flush_cqe,
|
||||
TP_PROTO(unsigned long qpn, u32 pi,
|
||||
enum hns_roce_trace_type type),
|
||||
TP_ARGS(qpn, pi, type));
|
||||
DEFINE_EVENT(flush_head_template, hns_rq_flush_cqe,
|
||||
TP_PROTO(unsigned long qpn, u32 pi,
|
||||
enum hns_roce_trace_type type),
|
||||
TP_ARGS(qpn, pi, type));
|
||||
|
||||
#define MAX_SGE_PER_WQE 64
|
||||
#define MAX_WQE_SIZE (MAX_SGE_PER_WQE * HNS_ROCE_SGE_SIZE)
|
||||
DECLARE_EVENT_CLASS(wqe_template,
|
||||
TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len,
|
||||
u64 id, enum hns_roce_trace_type type),
|
||||
TP_ARGS(qpn, idx, wqe, len, id, type),
|
||||
|
||||
TP_STRUCT__entry(__field(unsigned long, qpn)
|
||||
__field(u32, idx)
|
||||
__array(u32, wqe,
|
||||
MAX_WQE_SIZE / sizeof(__le32))
|
||||
__field(u32, len)
|
||||
__field(u64, id)
|
||||
__field(enum hns_roce_trace_type, type)
|
||||
),
|
||||
|
||||
TP_fast_assign(__entry->qpn = qpn;
|
||||
__entry->idx = idx;
|
||||
__entry->id = id;
|
||||
__entry->len = len / sizeof(__le32);
|
||||
__entry->type = type;
|
||||
for (int i = 0; i < __entry->len; i++)
|
||||
__entry->wqe[i] = le32_to_cpu(((__le32 *)wqe)[i]);
|
||||
),
|
||||
|
||||
TP_printk("%s 0x%lx wqe(0x%x/0x%llx): %s",
|
||||
trace_type_to_str(__entry->type),
|
||||
__entry->qpn, __entry->idx, __entry->id,
|
||||
__print_array(__entry->wqe, __entry->len,
|
||||
sizeof(__le32)))
|
||||
);
|
||||
|
||||
DEFINE_EVENT(wqe_template, hns_sq_wqe,
|
||||
TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
|
||||
enum hns_roce_trace_type type),
|
||||
TP_ARGS(qpn, idx, wqe, len, id, type));
|
||||
DEFINE_EVENT(wqe_template, hns_rq_wqe,
|
||||
TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
|
||||
enum hns_roce_trace_type type),
|
||||
TP_ARGS(qpn, idx, wqe, len, id, type));
|
||||
DEFINE_EVENT(wqe_template, hns_srq_wqe,
|
||||
TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
|
||||
enum hns_roce_trace_type type),
|
||||
TP_ARGS(qpn, idx, wqe, len, id, type));
|
||||
|
||||
TRACE_EVENT(hns_ae_info,
|
||||
TP_PROTO(int event_type, void *aeqe, unsigned int len),
|
||||
TP_ARGS(event_type, aeqe, len),
|
||||
|
||||
TP_STRUCT__entry(__field(int, event_type)
|
||||
__array(u32, aeqe,
|
||||
HNS_ROCE_V3_EQE_SIZE / sizeof(__le32))
|
||||
__field(u32, len)
|
||||
),
|
||||
|
||||
TP_fast_assign(__entry->event_type = event_type;
|
||||
__entry->len = len / sizeof(__le32);
|
||||
for (int i = 0; i < __entry->len; i++)
|
||||
__entry->aeqe[i] = le32_to_cpu(((__le32 *)aeqe)[i]);
|
||||
),
|
||||
|
||||
TP_printk("event %2d aeqe: %s", __entry->event_type,
|
||||
__print_array(__entry->aeqe, __entry->len, sizeof(__le32)))
|
||||
);
|
||||
|
||||
TRACE_EVENT(hns_mr,
|
||||
TP_PROTO(struct hns_roce_mr *mr),
|
||||
TP_ARGS(mr),
|
||||
|
||||
TP_STRUCT__entry(__field(u64, iova)
|
||||
__field(u64, size)
|
||||
__field(u32, key)
|
||||
__field(u32, pd)
|
||||
__field(u32, pbl_hop_num)
|
||||
__field(u32, npages)
|
||||
__field(int, type)
|
||||
__field(int, enabled)
|
||||
),
|
||||
|
||||
TP_fast_assign(__entry->iova = mr->iova;
|
||||
__entry->size = mr->size;
|
||||
__entry->key = mr->key;
|
||||
__entry->pd = mr->pd;
|
||||
__entry->pbl_hop_num = mr->pbl_hop_num;
|
||||
__entry->npages = mr->npages;
|
||||
__entry->type = mr->type;
|
||||
__entry->enabled = mr->enabled;
|
||||
),
|
||||
|
||||
TP_printk("iova:0x%llx, size:%llu, key:%u, pd:%u, pbl_hop:%u, npages:%u, type:%d, status:%d",
|
||||
__entry->iova, __entry->size, __entry->key,
|
||||
__entry->pd, __entry->pbl_hop_num, __entry->npages,
|
||||
__entry->type, __entry->enabled)
|
||||
);
|
||||
|
||||
TRACE_EVENT(hns_buf_attr,
|
||||
TP_PROTO(struct hns_roce_buf_attr *attr),
|
||||
TP_ARGS(attr),
|
||||
|
||||
TP_STRUCT__entry(__field(unsigned int, region_count)
|
||||
__field(unsigned int, region0_size)
|
||||
__field(int, region0_hopnum)
|
||||
__field(unsigned int, region1_size)
|
||||
__field(int, region1_hopnum)
|
||||
__field(unsigned int, region2_size)
|
||||
__field(int, region2_hopnum)
|
||||
__field(unsigned int, page_shift)
|
||||
__field(bool, mtt_only)
|
||||
),
|
||||
|
||||
TP_fast_assign(__entry->region_count = attr->region_count;
|
||||
__entry->region0_size = attr->region[0].size;
|
||||
__entry->region0_hopnum = attr->region[0].hopnum;
|
||||
__entry->region1_size = attr->region[1].size;
|
||||
__entry->region1_hopnum = attr->region[1].hopnum;
|
||||
__entry->region2_size = attr->region[2].size;
|
||||
__entry->region2_hopnum = attr->region[2].hopnum;
|
||||
__entry->page_shift = attr->page_shift;
|
||||
__entry->mtt_only = attr->mtt_only;
|
||||
),
|
||||
|
||||
TP_printk("rg cnt:%u, pg_sft:0x%x, mtt_only:%s, rg 0 (sz:%u, hop:%u), rg 1 (sz:%u, hop:%u), rg 2 (sz:%u, hop:%u)\n",
|
||||
__entry->region_count, __entry->page_shift,
|
||||
str_yes_no(__entry->mtt_only),
|
||||
__entry->region0_size, __entry->region0_hopnum,
|
||||
__entry->region1_size, __entry->region1_hopnum,
|
||||
__entry->region2_size, __entry->region2_hopnum)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(cmdq,
|
||||
TP_PROTO(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_cmq_desc *desc),
|
||||
TP_ARGS(hr_dev, desc),
|
||||
|
||||
TP_STRUCT__entry(__string(dev_name, dev_name(hr_dev->dev))
|
||||
__field(u16, opcode)
|
||||
__field(u16, flag)
|
||||
__field(u16, retval)
|
||||
__array(u32, data, 6)
|
||||
),
|
||||
|
||||
TP_fast_assign(__assign_str(dev_name);
|
||||
__entry->opcode = le16_to_cpu(desc->opcode);
|
||||
__entry->flag = le16_to_cpu(desc->flag);
|
||||
__entry->retval = le16_to_cpu(desc->retval);
|
||||
for (int i = 0; i < 6; i++)
|
||||
__entry->data[i] = le32_to_cpu(desc->data[i]);
|
||||
),
|
||||
|
||||
TP_printk("%s cmdq opcode:0x%x, flag:0x%x, retval:0x%x, data:%s\n",
|
||||
__get_str(dev_name), __entry->opcode,
|
||||
__entry->flag, __entry->retval,
|
||||
__print_array(__entry->data, 6, sizeof(__le32)))
|
||||
);
|
||||
|
||||
DEFINE_EVENT(cmdq, hns_cmdq_req,
|
||||
TP_PROTO(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_cmq_desc *desc),
|
||||
TP_ARGS(hr_dev, desc));
|
||||
DEFINE_EVENT(cmdq, hns_cmdq_resp,
|
||||
TP_PROTO(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_cmq_desc *desc),
|
||||
TP_ARGS(hr_dev, desc));
|
||||
|
||||
#endif /* __HNS_ROCE_TRACE_H */
|
||||
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_FILE hns_roce_trace
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH .
|
||||
#include <trace/define_trace.h>
|
||||
|
|
@ -3131,7 +3131,7 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
|
|||
writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
|
||||
|
||||
ibdev_dbg(to_ibdev(cqp->dev),
|
||||
"WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%pK] cqp[%p] polarity[x%04x]\n",
|
||||
"WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n",
|
||||
cqp->sq_size, cqp->hw_sq_size, cqp->sq_base,
|
||||
(u64 *)(uintptr_t)cqp->sq_pa, cqp, cqp->polarity);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc,
|
|||
chunk->vaddr = sd_entry->u.bp.addr.va + offset;
|
||||
chunk->fpm_addr = pble_rsrc->next_fpm_addr;
|
||||
ibdev_dbg(to_ibdev(dev),
|
||||
"PBLE: chunk_size[%lld] = 0x%llx vaddr=0x%pK fpm_addr = %llx\n",
|
||||
"PBLE: chunk_size[%lld] = 0x%llx vaddr=0x%p fpm_addr = %llx\n",
|
||||
chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -15,14 +15,12 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
|
|||
struct ib_device *ibdev = ibcq->device;
|
||||
struct mana_ib_create_cq ucmd = {};
|
||||
struct mana_ib_dev *mdev;
|
||||
struct gdma_context *gc;
|
||||
bool is_rnic_cq;
|
||||
u32 doorbell;
|
||||
u32 buf_size;
|
||||
int err;
|
||||
|
||||
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
gc = mdev_to_gc(mdev);
|
||||
|
||||
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
|
||||
cq->cq_handle = INVALID_MANA_HANDLE;
|
||||
|
|
@ -65,7 +63,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
|
|||
ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
|
||||
return err;
|
||||
}
|
||||
doorbell = gc->mana_ib.doorbell;
|
||||
doorbell = mdev->gdma_dev->doorbell;
|
||||
}
|
||||
|
||||
if (is_rnic_cq) {
|
||||
|
|
|
|||
|
|
@ -101,103 +101,95 @@ static int mana_ib_probe(struct auxiliary_device *adev,
|
|||
const struct auxiliary_device_id *id)
|
||||
{
|
||||
struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
|
||||
struct gdma_context *gc = madev->mdev->gdma_context;
|
||||
struct mana_context *mc = gc->mana.driver_data;
|
||||
struct gdma_dev *mdev = madev->mdev;
|
||||
struct net_device *ndev;
|
||||
struct mana_context *mc;
|
||||
struct mana_ib_dev *dev;
|
||||
u8 mac_addr[ETH_ALEN];
|
||||
int ret;
|
||||
|
||||
mc = mdev->driver_data;
|
||||
|
||||
dev = ib_alloc_device(mana_ib_dev, ib_dev);
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
|
||||
|
||||
dev->ib_dev.phys_port_cnt = mc->num_ports;
|
||||
|
||||
ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
|
||||
mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
|
||||
|
||||
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
|
||||
|
||||
/*
|
||||
* num_comp_vectors needs to set to the max MSIX index
|
||||
* when interrupts and event queues are implemented
|
||||
*/
|
||||
dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
|
||||
dev->ib_dev.dev.parent = mdev->gdma_context->dev;
|
||||
|
||||
ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
|
||||
if (!ndev) {
|
||||
ret = -ENODEV;
|
||||
ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
|
||||
goto free_ib_device;
|
||||
}
|
||||
ether_addr_copy(mac_addr, ndev->dev_addr);
|
||||
addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
|
||||
ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
|
||||
/* mana_get_primary_netdev() returns ndev with refcount held */
|
||||
netdev_put(ndev, &dev->dev_tracker);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
|
||||
goto free_ib_device;
|
||||
}
|
||||
|
||||
ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
|
||||
ret);
|
||||
goto free_ib_device;
|
||||
}
|
||||
dev->gdma_dev = &mdev->gdma_context->mana_ib;
|
||||
|
||||
dev->nb.notifier_call = mana_ib_netdev_event;
|
||||
ret = register_netdevice_notifier(&dev->nb);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
|
||||
ret);
|
||||
goto deregister_device;
|
||||
}
|
||||
|
||||
ret = mana_ib_gd_query_adapter_caps(dev);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
|
||||
ret);
|
||||
goto deregister_net_notifier;
|
||||
}
|
||||
|
||||
ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
|
||||
|
||||
ret = mana_ib_create_eqs(dev);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
|
||||
goto deregister_net_notifier;
|
||||
}
|
||||
|
||||
ret = mana_ib_gd_create_rnic_adapter(dev);
|
||||
if (ret)
|
||||
goto destroy_eqs;
|
||||
|
||||
dev->ib_dev.num_comp_vectors = gc->max_num_queues;
|
||||
dev->ib_dev.dev.parent = gc->dev;
|
||||
dev->gdma_dev = mdev;
|
||||
xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
|
||||
ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
|
||||
ret);
|
||||
goto destroy_rnic;
|
||||
|
||||
if (mana_ib_is_rnic(dev)) {
|
||||
dev->ib_dev.phys_port_cnt = 1;
|
||||
ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
|
||||
if (!ndev) {
|
||||
ret = -ENODEV;
|
||||
ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
|
||||
goto free_ib_device;
|
||||
}
|
||||
ether_addr_copy(mac_addr, ndev->dev_addr);
|
||||
addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
|
||||
ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
|
||||
/* mana_get_primary_netdev() returns ndev with refcount held */
|
||||
netdev_put(ndev, &dev->dev_tracker);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
|
||||
goto free_ib_device;
|
||||
}
|
||||
|
||||
dev->nb.notifier_call = mana_ib_netdev_event;
|
||||
ret = register_netdevice_notifier(&dev->nb);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
|
||||
ret);
|
||||
goto free_ib_device;
|
||||
}
|
||||
|
||||
ret = mana_ib_gd_query_adapter_caps(dev);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
|
||||
goto deregister_net_notifier;
|
||||
}
|
||||
|
||||
ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
|
||||
|
||||
ret = mana_ib_create_eqs(dev);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
|
||||
goto deregister_net_notifier;
|
||||
}
|
||||
|
||||
ret = mana_ib_gd_create_rnic_adapter(dev);
|
||||
if (ret)
|
||||
goto destroy_eqs;
|
||||
|
||||
ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
|
||||
goto destroy_rnic;
|
||||
}
|
||||
} else {
|
||||
dev->ib_dev.phys_port_cnt = mc->num_ports;
|
||||
ret = mana_eth_query_adapter_caps(dev);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
|
||||
goto free_ib_device;
|
||||
}
|
||||
}
|
||||
|
||||
dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
|
||||
MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
|
||||
dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
|
||||
MANA_AV_BUFFER_SIZE, 0);
|
||||
if (!dev->av_pool) {
|
||||
ret = -ENOMEM;
|
||||
goto destroy_rnic;
|
||||
}
|
||||
|
||||
ret = ib_register_device(&dev->ib_dev, "mana_%d",
|
||||
mdev->gdma_context->dev);
|
||||
ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
|
||||
mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
|
||||
|
||||
ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
|
||||
gc->dev);
|
||||
if (ret)
|
||||
goto deallocate_pool;
|
||||
|
||||
|
|
@ -208,15 +200,16 @@ static int mana_ib_probe(struct auxiliary_device *adev,
|
|||
deallocate_pool:
|
||||
dma_pool_destroy(dev->av_pool);
|
||||
destroy_rnic:
|
||||
xa_destroy(&dev->qp_table_wq);
|
||||
mana_ib_gd_destroy_rnic_adapter(dev);
|
||||
if (mana_ib_is_rnic(dev))
|
||||
mana_ib_gd_destroy_rnic_adapter(dev);
|
||||
destroy_eqs:
|
||||
mana_ib_destroy_eqs(dev);
|
||||
if (mana_ib_is_rnic(dev))
|
||||
mana_ib_destroy_eqs(dev);
|
||||
deregister_net_notifier:
|
||||
unregister_netdevice_notifier(&dev->nb);
|
||||
deregister_device:
|
||||
mana_gd_deregister_device(dev->gdma_dev);
|
||||
if (mana_ib_is_rnic(dev))
|
||||
unregister_netdevice_notifier(&dev->nb);
|
||||
free_ib_device:
|
||||
xa_destroy(&dev->qp_table_wq);
|
||||
ib_dealloc_device(&dev->ib_dev);
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -227,25 +220,24 @@ static void mana_ib_remove(struct auxiliary_device *adev)
|
|||
|
||||
ib_unregister_device(&dev->ib_dev);
|
||||
dma_pool_destroy(dev->av_pool);
|
||||
if (mana_ib_is_rnic(dev)) {
|
||||
mana_ib_gd_destroy_rnic_adapter(dev);
|
||||
mana_ib_destroy_eqs(dev);
|
||||
unregister_netdevice_notifier(&dev->nb);
|
||||
}
|
||||
xa_destroy(&dev->qp_table_wq);
|
||||
mana_ib_gd_destroy_rnic_adapter(dev);
|
||||
mana_ib_destroy_eqs(dev);
|
||||
unregister_netdevice_notifier(&dev->nb);
|
||||
mana_gd_deregister_device(dev->gdma_dev);
|
||||
ib_dealloc_device(&dev->ib_dev);
|
||||
}
|
||||
|
||||
static const struct auxiliary_device_id mana_id_table[] = {
|
||||
{
|
||||
.name = "mana.rdma",
|
||||
},
|
||||
{ .name = "mana.rdma", },
|
||||
{ .name = "mana.eth", },
|
||||
{},
|
||||
};
|
||||
|
||||
MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
|
||||
|
||||
static struct auxiliary_driver mana_driver = {
|
||||
.name = "rdma",
|
||||
.probe = mana_ib_probe,
|
||||
.remove = mana_ib_remove,
|
||||
.id_table = mana_id_table,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
*/
|
||||
|
||||
#include "mana_ib.h"
|
||||
#include "linux/pci.h"
|
||||
|
||||
void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
|
||||
u32 port)
|
||||
|
|
@ -243,7 +244,6 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
|
|||
int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
|
||||
struct mana_ib_queue *queue)
|
||||
{
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
struct gdma_queue_spec spec = {};
|
||||
int err;
|
||||
|
||||
|
|
@ -252,7 +252,7 @@ int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_qu
|
|||
spec.type = type;
|
||||
spec.monitor_avl_buf = false;
|
||||
spec.queue_size = size;
|
||||
err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem);
|
||||
err = mana_gd_create_mana_wq_cq(mdev->gdma_dev, &spec, &queue->kmem);
|
||||
if (err)
|
||||
return err;
|
||||
/* take ownership into mana_ib from mana */
|
||||
|
|
@ -479,7 +479,7 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
|
|||
{
|
||||
unsigned long page_sz;
|
||||
|
||||
page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, virt);
|
||||
page_sz = ib_umem_find_best_pgsz(umem, dev->adapter_caps.page_size_cap, virt);
|
||||
if (!page_sz) {
|
||||
ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
|
||||
return -EINVAL;
|
||||
|
|
@ -494,7 +494,7 @@ int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_ume
|
|||
unsigned long page_sz;
|
||||
|
||||
/* Hardware requires dma region to align to chosen page size */
|
||||
page_sz = ib_umem_find_best_pgoff(umem, PAGE_SZ_BM, 0);
|
||||
page_sz = ib_umem_find_best_pgoff(umem, dev->adapter_caps.page_size_cap, 0);
|
||||
if (!page_sz) {
|
||||
ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
|
||||
return -EINVAL;
|
||||
|
|
@ -551,6 +551,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
|
|||
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
|
||||
struct ib_port_immutable *immutable)
|
||||
{
|
||||
struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
struct ib_port_attr attr;
|
||||
int err;
|
||||
|
||||
|
|
@ -560,10 +561,12 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
|
|||
|
||||
immutable->pkey_tbl_len = attr.pkey_tbl_len;
|
||||
immutable->gid_tbl_len = attr.gid_tbl_len;
|
||||
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
|
||||
if (port_num == 1) {
|
||||
immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
|
||||
|
||||
if (mana_ib_is_rnic(dev)) {
|
||||
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
|
||||
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
|
||||
} else {
|
||||
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -572,12 +575,14 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
|
|||
int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
|
||||
struct ib_udata *uhw)
|
||||
{
|
||||
struct mana_ib_dev *dev = container_of(ibdev,
|
||||
struct mana_ib_dev, ib_dev);
|
||||
struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
struct pci_dev *pdev = to_pci_dev(mdev_to_gc(dev)->dev);
|
||||
|
||||
memset(props, 0, sizeof(*props));
|
||||
props->vendor_id = pdev->vendor;
|
||||
props->vendor_part_id = dev->gdma_dev->dev_id.type;
|
||||
props->max_mr_size = MANA_IB_MAX_MR_SIZE;
|
||||
props->page_size_cap = PAGE_SZ_BM;
|
||||
props->page_size_cap = dev->adapter_caps.page_size_cap;
|
||||
props->max_qp = dev->adapter_caps.max_qp_count;
|
||||
props->max_qp_wr = dev->adapter_caps.max_qp_wr;
|
||||
props->device_cap_flags = IB_DEVICE_RC_RNR_NAK_GEN;
|
||||
|
|
@ -596,6 +601,8 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
|
|||
props->max_ah = INT_MAX;
|
||||
props->max_pkeys = 1;
|
||||
props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
|
||||
if (!mana_ib_is_rnic(dev))
|
||||
props->raw_packet_caps = IB_RAW_PACKET_CAP_IP_CSUM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -603,6 +610,7 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
|
|||
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
|
||||
struct ib_port_attr *props)
|
||||
{
|
||||
struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
|
||||
|
||||
if (!ndev)
|
||||
|
|
@ -623,7 +631,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
|
|||
props->active_width = IB_WIDTH_4X;
|
||||
props->active_speed = IB_SPEED_EDR;
|
||||
props->pkey_tbl_len = 1;
|
||||
if (port == 1) {
|
||||
if (mana_ib_is_rnic(dev)) {
|
||||
props->gid_tbl_len = 16;
|
||||
props->port_cap_flags = IB_PORT_CM_SUP;
|
||||
props->ip_gids = true;
|
||||
|
|
@ -696,6 +704,41 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
|
|||
caps->max_recv_sge_count = resp.max_recv_sge_count;
|
||||
caps->feature_flags = resp.feature_flags;
|
||||
|
||||
caps->page_size_cap = PAGE_SZ_BM;
|
||||
if (mdev_to_gc(dev)->pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB)
|
||||
caps->page_size_cap |= (SZ_4M | SZ_1G | SZ_2G);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
|
||||
{
|
||||
struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
|
||||
struct gdma_query_max_resources_resp resp = {};
|
||||
struct gdma_general_req req = {};
|
||||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
|
||||
sizeof(req), sizeof(resp));
|
||||
|
||||
err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err) {
|
||||
ibdev_err(&dev->ib_dev,
|
||||
"Failed to query adapter caps err %d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
|
||||
caps->max_cq_count = resp.max_cq;
|
||||
caps->max_mr_count = resp.max_mst;
|
||||
caps->max_pd_count = 0x6000;
|
||||
caps->max_qp_wr = min_t(u32,
|
||||
0x100000 / GDMA_MAX_SQE_SIZE,
|
||||
0x100000 / GDMA_MAX_RQE_SIZE);
|
||||
caps->max_send_sge_count = 30;
|
||||
caps->max_recv_sge_count = 15;
|
||||
caps->page_size_cap = PAGE_SZ_BM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -740,7 +783,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev)
|
|||
spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
|
||||
spec.eq.msix_index = 0;
|
||||
|
||||
err = mana_gd_create_mana_eq(&gc->mana_ib, &spec, &mdev->fatal_err_eq);
|
||||
err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->fatal_err_eq);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
|
@ -791,7 +834,7 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
|
|||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp));
|
||||
req.hdr.req.msg_version = GDMA_MESSAGE_V2;
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.notify_eq_id = mdev->fatal_err_eq->id;
|
||||
|
||||
if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
|
||||
|
|
@ -816,7 +859,7 @@ int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev)
|
|||
|
||||
gc = mdev_to_gc(mdev);
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
|
|
@ -843,7 +886,7 @@ int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context)
|
|||
}
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.op = ADDR_OP_ADD;
|
||||
req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
|
||||
|
|
@ -873,7 +916,7 @@ int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context)
|
|||
}
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.op = ADDR_OP_REMOVE;
|
||||
req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
|
||||
|
|
@ -896,7 +939,7 @@ int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8
|
|||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.op = op;
|
||||
copy_in_reverse(req.mac_addr, mac, ETH_ALEN);
|
||||
|
|
@ -917,8 +960,11 @@ int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 do
|
|||
struct mana_rnic_create_cq_req req = {};
|
||||
int err;
|
||||
|
||||
if (!mdev->eqs)
|
||||
return -EINVAL;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.gdma_region = cq->queue.gdma_region;
|
||||
req.eq_id = mdev->eqs[cq->comp_vector]->id;
|
||||
|
|
@ -950,7 +996,7 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
|
|||
return 0;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.cq_handle = cq->cq_handle;
|
||||
|
||||
|
|
@ -976,7 +1022,7 @@ int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
|
|||
int err, i;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.pd_handle = pd->pd_handle;
|
||||
req.send_cq_handle = send_cq->cq_handle;
|
||||
|
|
@ -1012,7 +1058,7 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
|
|||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.rc_qp_handle = qp->qp_handle;
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
|
|
@ -1035,7 +1081,7 @@ int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
|
|||
int err, i;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.pd_handle = pd->pd_handle;
|
||||
req.send_cq_handle = send_cq->cq_handle;
|
||||
|
|
@ -1070,7 +1116,7 @@ int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
|
|||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.qp_handle = qp->qp_handle;
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ struct mana_ib_adapter_caps {
|
|||
u32 max_recv_sge_count;
|
||||
u32 max_inline_data_size;
|
||||
u64 feature_flags;
|
||||
u64 page_size_cap;
|
||||
};
|
||||
|
||||
struct mana_ib_queue {
|
||||
|
|
@ -543,6 +544,11 @@ static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
|
|||
complete(&qp->free);
|
||||
}
|
||||
|
||||
static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
|
||||
{
|
||||
return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
|
||||
}
|
||||
|
||||
static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
|
||||
{
|
||||
struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
|
|
@ -642,6 +648,7 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
|
|||
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
|
||||
|
||||
int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
|
||||
int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
|
||||
|
||||
int mana_ib_create_eqs(struct mana_ib_dev *mdev);
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
|
||||
#include "mana_ib.h"
|
||||
|
||||
#define VALID_MR_FLAGS \
|
||||
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
|
||||
#define VALID_MR_FLAGS (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |\
|
||||
IB_ACCESS_REMOTE_ATOMIC | IB_ZERO_BASED)
|
||||
|
||||
#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
|
||||
|
||||
|
|
@ -24,6 +24,9 @@ mana_ib_verbs_to_gdma_access_flags(int access_flags)
|
|||
if (access_flags & IB_ACCESS_REMOTE_READ)
|
||||
flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
|
||||
|
||||
if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
|
||||
flags |= GDMA_ACCESS_FLAG_REMOTE_ATOMIC;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +51,10 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
|
|||
req.gva.virtual_address = mr_params->gva.virtual_address;
|
||||
req.gva.access_flags = mr_params->gva.access_flags;
|
||||
break;
|
||||
|
||||
case GDMA_MR_TYPE_ZBVA:
|
||||
req.zbva.dma_region_handle = mr_params->zbva.dma_region_handle;
|
||||
req.zbva.access_flags = mr_params->zbva.access_flags;
|
||||
break;
|
||||
default:
|
||||
ibdev_dbg(&dev->ib_dev,
|
||||
"invalid param (GDMA_MR_TYPE) passed, type %d\n",
|
||||
|
|
@ -144,11 +150,18 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
|
|||
dma_region_handle);
|
||||
|
||||
mr_params.pd_handle = pd->pd_handle;
|
||||
mr_params.mr_type = GDMA_MR_TYPE_GVA;
|
||||
mr_params.gva.dma_region_handle = dma_region_handle;
|
||||
mr_params.gva.virtual_address = iova;
|
||||
mr_params.gva.access_flags =
|
||||
mana_ib_verbs_to_gdma_access_flags(access_flags);
|
||||
if (access_flags & IB_ZERO_BASED) {
|
||||
mr_params.mr_type = GDMA_MR_TYPE_ZBVA;
|
||||
mr_params.zbva.dma_region_handle = dma_region_handle;
|
||||
mr_params.zbva.access_flags =
|
||||
mana_ib_verbs_to_gdma_access_flags(access_flags);
|
||||
} else {
|
||||
mr_params.mr_type = GDMA_MR_TYPE_GVA;
|
||||
mr_params.gva.dma_region_handle = dma_region_handle;
|
||||
mr_params.gva.virtual_address = iova;
|
||||
mr_params.gva.access_flags =
|
||||
mana_ib_verbs_to_gdma_access_flags(access_flags);
|
||||
}
|
||||
|
||||
err = mana_ib_gd_create_mr(dev, mr, &mr_params);
|
||||
if (err)
|
||||
|
|
|
|||
|
|
@ -635,7 +635,6 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
|
|||
{
|
||||
struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
|
||||
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
u32 doorbell, queue_size;
|
||||
int i, err;
|
||||
|
||||
|
|
@ -654,7 +653,7 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
|
|||
goto destroy_queues;
|
||||
}
|
||||
}
|
||||
doorbell = gc->mana_ib.doorbell;
|
||||
doorbell = mdev->gdma_dev->doorbell;
|
||||
|
||||
err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
|
||||
sizeof(struct ud_rq_shadow_wqe));
|
||||
|
|
@ -736,7 +735,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
|||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.qp_handle = qp->qp_handle;
|
||||
req.qp_state = attr->qp_state;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@
|
|||
|
||||
#define MAX_VFS 80
|
||||
#define MAX_PEND_REQS_PER_FUNC 4
|
||||
#define MAD_TIMEOUT_MS 2000
|
||||
#define MAD_TIMEOUT_SEC 2
|
||||
|
||||
#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
|
||||
#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
|
||||
|
|
@ -270,7 +270,7 @@ static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad
|
|||
if (!ret) {
|
||||
/* calls mlx4_ib_mcg_timeout_handler */
|
||||
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
|
||||
msecs_to_jiffies(MAD_TIMEOUT_MS));
|
||||
secs_to_jiffies(MAD_TIMEOUT_SEC));
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
|
@ -309,7 +309,7 @@ static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
|
|||
if (!ret) {
|
||||
/* calls mlx4_ib_mcg_timeout_handler */
|
||||
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
|
||||
msecs_to_jiffies(MAD_TIMEOUT_MS));
|
||||
secs_to_jiffies(MAD_TIMEOUT_SEC));
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
|
@ -1091,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
|
|||
for (i = 0; i < MAX_VFS; ++i)
|
||||
clean_vf_mcast(ctx, i);
|
||||
|
||||
end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
|
||||
end = jiffies + secs_to_jiffies(MAD_TIMEOUT_SEC + 3);
|
||||
do {
|
||||
count = 0;
|
||||
mutex_lock(&ctx->mcg_table_lock);
|
||||
|
|
|
|||
|
|
@ -1645,11 +1645,6 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
|
|||
return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
|
||||
}
|
||||
|
||||
enum {
|
||||
LEFTOVERS_MC,
|
||||
LEFTOVERS_UC,
|
||||
};
|
||||
|
||||
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_ib_flow_prio *ft_prio,
|
||||
struct ib_flow_attr *flow_attr,
|
||||
|
|
@ -1659,43 +1654,32 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de
|
|||
struct mlx5_ib_flow_handler *handler = NULL;
|
||||
|
||||
static struct {
|
||||
struct ib_flow_attr flow_attr;
|
||||
struct ib_flow_spec_eth eth_flow;
|
||||
} leftovers_specs[] = {
|
||||
[LEFTOVERS_MC] = {
|
||||
.flow_attr = {
|
||||
.num_of_specs = 1,
|
||||
.size = sizeof(leftovers_specs[0])
|
||||
},
|
||||
.eth_flow = {
|
||||
.type = IB_FLOW_SPEC_ETH,
|
||||
.size = sizeof(struct ib_flow_spec_eth),
|
||||
.mask = {.dst_mac = {0x1} },
|
||||
.val = {.dst_mac = {0x1} }
|
||||
}
|
||||
},
|
||||
[LEFTOVERS_UC] = {
|
||||
.flow_attr = {
|
||||
.num_of_specs = 1,
|
||||
.size = sizeof(leftovers_specs[0])
|
||||
},
|
||||
.eth_flow = {
|
||||
.type = IB_FLOW_SPEC_ETH,
|
||||
.size = sizeof(struct ib_flow_spec_eth),
|
||||
.mask = {.dst_mac = {0x1} },
|
||||
.val = {.dst_mac = {} }
|
||||
}
|
||||
}
|
||||
};
|
||||
struct ib_flow_attr flow_attr;
|
||||
} leftovers_wc = { .flow_attr = { .num_of_specs = 1,
|
||||
.size = sizeof(leftovers_wc) },
|
||||
.eth_flow = {
|
||||
.type = IB_FLOW_SPEC_ETH,
|
||||
.size = sizeof(struct ib_flow_spec_eth),
|
||||
.mask = { .dst_mac = { 0x1 } },
|
||||
.val = { .dst_mac = { 0x1 } } } };
|
||||
|
||||
handler = create_flow_rule(dev, ft_prio,
|
||||
&leftovers_specs[LEFTOVERS_MC].flow_attr,
|
||||
dst);
|
||||
static struct {
|
||||
struct ib_flow_spec_eth eth_flow;
|
||||
struct ib_flow_attr flow_attr;
|
||||
} leftovers_uc = { .flow_attr = { .num_of_specs = 1,
|
||||
.size = sizeof(leftovers_uc) },
|
||||
.eth_flow = {
|
||||
.type = IB_FLOW_SPEC_ETH,
|
||||
.size = sizeof(struct ib_flow_spec_eth),
|
||||
.mask = { .dst_mac = { 0x1 } },
|
||||
.val = { .dst_mac = {} } } };
|
||||
|
||||
handler = create_flow_rule(dev, ft_prio, &leftovers_wc.flow_attr, dst);
|
||||
if (!IS_ERR(handler) &&
|
||||
flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
|
||||
handler_ucast = create_flow_rule(dev, ft_prio,
|
||||
&leftovers_specs[LEFTOVERS_UC].flow_attr,
|
||||
dst);
|
||||
&leftovers_uc.flow_attr, dst);
|
||||
if (IS_ERR(handler_ucast)) {
|
||||
mlx5_del_flow_rules(handler->rule);
|
||||
ft_prio->refcount--;
|
||||
|
|
|
|||
|
|
@ -485,6 +485,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
|
|||
*active_width = IB_WIDTH_2X;
|
||||
*active_speed = IB_SPEED_NDR;
|
||||
break;
|
||||
case MLX5E_PROT_MASK(MLX5E_200GAUI_1_200GBASE_CR1_KR1):
|
||||
*active_width = IB_WIDTH_1X;
|
||||
*active_speed = IB_SPEED_XDR;
|
||||
break;
|
||||
case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
|
||||
*active_width = IB_WIDTH_8X;
|
||||
*active_speed = IB_SPEED_HDR;
|
||||
|
|
@ -493,10 +497,18 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
|
|||
*active_width = IB_WIDTH_4X;
|
||||
*active_speed = IB_SPEED_NDR;
|
||||
break;
|
||||
case MLX5E_PROT_MASK(MLX5E_400GAUI_2_400GBASE_CR2_KR2):
|
||||
*active_width = IB_WIDTH_2X;
|
||||
*active_speed = IB_SPEED_XDR;
|
||||
break;
|
||||
case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8):
|
||||
*active_width = IB_WIDTH_8X;
|
||||
*active_speed = IB_SPEED_NDR;
|
||||
break;
|
||||
case MLX5E_PROT_MASK(MLX5E_800GAUI_4_800GBASE_CR4_KR4):
|
||||
*active_width = IB_WIDTH_4X;
|
||||
*active_speed = IB_SPEED_XDR;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
@ -4422,17 +4434,6 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
|
|||
mlx5_core_native_port_num(dev->mdev) - 1);
|
||||
}
|
||||
|
||||
static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
|
||||
{
|
||||
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
|
||||
return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
|
||||
}
|
||||
|
||||
static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
|
||||
{
|
||||
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
|
||||
}
|
||||
|
||||
static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
|
||||
{
|
||||
int err;
|
||||
|
|
@ -4662,9 +4663,6 @@ static const struct mlx5_ib_profile pf_profile = {
|
|||
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
|
||||
mlx5_ib_stage_cong_debugfs_init,
|
||||
mlx5_ib_stage_cong_debugfs_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_UAR,
|
||||
mlx5_ib_stage_uar_init,
|
||||
mlx5_ib_stage_uar_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
|
||||
mlx5_ib_stage_bfrag_init,
|
||||
mlx5_ib_stage_bfrag_cleanup),
|
||||
|
|
@ -4722,9 +4720,6 @@ const struct mlx5_ib_profile raw_eth_profile = {
|
|||
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
|
||||
mlx5_ib_stage_cong_debugfs_init,
|
||||
mlx5_ib_stage_cong_debugfs_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_UAR,
|
||||
mlx5_ib_stage_uar_init,
|
||||
mlx5_ib_stage_uar_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
|
||||
mlx5_ib_stage_bfrag_init,
|
||||
mlx5_ib_stage_bfrag_cleanup),
|
||||
|
|
|
|||
|
|
@ -351,6 +351,7 @@ struct mlx5_ib_flow_db {
|
|||
#define MLX5_IB_UPD_XLT_PD BIT(4)
|
||||
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
|
||||
#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
|
||||
#define MLX5_IB_UPD_XLT_DOWNGRADE BIT(7)
|
||||
|
||||
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
|
||||
*
|
||||
|
|
@ -1005,7 +1006,6 @@ enum mlx5_ib_stages {
|
|||
MLX5_IB_STAGE_ODP,
|
||||
MLX5_IB_STAGE_COUNTERS,
|
||||
MLX5_IB_STAGE_CONG_DEBUGFS,
|
||||
MLX5_IB_STAGE_UAR,
|
||||
MLX5_IB_STAGE_BFREG,
|
||||
MLX5_IB_STAGE_PRE_IB_REG_UMR,
|
||||
MLX5_IB_STAGE_WHITELIST_UID,
|
||||
|
|
@ -1473,8 +1473,8 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
|
|||
int __init mlx5_ib_odp_init(void);
|
||||
void mlx5_ib_odp_cleanup(void);
|
||||
int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev);
|
||||
void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags);
|
||||
int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags);
|
||||
|
||||
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
|
||||
enum ib_uverbs_advise_mr_advice advice,
|
||||
|
|
@ -1495,8 +1495,11 @@ static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags) {}
|
||||
static inline int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
|
||||
|
|
|
|||
|
|
@ -525,7 +525,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
|
|||
ent->fill_to_high_water = false;
|
||||
if (ent->pending)
|
||||
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
|
||||
msecs_to_jiffies(1000));
|
||||
secs_to_jiffies(1));
|
||||
else
|
||||
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
|
||||
}
|
||||
|
|
@ -576,7 +576,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
|
|||
"add keys command failed, err %d\n",
|
||||
err);
|
||||
queue_delayed_work(cache->wq, &ent->dwork,
|
||||
msecs_to_jiffies(1000));
|
||||
secs_to_jiffies(1));
|
||||
}
|
||||
}
|
||||
} else if (ent->mkeys_queue.ci > 2 * ent->limit) {
|
||||
|
|
@ -2051,7 +2051,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
|
|||
ent->in_use--;
|
||||
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
|
||||
mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
|
||||
msecs_to_jiffies(30 * 1000));
|
||||
secs_to_jiffies(30));
|
||||
ent->tmp_cleanup_scheduled = true;
|
||||
}
|
||||
spin_unlock_irq(&ent->mkeys_queue.lock);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/dma-resv.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/hmm-dma.h>
|
||||
#include <linux/pci-p2pdma.h>
|
||||
|
||||
#include "mlx5_ib.h"
|
||||
#include "cmd.h"
|
||||
|
|
@ -158,41 +161,50 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
|
|||
}
|
||||
}
|
||||
|
||||
static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
|
||||
{
|
||||
u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
|
||||
|
||||
if (umem_dma & ODP_READ_ALLOWED_BIT)
|
||||
mtt_entry |= MLX5_IB_MTT_READ;
|
||||
if (umem_dma & ODP_WRITE_ALLOWED_BIT)
|
||||
mtt_entry |= MLX5_IB_MTT_WRITE;
|
||||
|
||||
return mtt_entry;
|
||||
}
|
||||
|
||||
static void populate_mtt(__be64 *pas, size_t idx, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags)
|
||||
static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags)
|
||||
{
|
||||
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
|
||||
dma_addr_t pa;
|
||||
bool downgrade = flags & MLX5_IB_UPD_XLT_DOWNGRADE;
|
||||
struct pci_p2pdma_map_state p2pdma_state = {};
|
||||
struct ib_device *dev = odp->umem.ibdev;
|
||||
size_t i;
|
||||
|
||||
if (flags & MLX5_IB_UPD_XLT_ZAP)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < nentries; i++) {
|
||||
pa = odp->dma_list[idx + i];
|
||||
pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
|
||||
unsigned long pfn = odp->map.pfn_list[start + i];
|
||||
dma_addr_t dma_addr;
|
||||
|
||||
pfn = odp->map.pfn_list[start + i];
|
||||
if (!(pfn & HMM_PFN_VALID))
|
||||
/* ODP initialization */
|
||||
continue;
|
||||
|
||||
dma_addr = hmm_dma_map_pfn(dev->dma_device, &odp->map,
|
||||
start + i, &p2pdma_state);
|
||||
if (ib_dma_mapping_error(dev, dma_addr))
|
||||
return -EFAULT;
|
||||
|
||||
dma_addr |= MLX5_IB_MTT_READ;
|
||||
if ((pfn & HMM_PFN_WRITE) && !downgrade)
|
||||
dma_addr |= MLX5_IB_MTT_WRITE;
|
||||
|
||||
pas[i] = cpu_to_be64(dma_addr);
|
||||
odp->npages++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags)
|
||||
int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
|
||||
struct mlx5_ib_mr *mr, int flags)
|
||||
{
|
||||
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
|
||||
populate_klm(xlt, idx, nentries, mr, flags);
|
||||
return 0;
|
||||
} else {
|
||||
populate_mtt(xlt, idx, nentries, mr, flags);
|
||||
return populate_mtt(xlt, idx, nentries, mr, flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -303,8 +315,7 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
|
|||
* estimate the cost of another UMR vs. the cost of bigger
|
||||
* UMR.
|
||||
*/
|
||||
if (umem_odp->dma_list[idx] &
|
||||
(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
|
||||
if (umem_odp->map.pfn_list[idx] & HMM_PFN_VALID) {
|
||||
if (!in_block) {
|
||||
blk_start_idx = idx;
|
||||
in_block = 1;
|
||||
|
|
@ -687,7 +698,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
|
|||
{
|
||||
int page_shift, ret, np;
|
||||
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
|
||||
u64 access_mask;
|
||||
u64 access_mask = 0;
|
||||
u64 start_idx;
|
||||
bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
|
||||
u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;
|
||||
|
|
@ -695,12 +706,14 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
|
|||
if (flags & MLX5_PF_FLAGS_ENABLE)
|
||||
xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
|
||||
|
||||
if (flags & MLX5_PF_FLAGS_DOWNGRADE)
|
||||
xlt_flags |= MLX5_IB_UPD_XLT_DOWNGRADE;
|
||||
|
||||
page_shift = odp->page_shift;
|
||||
start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
|
||||
access_mask = ODP_READ_ALLOWED_BIT;
|
||||
|
||||
if (odp->umem.writable && !downgrade)
|
||||
access_mask |= ODP_WRITE_ALLOWED_BIT;
|
||||
access_mask |= HMM_PFN_WRITE;
|
||||
|
||||
np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
|
||||
if (np < 0)
|
||||
|
|
|
|||
|
|
@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
|
|||
spin_lock_irqsave(&table->lock, flags);
|
||||
|
||||
common = radix_tree_lookup(&table->tree, rsn);
|
||||
if (common)
|
||||
if (common && !common->invalid)
|
||||
refcount_inc(&common->refcount);
|
||||
else
|
||||
common = NULL;
|
||||
|
||||
spin_unlock_irqrestore(&table->lock, flags);
|
||||
|
||||
|
|
@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void modify_resource_common_state(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_core_qp *qp,
|
||||
bool invalid)
|
||||
{
|
||||
struct mlx5_qp_table *table = &dev->qp_table;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&table->lock, flags);
|
||||
qp->common.invalid = invalid;
|
||||
spin_unlock_irqrestore(&table->lock, flags);
|
||||
}
|
||||
|
||||
static void destroy_resource_common(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_core_qp *qp)
|
||||
{
|
||||
|
|
@ -609,8 +623,20 @@ int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
|
|||
int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_core_qp *rq)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* The rq destruction can be called again in case it fails, hence we
|
||||
* mark the common resource as invalid and only once FW destruction
|
||||
* is completed successfully we actually destroy the resources.
|
||||
*/
|
||||
modify_resource_common_state(dev, rq, true);
|
||||
ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
|
||||
if (ret) {
|
||||
modify_resource_common_state(dev, rq, false);
|
||||
return ret;
|
||||
}
|
||||
destroy_resource_common(dev, rq);
|
||||
return destroy_rq_tracked(dev, rq->qpn, rq->uid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
|
||||
|
|
|
|||
|
|
@ -840,7 +840,17 @@ int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
|
|||
size_to_map = npages * desc_size;
|
||||
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
|
||||
DMA_TO_DEVICE);
|
||||
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
|
||||
/*
|
||||
* npages is the maximum number of pages to map, but we
|
||||
* can't guarantee that all pages are actually mapped.
|
||||
*
|
||||
* For example, if page is p2p of type which is not supported
|
||||
* for mapping, the number of pages mapped will be less than
|
||||
* requested.
|
||||
*/
|
||||
err = mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
|
||||
if (err)
|
||||
return err;
|
||||
dma_sync_single_for_device(ddev, sg.addr, sg.length,
|
||||
DMA_TO_DEVICE);
|
||||
sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);
|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
|
|||
buddy->max_order = max_order;
|
||||
spin_lock_init(&buddy->lock);
|
||||
|
||||
buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *),
|
||||
buddy->bits = kcalloc(buddy->max_order + 1, sizeof(*buddy->bits),
|
||||
GFP_KERNEL);
|
||||
buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
|
||||
GFP_KERNEL);
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ static int usnic_uiom_dma_fault(struct iommu_domain *domain,
|
|||
unsigned long iova, int flags,
|
||||
void *token)
|
||||
{
|
||||
usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
|
||||
usnic_err("Device %s iommu fault domain 0x%p va 0x%lx flags 0x%x\n",
|
||||
dev_name(dev),
|
||||
domain, iova, flags);
|
||||
return -ENOSYS;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config RDMA_RXE
|
||||
tristate "Software RDMA over Ethernet (RoCE) driver"
|
||||
depends on INET && PCI && INFINIBAND
|
||||
depends on INET && PCI && INFINIBAND && 64BIT
|
||||
depends on INFINIBAND_VIRT_DMA
|
||||
select NET_UDP_TUNNEL
|
||||
select CRC32
|
||||
|
|
|
|||
|
|
@ -101,6 +101,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev)
|
|||
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
|
||||
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
|
||||
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
|
||||
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_FLUSH;
|
||||
rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC_WRITE;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -70,9 +70,9 @@ int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
|
|||
void *addr, int length, enum rxe_mr_copy_dir dir);
|
||||
int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
|
||||
int sg_nents, unsigned int *sg_offset);
|
||||
int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val);
|
||||
int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
|
||||
enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val);
|
||||
enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
|
||||
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
|
||||
enum rxe_mr_lookup_type type);
|
||||
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
|
||||
|
|
@ -193,13 +193,16 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
|
|||
/* rxe_odp.c */
|
||||
extern const struct mmu_interval_notifier_ops rxe_mn_ops;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
#if defined CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
|
||||
u64 iova, int access_flags, struct rxe_mr *mr);
|
||||
int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
|
||||
enum rxe_mr_copy_dir dir);
|
||||
int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val);
|
||||
enum resp_states rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val);
|
||||
int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
|
||||
unsigned int length);
|
||||
enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value);
|
||||
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
|
||||
static inline int
|
||||
rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
|
||||
|
|
@ -212,9 +215,19 @@ static inline int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
|
|||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int
|
||||
static inline enum resp_states
|
||||
rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val)
|
||||
u64 compare, u64 swap_add, u64 *orig_val)
|
||||
{
|
||||
return RESPST_ERR_UNSUPPORTED_OPCODE;
|
||||
}
|
||||
static inline int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
|
||||
unsigned int length)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr,
|
||||
u64 iova, u64 value)
|
||||
{
|
||||
return RESPST_ERR_UNSUPPORTED_OPCODE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -424,7 +424,7 @@ int copy_data(
|
|||
return err;
|
||||
}
|
||||
|
||||
int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
|
||||
static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
|
||||
{
|
||||
unsigned int page_offset;
|
||||
unsigned long index;
|
||||
|
|
@ -433,16 +433,6 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
|
|||
int err;
|
||||
u8 *va;
|
||||
|
||||
/* mr must be valid even if length is zero */
|
||||
if (WARN_ON(!mr))
|
||||
return -EINVAL;
|
||||
|
||||
if (length == 0)
|
||||
return 0;
|
||||
|
||||
if (mr->ibmr.type == IB_MR_TYPE_DMA)
|
||||
return -EFAULT;
|
||||
|
||||
err = mr_check_range(mr, iova, length);
|
||||
if (err)
|
||||
return err;
|
||||
|
|
@ -454,7 +444,7 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
|
|||
if (!page)
|
||||
return -EFAULT;
|
||||
bytes = min_t(unsigned int, length,
|
||||
mr_page_size(mr) - page_offset);
|
||||
mr_page_size(mr) - page_offset);
|
||||
|
||||
va = kmap_local_page(page);
|
||||
arch_wb_cache_pmem(va + page_offset, bytes);
|
||||
|
|
@ -468,11 +458,33 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 start, unsigned int length)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* mr must be valid even if length is zero */
|
||||
if (WARN_ON(!mr))
|
||||
return -EINVAL;
|
||||
|
||||
if (length == 0)
|
||||
return 0;
|
||||
|
||||
if (mr->ibmr.type == IB_MR_TYPE_DMA)
|
||||
return -EFAULT;
|
||||
|
||||
if (is_odp_mr(mr))
|
||||
err = rxe_odp_flush_pmem_iova(mr, start, length);
|
||||
else
|
||||
err = rxe_mr_flush_pmem_iova(mr, start, length);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Guarantee atomicity of atomic operations at the machine level. */
|
||||
DEFINE_SPINLOCK(atomic_ops_lock);
|
||||
|
||||
int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val)
|
||||
enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val)
|
||||
{
|
||||
unsigned int page_offset;
|
||||
struct page *page;
|
||||
|
|
@ -524,27 +536,15 @@ int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
|||
|
||||
kunmap_local(va);
|
||||
|
||||
return 0;
|
||||
return RESPST_NONE;
|
||||
}
|
||||
|
||||
#if defined CONFIG_64BIT
|
||||
/* only implemented or called for 64 bit architectures */
|
||||
int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
|
||||
enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
|
||||
{
|
||||
unsigned int page_offset;
|
||||
struct page *page;
|
||||
u64 *va;
|
||||
|
||||
/* ODP is not supported right now. WIP. */
|
||||
if (is_odp_mr(mr))
|
||||
return RESPST_ERR_UNSUPPORTED_OPCODE;
|
||||
|
||||
/* See IBA oA19-28 */
|
||||
if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
|
||||
rxe_dbg_mr(mr, "mr not in valid state\n");
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
}
|
||||
|
||||
if (mr->ibmr.type == IB_MR_TYPE_DMA) {
|
||||
page_offset = iova & (PAGE_SIZE - 1);
|
||||
page = ib_virt_dma_to_page(iova);
|
||||
|
|
@ -572,20 +572,12 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
|
|||
}
|
||||
|
||||
va = kmap_local_page(page);
|
||||
|
||||
/* Do atomic write after all prior operations have completed */
|
||||
smp_store_release(&va[page_offset >> 3], value);
|
||||
|
||||
kunmap_local(va);
|
||||
|
||||
return 0;
|
||||
return RESPST_NONE;
|
||||
}
|
||||
#else
|
||||
int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
|
||||
{
|
||||
return RESPST_ERR_UNSUPPORTED_OPCODE;
|
||||
}
|
||||
#endif
|
||||
|
||||
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
*/
|
||||
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/libnvdimm.h>
|
||||
|
||||
#include <rdma/ib_umem_odp.h>
|
||||
|
||||
|
|
@ -26,7 +27,7 @@ static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
|
|||
start = max_t(u64, ib_umem_start(umem_odp), range->start);
|
||||
end = min_t(u64, ib_umem_end(umem_odp), range->end);
|
||||
|
||||
/* update umem_odp->dma_list */
|
||||
/* update umem_odp->map.pfn_list */
|
||||
ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
|
||||
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
|
|
@ -44,12 +45,11 @@ static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcn
|
|||
{
|
||||
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
|
||||
bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
|
||||
u64 access_mask;
|
||||
u64 access_mask = 0;
|
||||
int np;
|
||||
|
||||
access_mask = ODP_READ_ALLOWED_BIT;
|
||||
if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
|
||||
access_mask |= ODP_WRITE_ALLOWED_BIT;
|
||||
access_mask |= HMM_PFN_WRITE;
|
||||
|
||||
/*
|
||||
* ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
|
||||
|
|
@ -124,8 +124,8 @@ int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
|
|||
return err;
|
||||
}
|
||||
|
||||
static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp,
|
||||
u64 iova, int length, u32 perm)
|
||||
static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp, u64 iova,
|
||||
int length)
|
||||
{
|
||||
bool need_fault = false;
|
||||
u64 addr;
|
||||
|
|
@ -137,7 +137,7 @@ static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp,
|
|||
while (addr < iova + length) {
|
||||
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
|
||||
|
||||
if (!(umem_odp->dma_list[idx] & perm)) {
|
||||
if (!(umem_odp->map.pfn_list[idx] & HMM_PFN_VALID)) {
|
||||
need_fault = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -147,23 +147,28 @@ static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp,
|
|||
return need_fault;
|
||||
}
|
||||
|
||||
static unsigned long rxe_odp_iova_to_index(struct ib_umem_odp *umem_odp, u64 iova)
|
||||
{
|
||||
return (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
|
||||
}
|
||||
|
||||
static unsigned long rxe_odp_iova_to_page_offset(struct ib_umem_odp *umem_odp, u64 iova)
|
||||
{
|
||||
return iova & (BIT(umem_odp->page_shift) - 1);
|
||||
}
|
||||
|
||||
static int rxe_odp_map_range_and_lock(struct rxe_mr *mr, u64 iova, int length, u32 flags)
|
||||
{
|
||||
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
|
||||
bool need_fault;
|
||||
u64 perm;
|
||||
int err;
|
||||
|
||||
if (unlikely(length < 1))
|
||||
return -EINVAL;
|
||||
|
||||
perm = ODP_READ_ALLOWED_BIT;
|
||||
if (!(flags & RXE_PAGEFAULT_RDONLY))
|
||||
perm |= ODP_WRITE_ALLOWED_BIT;
|
||||
|
||||
mutex_lock(&umem_odp->umem_mutex);
|
||||
|
||||
need_fault = rxe_check_pagefault(umem_odp, iova, length, perm);
|
||||
need_fault = rxe_check_pagefault(umem_odp, iova, length);
|
||||
if (need_fault) {
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
|
||||
|
|
@ -173,7 +178,7 @@ static int rxe_odp_map_range_and_lock(struct rxe_mr *mr, u64 iova, int length, u
|
|||
if (err < 0)
|
||||
return err;
|
||||
|
||||
need_fault = rxe_check_pagefault(umem_odp, iova, length, perm);
|
||||
need_fault = rxe_check_pagefault(umem_odp, iova, length);
|
||||
if (need_fault)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
|
@ -190,13 +195,13 @@ static int __rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
|
|||
size_t offset;
|
||||
u8 *user_va;
|
||||
|
||||
idx = (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
|
||||
offset = iova & (BIT(umem_odp->page_shift) - 1);
|
||||
idx = rxe_odp_iova_to_index(umem_odp, iova);
|
||||
offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
|
||||
|
||||
while (length > 0) {
|
||||
u8 *src, *dest;
|
||||
|
||||
page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
|
||||
page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]);
|
||||
user_va = kmap_local_page(page);
|
||||
if (!user_va)
|
||||
return -EFAULT;
|
||||
|
|
@ -255,8 +260,9 @@ int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
|
|||
return err;
|
||||
}
|
||||
|
||||
static int rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val)
|
||||
static enum resp_states rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova,
|
||||
int opcode, u64 compare,
|
||||
u64 swap_add, u64 *orig_val)
|
||||
{
|
||||
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
|
||||
unsigned int page_offset;
|
||||
|
|
@ -277,9 +283,9 @@ static int rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
|||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
}
|
||||
|
||||
idx = (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
|
||||
page_offset = iova & (BIT(umem_odp->page_shift) - 1);
|
||||
page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
|
||||
idx = rxe_odp_iova_to_index(umem_odp, iova);
|
||||
page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
|
||||
page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]);
|
||||
if (!page)
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
|
||||
|
|
@ -304,11 +310,11 @@ static int rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
|||
|
||||
kunmap_local(va);
|
||||
|
||||
return 0;
|
||||
return RESPST_NONE;
|
||||
}
|
||||
|
||||
int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val)
|
||||
enum resp_states rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
||||
u64 compare, u64 swap_add, u64 *orig_val)
|
||||
{
|
||||
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
|
||||
int err;
|
||||
|
|
@ -324,3 +330,91 @@ int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
|||
|
||||
return err;
|
||||
}
|
||||
|
||||
int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
|
||||
unsigned int length)
|
||||
{
|
||||
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
|
||||
unsigned int page_offset;
|
||||
unsigned long index;
|
||||
struct page *page;
|
||||
unsigned int bytes;
|
||||
int err;
|
||||
u8 *va;
|
||||
|
||||
err = rxe_odp_map_range_and_lock(mr, iova, length,
|
||||
RXE_PAGEFAULT_DEFAULT);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
while (length > 0) {
|
||||
index = rxe_odp_iova_to_index(umem_odp, iova);
|
||||
page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
|
||||
|
||||
page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
|
||||
if (!page) {
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
bytes = min_t(unsigned int, length,
|
||||
mr_page_size(mr) - page_offset);
|
||||
|
||||
va = kmap_local_page(page);
|
||||
arch_wb_cache_pmem(va + page_offset, bytes);
|
||||
kunmap_local(va);
|
||||
|
||||
length -= bytes;
|
||||
iova += bytes;
|
||||
page_offset = 0;
|
||||
}
|
||||
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
|
||||
{
|
||||
struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
|
||||
unsigned int page_offset;
|
||||
unsigned long index;
|
||||
struct page *page;
|
||||
int err;
|
||||
u64 *va;
|
||||
|
||||
/* See IBA oA19-28 */
|
||||
err = mr_check_range(mr, iova, sizeof(value));
|
||||
if (unlikely(err)) {
|
||||
rxe_dbg_mr(mr, "iova out of range\n");
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
}
|
||||
|
||||
err = rxe_odp_map_range_and_lock(mr, iova, sizeof(value),
|
||||
RXE_PAGEFAULT_DEFAULT);
|
||||
if (err)
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
|
||||
page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
|
||||
index = rxe_odp_iova_to_index(umem_odp, iova);
|
||||
page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
|
||||
if (!page) {
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
}
|
||||
/* See IBA A19.4.2 */
|
||||
if (unlikely(page_offset & 0x7)) {
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
rxe_dbg_mr(mr, "misaligned address\n");
|
||||
return RESPST_ERR_MISALIGNED_ATOMIC;
|
||||
}
|
||||
|
||||
va = kmap_local_page(page);
|
||||
/* Do atomic write after all prior operations have completed */
|
||||
smp_store_release(&va[page_offset >> 3], value);
|
||||
kunmap_local(va);
|
||||
|
||||
mutex_unlock(&umem_odp->umem_mutex);
|
||||
|
||||
return RESPST_NONE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,12 +53,9 @@ enum rxe_device_param {
|
|||
| IB_DEVICE_MEM_WINDOW
|
||||
| IB_DEVICE_FLUSH_GLOBAL
|
||||
| IB_DEVICE_FLUSH_PERSISTENT
|
||||
#ifdef CONFIG_64BIT
|
||||
| IB_DEVICE_MEM_WINDOW_TYPE_2B
|
||||
| IB_DEVICE_ATOMIC_WRITE,
|
||||
#else
|
||||
| IB_DEVICE_MEM_WINDOW_TYPE_2B,
|
||||
#endif /* CONFIG_64BIT */
|
||||
|
||||
RXE_MAX_SGE = 32,
|
||||
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
|
||||
sizeof(struct ib_sge) * RXE_MAX_SGE,
|
||||
|
|
|
|||
|
|
@ -811,7 +811,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
|
|||
spin_unlock_irqrestore(&qp->state_lock, flags);
|
||||
qp->qp_timeout_jiffies = 0;
|
||||
|
||||
if (qp_type(qp) == IB_QPT_RC) {
|
||||
/* In the function timer_setup, .function is initialized. If .function
|
||||
* is NULL, it indicates the function timer_setup is not called, the
|
||||
* timer is not initialized. Or else, the timer is initialized.
|
||||
*/
|
||||
if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function &&
|
||||
qp->rnr_nak_timer.function) {
|
||||
timer_delete_sync(&qp->retrans_timer);
|
||||
timer_delete_sync(&qp->rnr_nak_timer);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -649,10 +649,6 @@ static enum resp_states process_flush(struct rxe_qp *qp,
|
|||
struct rxe_mr *mr = qp->resp.mr;
|
||||
struct resp_res *res = qp->resp.res;
|
||||
|
||||
/* ODP is not supported right now. WIP. */
|
||||
if (is_odp_mr(mr))
|
||||
return RESPST_ERR_UNSUPPORTED_OPCODE;
|
||||
|
||||
/* oA19-14, oA19-15 */
|
||||
if (res && res->replay)
|
||||
return RESPST_ACKNOWLEDGE;
|
||||
|
|
@ -753,7 +749,16 @@ static enum resp_states atomic_write_reply(struct rxe_qp *qp,
|
|||
value = *(u64 *)payload_addr(pkt);
|
||||
iova = qp->resp.va + qp->resp.offset;
|
||||
|
||||
err = rxe_mr_do_atomic_write(mr, iova, value);
|
||||
/* See IBA oA19-28 */
|
||||
if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
|
||||
rxe_dbg_mr(mr, "mr not in valid state\n");
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
}
|
||||
|
||||
if (is_odp_mr(mr))
|
||||
err = rxe_odp_do_atomic_write(mr, iova, value);
|
||||
else
|
||||
err = rxe_mr_do_atomic_write(mr, iova, value);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
|
|
|||
|
|
@ -85,17 +85,17 @@ static bool is_done(struct rxe_task *task)
|
|||
|
||||
/* do_task is a wrapper for the three tasks (requester,
|
||||
* completer, responder) and calls them in a loop until
|
||||
* they return a non-zero value. It is called either
|
||||
* directly by rxe_run_task or indirectly if rxe_sched_task
|
||||
* schedules the task. They must call __reserve_if_idle to
|
||||
* move the task to busy before calling or scheduling.
|
||||
* The task can also be moved to drained or invalid
|
||||
* by calls to rxe_cleanup_task or rxe_disable_task.
|
||||
* In that case tasks which get here are not executed but
|
||||
* just flushed. The tasks are designed to look to see if
|
||||
* there is work to do and then do part of it before returning
|
||||
* here with a return value of zero until all the work
|
||||
* has been consumed then it returns a non-zero value.
|
||||
* they return a non-zero value. It is called indirectly
|
||||
* when rxe_sched_task schedules the task. They must
|
||||
* call __reserve_if_idle to move the task to busy before
|
||||
* calling or scheduling. The task can also be moved to
|
||||
* drained or invalid by calls to rxe_cleanup_task or
|
||||
* rxe_disable_task. In that case tasks which get here
|
||||
* are not executed but just flushed. The tasks are
|
||||
* designed to look to see if there is work to do and
|
||||
* then do part of it before returning here with a return
|
||||
* value of zero until all the work has been consumed then
|
||||
* it returns a non-zero value.
|
||||
* The number of times the task can be run is limited by
|
||||
* max iterations so one task cannot hold the cpu forever.
|
||||
* If the limit is hit and work remains the task is rescheduled.
|
||||
|
|
@ -234,24 +234,6 @@ void rxe_cleanup_task(struct rxe_task *task)
|
|||
spin_unlock_irqrestore(&task->lock, flags);
|
||||
}
|
||||
|
||||
/* run the task inline if it is currently idle
|
||||
* cannot call do_task holding the lock
|
||||
*/
|
||||
void rxe_run_task(struct rxe_task *task)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool run;
|
||||
|
||||
WARN_ON(rxe_read(task->qp) <= 0);
|
||||
|
||||
spin_lock_irqsave(&task->lock, flags);
|
||||
run = __reserve_if_idle(task);
|
||||
spin_unlock_irqrestore(&task->lock, flags);
|
||||
|
||||
if (run)
|
||||
do_task(task);
|
||||
}
|
||||
|
||||
/* schedule the task to run later as a work queue entry.
|
||||
* the queue_work call can be called holding
|
||||
* the lock.
|
||||
|
|
|
|||
|
|
@ -47,8 +47,6 @@ int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
|
|||
/* cleanup task */
|
||||
void rxe_cleanup_task(struct rxe_task *task);
|
||||
|
||||
void rxe_run_task(struct rxe_task *task);
|
||||
|
||||
void rxe_sched_task(struct rxe_task *task);
|
||||
|
||||
/* keep a task from scheduling */
|
||||
|
|
|
|||
|
|
@ -718,7 +718,7 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
|
|||
"MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__)
|
||||
|
||||
#define siw_dbg_cep(cep, fmt, ...) \
|
||||
ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt, \
|
||||
ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \
|
||||
cep, __func__, ##__VA_ARGS__)
|
||||
|
||||
void siw_cq_flush(struct siw_cq *cq);
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
|
|||
wc->opcode = map_wc_opcode[cqe->opcode];
|
||||
wc->status = map_cqe_status[cqe->status].ib;
|
||||
siw_dbg_cq(cq,
|
||||
"idx %u, type %d, flags %2x, id 0x%pK\n",
|
||||
"idx %u, type %d, flags %2x, id 0x%p\n",
|
||||
cq->cq_get % cq->num_cqe, cqe->opcode,
|
||||
cqe->flags, (void *)(uintptr_t)cqe->id);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -17,30 +17,6 @@
|
|||
/* Stag lookup is based on its index part only (24 bits). */
|
||||
#define SIW_STAG_MAX_INDEX 0x00ffffff
|
||||
|
||||
/*
|
||||
* The code avoids special Stag of zero and tries to randomize
|
||||
* STag values between 1 and SIW_STAG_MAX_INDEX.
|
||||
*/
|
||||
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
|
||||
{
|
||||
struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
|
||||
u32 id, next;
|
||||
|
||||
get_random_bytes(&next, 4);
|
||||
next &= SIW_STAG_MAX_INDEX;
|
||||
|
||||
if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
|
||||
GFP_KERNEL) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Set the STag index part */
|
||||
m->stag = id << 8;
|
||||
|
||||
siw_dbg_mem(m, "new MEM object\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* siw_mem_id2obj()
|
||||
*
|
||||
|
|
@ -181,10 +157,10 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
|
|||
*/
|
||||
if (addr < mem->va || addr + len > mem->va + mem->len) {
|
||||
siw_dbg_pd(pd, "MEM interval len %d\n", len);
|
||||
siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
|
||||
siw_dbg_pd(pd, "[0x%p, 0x%p] out of bounds\n",
|
||||
(void *)(uintptr_t)addr,
|
||||
(void *)(uintptr_t)(addr + len));
|
||||
siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
|
||||
siw_dbg_pd(pd, "[0x%p, 0x%p] STag=0x%08x\n",
|
||||
(void *)(uintptr_t)mem->va,
|
||||
(void *)(uintptr_t)(mem->va + mem->len),
|
||||
mem->stag);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ void siw_umem_release(struct siw_umem *umem);
|
|||
struct siw_pbl *siw_pbl_alloc(u32 num_buf);
|
||||
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx);
|
||||
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index);
|
||||
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m);
|
||||
int siw_invalidate_stag(struct ib_pd *pd, u32 stag);
|
||||
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
|
||||
enum ib_access_flags perms, int len);
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
|
|||
|
||||
p = siw_get_upage(umem, dest_addr);
|
||||
if (unlikely(!p)) {
|
||||
pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n",
|
||||
pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n",
|
||||
__func__, qp_id(rx_qp(srx)),
|
||||
(void *)(uintptr_t)dest_addr,
|
||||
(void *)(uintptr_t)umem->fp_addr);
|
||||
|
|
@ -51,7 +51,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
|
|||
pg_off = dest_addr & ~PAGE_MASK;
|
||||
bytes = min(len, (int)PAGE_SIZE - pg_off);
|
||||
|
||||
siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes);
|
||||
siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes);
|
||||
|
||||
dest = kmap_atomic(p);
|
||||
rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
|
||||
|
|
@ -105,11 +105,11 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
|
|||
{
|
||||
int rv;
|
||||
|
||||
siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len);
|
||||
siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len);
|
||||
|
||||
rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
|
||||
if (unlikely(rv)) {
|
||||
pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n",
|
||||
pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n",
|
||||
qp_id(rx_qp(srx)), __func__, len, kva, rv);
|
||||
|
||||
return rv;
|
||||
|
|
|
|||
|
|
@ -936,7 +936,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
|
|||
rv = -EINVAL;
|
||||
break;
|
||||
}
|
||||
siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n",
|
||||
siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n",
|
||||
sqe->opcode, sqe->flags,
|
||||
(void *)(uintptr_t)sqe->id);
|
||||
|
||||
|
|
@ -1102,7 +1102,7 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
|
|||
siw_dbg_qp(qp, "error %d\n", rv);
|
||||
*bad_wr = wr;
|
||||
}
|
||||
return rv > 0 ? 0 : rv;
|
||||
return rv;
|
||||
}
|
||||
|
||||
int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
|
||||
|
|
@ -1332,7 +1332,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
|
|||
struct siw_device *sdev = to_siw_dev(pd->device);
|
||||
int rv;
|
||||
|
||||
siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
|
||||
siw_dbg_pd(pd, "start: 0x%p, va: 0x%p, len: %llu\n",
|
||||
(void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va,
|
||||
(unsigned long long)len);
|
||||
|
||||
|
|
@ -1525,7 +1525,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
|
|||
mem->len = base_mr->length;
|
||||
mem->va = base_mr->iova;
|
||||
siw_dbg_mem(mem,
|
||||
"%llu bytes, start 0x%pK, %u SLE to %u entries\n",
|
||||
"%llu bytes, start 0x%p, %u SLE to %u entries\n",
|
||||
mem->len, (void *)(uintptr_t)mem->va, num_sle,
|
||||
pbl->num_buf);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -391,6 +391,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
|
|||
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
|
||||
case GDMA_EQE_HWC_INIT_DATA:
|
||||
case GDMA_EQE_HWC_INIT_DONE:
|
||||
case GDMA_EQE_HWC_SOC_SERVICE:
|
||||
case GDMA_EQE_RNIC_QP_FATAL:
|
||||
if (!eq->eq.callback)
|
||||
break;
|
||||
|
|
@ -964,6 +965,7 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev)
|
|||
err, resp.hdr.status);
|
||||
return err ? err : -EPROTO;
|
||||
}
|
||||
gc->pf_cap_flags1 = resp.pf_cap_flags1;
|
||||
if (resp.pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) {
|
||||
err = mana_gd_query_hwc_timeout(pdev, &hwc->hwc_timeout);
|
||||
if (err) {
|
||||
|
|
@ -1004,7 +1006,6 @@ int mana_gd_register_device(struct gdma_dev *gd)
|
|||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_NS(mana_gd_register_device, "NET_MANA");
|
||||
|
||||
int mana_gd_deregister_device(struct gdma_dev *gd)
|
||||
{
|
||||
|
|
@ -1035,7 +1036,6 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
|
|||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_NS(mana_gd_deregister_device, "NET_MANA");
|
||||
|
||||
u32 mana_gd_wq_avail_space(struct gdma_queue *wq)
|
||||
{
|
||||
|
|
@ -1469,10 +1469,14 @@ static int mana_gd_setup(struct pci_dev *pdev)
|
|||
mana_gd_init_registers(pdev);
|
||||
mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
|
||||
|
||||
gc->service_wq = alloc_ordered_workqueue("gdma_service_wq", 0);
|
||||
if (!gc->service_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
err = mana_gd_setup_irqs(pdev);
|
||||
if (err) {
|
||||
dev_err(gc->dev, "Failed to setup IRQs: %d\n", err);
|
||||
return err;
|
||||
goto free_workqueue;
|
||||
}
|
||||
|
||||
err = mana_hwc_create_channel(gc);
|
||||
|
|
@ -1498,6 +1502,8 @@ static int mana_gd_setup(struct pci_dev *pdev)
|
|||
mana_hwc_destroy_channel(gc);
|
||||
remove_irq:
|
||||
mana_gd_remove_irqs(pdev);
|
||||
free_workqueue:
|
||||
destroy_workqueue(gc->service_wq);
|
||||
dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err);
|
||||
return err;
|
||||
}
|
||||
|
|
@ -1509,6 +1515,8 @@ static void mana_gd_cleanup(struct pci_dev *pdev)
|
|||
mana_hwc_destroy_channel(gc);
|
||||
|
||||
mana_gd_remove_irqs(pdev);
|
||||
|
||||
destroy_workqueue(gc->service_wq);
|
||||
dev_dbg(&pdev->dev, "mana gdma cleanup successful\n");
|
||||
}
|
||||
|
||||
|
|
@ -1578,8 +1586,14 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
if (err)
|
||||
goto cleanup_gd;
|
||||
|
||||
err = mana_rdma_probe(&gc->mana_ib);
|
||||
if (err)
|
||||
goto cleanup_mana;
|
||||
|
||||
return 0;
|
||||
|
||||
cleanup_mana:
|
||||
mana_remove(&gc->mana, false);
|
||||
cleanup_gd:
|
||||
mana_gd_cleanup(pdev);
|
||||
unmap_bar:
|
||||
|
|
@ -1607,6 +1621,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
|
|||
{
|
||||
struct gdma_context *gc = pci_get_drvdata(pdev);
|
||||
|
||||
mana_rdma_remove(&gc->mana_ib);
|
||||
mana_remove(&gc->mana, false);
|
||||
|
||||
mana_gd_cleanup(pdev);
|
||||
|
|
@ -1630,6 +1645,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
|
|||
{
|
||||
struct gdma_context *gc = pci_get_drvdata(pdev);
|
||||
|
||||
mana_rdma_remove(&gc->mana_ib);
|
||||
mana_remove(&gc->mana, true);
|
||||
|
||||
mana_gd_cleanup(pdev);
|
||||
|
|
@ -1654,6 +1670,10 @@ static int mana_gd_resume(struct pci_dev *pdev)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
err = mana_rdma_probe(&gc->mana_ib);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -1664,6 +1684,7 @@ static void mana_gd_shutdown(struct pci_dev *pdev)
|
|||
|
||||
dev_info(&pdev->dev, "Shutdown was called\n");
|
||||
|
||||
mana_rdma_remove(&gc->mana_ib);
|
||||
mana_remove(&gc->mana, true);
|
||||
|
||||
mana_gd_cleanup(pdev);
|
||||
|
|
|
|||
|
|
@ -112,11 +112,13 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
|
|||
static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
|
||||
struct gdma_event *event)
|
||||
{
|
||||
union hwc_init_soc_service_type service_data;
|
||||
struct hw_channel_context *hwc = ctx;
|
||||
struct gdma_dev *gd = hwc->gdma_dev;
|
||||
union hwc_init_type_data type_data;
|
||||
union hwc_init_eq_id_db eq_db;
|
||||
u32 type, val;
|
||||
int ret;
|
||||
|
||||
switch (event->type) {
|
||||
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
|
||||
|
|
@ -199,7 +201,24 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
|
|||
}
|
||||
|
||||
break;
|
||||
case GDMA_EQE_HWC_SOC_SERVICE:
|
||||
service_data.as_uint32 = event->details[0];
|
||||
type = service_data.type;
|
||||
|
||||
switch (type) {
|
||||
case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
|
||||
case GDMA_SERVICE_TYPE_RDMA_RESUME:
|
||||
ret = mana_rdma_service_event(gd->gdma_context, type);
|
||||
if (ret)
|
||||
dev_err(hwc->dev, "Failed to schedule adev service event: %d\n",
|
||||
ret);
|
||||
break;
|
||||
default:
|
||||
dev_warn(hwc->dev, "Received unknown SOC service type %u\n", type);
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type);
|
||||
/* Ignore unknown events, which should never happen. */
|
||||
|
|
|
|||
|
|
@ -2950,7 +2950,7 @@ static void remove_adev(struct gdma_dev *gd)
|
|||
gd->adev = NULL;
|
||||
}
|
||||
|
||||
static int add_adev(struct gdma_dev *gd)
|
||||
static int add_adev(struct gdma_dev *gd, const char *name)
|
||||
{
|
||||
struct auxiliary_device *adev;
|
||||
struct mana_adev *madev;
|
||||
|
|
@ -2966,7 +2966,7 @@ static int add_adev(struct gdma_dev *gd)
|
|||
goto idx_fail;
|
||||
adev->id = ret;
|
||||
|
||||
adev->name = "rdma";
|
||||
adev->name = name;
|
||||
adev->dev.parent = gd->gdma_context->dev;
|
||||
adev->dev.release = adev_release;
|
||||
madev->mdev = gd;
|
||||
|
|
@ -2998,6 +2998,70 @@ static int add_adev(struct gdma_dev *gd)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void mana_rdma_service_handle(struct work_struct *work)
|
||||
{
|
||||
struct mana_service_work *serv_work =
|
||||
container_of(work, struct mana_service_work, work);
|
||||
struct gdma_dev *gd = serv_work->gdma_dev;
|
||||
struct device *dev = gd->gdma_context->dev;
|
||||
int ret;
|
||||
|
||||
if (READ_ONCE(gd->rdma_teardown))
|
||||
goto out;
|
||||
|
||||
switch (serv_work->event) {
|
||||
case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
|
||||
if (!gd->adev || gd->is_suspended)
|
||||
break;
|
||||
|
||||
remove_adev(gd);
|
||||
gd->is_suspended = true;
|
||||
break;
|
||||
|
||||
case GDMA_SERVICE_TYPE_RDMA_RESUME:
|
||||
if (!gd->is_suspended)
|
||||
break;
|
||||
|
||||
ret = add_adev(gd, "rdma");
|
||||
if (ret)
|
||||
dev_err(dev, "Failed to add adev on resume: %d\n", ret);
|
||||
else
|
||||
gd->is_suspended = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
dev_warn(dev, "unknown adev service event %u\n",
|
||||
serv_work->event);
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(serv_work);
|
||||
}
|
||||
|
||||
int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event)
|
||||
{
|
||||
struct gdma_dev *gd = &gc->mana_ib;
|
||||
struct mana_service_work *serv_work;
|
||||
|
||||
if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
|
||||
/* RDMA device is not detected on pci */
|
||||
return 0;
|
||||
}
|
||||
|
||||
serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC);
|
||||
if (!serv_work)
|
||||
return -ENOMEM;
|
||||
|
||||
serv_work->event = event;
|
||||
serv_work->gdma_dev = gd;
|
||||
|
||||
INIT_WORK(&serv_work->work, mana_rdma_service_handle);
|
||||
queue_work(gc->service_wq, &serv_work->work);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mana_probe(struct gdma_dev *gd, bool resuming)
|
||||
{
|
||||
struct gdma_context *gc = gd->gdma_context;
|
||||
|
|
@ -3085,7 +3149,7 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
|
|||
}
|
||||
}
|
||||
|
||||
err = add_adev(gd);
|
||||
err = add_adev(gd, "eth");
|
||||
out:
|
||||
if (err) {
|
||||
mana_remove(gd, false);
|
||||
|
|
@ -3159,6 +3223,44 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
|
|||
dev_dbg(dev, "%s succeeded\n", __func__);
|
||||
}
|
||||
|
||||
int mana_rdma_probe(struct gdma_dev *gd)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
|
||||
/* RDMA device is not detected on pci */
|
||||
return err;
|
||||
}
|
||||
|
||||
err = mana_gd_register_device(gd);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = add_adev(gd, "rdma");
|
||||
if (err)
|
||||
mana_gd_deregister_device(gd);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void mana_rdma_remove(struct gdma_dev *gd)
|
||||
{
|
||||
struct gdma_context *gc = gd->gdma_context;
|
||||
|
||||
if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
|
||||
/* RDMA device is not detected on pci */
|
||||
return;
|
||||
}
|
||||
|
||||
WRITE_ONCE(gd->rdma_teardown, true);
|
||||
flush_workqueue(gc->service_wq);
|
||||
|
||||
if (gd->adev)
|
||||
remove_adev(gd);
|
||||
|
||||
mana_gd_deregister_device(gd);
|
||||
}
|
||||
|
||||
struct net_device *mana_get_primary_netdev(struct mana_context *ac,
|
||||
u32 port_index,
|
||||
netdevice_tracker *tracker)
|
||||
|
|
|
|||
33
include/linux/hmm-dma.h
Normal file
33
include/linux/hmm-dma.h
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
|
||||
#ifndef LINUX_HMM_DMA_H
|
||||
#define LINUX_HMM_DMA_H
|
||||
|
||||
#include <linux/dma-mapping.h>
|
||||
|
||||
struct dma_iova_state;
|
||||
struct pci_p2pdma_map_state;
|
||||
|
||||
/*
|
||||
* struct hmm_dma_map - array of PFNs and DMA addresses
|
||||
*
|
||||
* @state: DMA IOVA state
|
||||
* @pfn_list: array of PFNs
|
||||
* @dma_list: array of DMA addresses
|
||||
* @dma_entry_size: size of each DMA entry in the array
|
||||
*/
|
||||
struct hmm_dma_map {
|
||||
struct dma_iova_state state;
|
||||
unsigned long *pfn_list;
|
||||
dma_addr_t *dma_list;
|
||||
size_t dma_entry_size;
|
||||
};
|
||||
|
||||
int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map,
|
||||
size_t nr_entries, size_t dma_entry_size);
|
||||
void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map);
|
||||
dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
|
||||
size_t idx,
|
||||
struct pci_p2pdma_map_state *p2pdma_state);
|
||||
bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx);
|
||||
#endif /* LINUX_HMM_DMA_H */
|
||||
|
|
@ -23,6 +23,10 @@ struct mmu_interval_notifier;
|
|||
* HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
|
||||
* HMM_PFN_ERROR - accessing the pfn is impossible and the device should
|
||||
* fail. ie poisoned memory, special pages, no vma, etc
|
||||
* HMM_PFN_P2PDMA - P2P page
|
||||
* HMM_PFN_P2PDMA_BUS - Bus mapped P2P transfer
|
||||
* HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation
|
||||
* to mark that page is already DMA mapped
|
||||
*
|
||||
* On input:
|
||||
* 0 - Return the current state of the page, do not fault it.
|
||||
|
|
@ -36,13 +40,21 @@ enum hmm_pfn_flags {
|
|||
HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
|
||||
HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
|
||||
HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
|
||||
HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8),
|
||||
/*
|
||||
* Sticky flags, carried from input to output,
|
||||
* don't forget to update HMM_PFN_INOUT_FLAGS
|
||||
*/
|
||||
HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4),
|
||||
HMM_PFN_P2PDMA = 1UL << (BITS_PER_LONG - 5),
|
||||
HMM_PFN_P2PDMA_BUS = 1UL << (BITS_PER_LONG - 6),
|
||||
|
||||
HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 11),
|
||||
|
||||
/* Input flags */
|
||||
HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
|
||||
HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
|
||||
|
||||
HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT,
|
||||
HMM_PFN_FLAGS = ~((1UL << HMM_PFN_ORDER_SHIFT) - 1),
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -57,6 +69,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
|
|||
return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
|
||||
}
|
||||
|
||||
/*
|
||||
* hmm_pfn_to_phys() - return physical address pointed to by a device entry
|
||||
*/
|
||||
static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn)
|
||||
{
|
||||
return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS);
|
||||
}
|
||||
|
||||
/*
|
||||
* hmm_pfn_to_map_order() - return the CPU mapping size order
|
||||
*
|
||||
|
|
|
|||
|
|
@ -398,6 +398,7 @@ struct mlx5_core_rsc_common {
|
|||
enum mlx5_res_type res;
|
||||
refcount_t refcount;
|
||||
struct completion free;
|
||||
bool invalid;
|
||||
};
|
||||
|
||||
struct mlx5_uars_page {
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ enum gdma_eqe_type {
|
|||
GDMA_EQE_HWC_INIT_DONE = 131,
|
||||
GDMA_EQE_HWC_SOC_RECONFIG = 132,
|
||||
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
|
||||
GDMA_EQE_HWC_SOC_SERVICE = 134,
|
||||
GDMA_EQE_RNIC_QP_FATAL = 176,
|
||||
};
|
||||
|
||||
|
|
@ -70,6 +71,18 @@ enum {
|
|||
GDMA_DEVICE_MANA_IB = 3,
|
||||
};
|
||||
|
||||
enum gdma_service_type {
|
||||
GDMA_SERVICE_TYPE_NONE = 0,
|
||||
GDMA_SERVICE_TYPE_RDMA_SUSPEND = 1,
|
||||
GDMA_SERVICE_TYPE_RDMA_RESUME = 2,
|
||||
};
|
||||
|
||||
struct mana_service_work {
|
||||
struct work_struct work;
|
||||
struct gdma_dev *gdma_dev;
|
||||
enum gdma_service_type event;
|
||||
};
|
||||
|
||||
struct gdma_resource {
|
||||
/* Protect the bitmap */
|
||||
spinlock_t lock;
|
||||
|
|
@ -224,6 +237,8 @@ struct gdma_dev {
|
|||
void *driver_data;
|
||||
|
||||
struct auxiliary_device *adev;
|
||||
bool is_suspended;
|
||||
bool rdma_teardown;
|
||||
};
|
||||
|
||||
/* MANA_PAGE_SIZE is the DMA unit */
|
||||
|
|
@ -407,6 +422,10 @@ struct gdma_context {
|
|||
|
||||
/* Azure RDMA adapter */
|
||||
struct gdma_dev mana_ib;
|
||||
|
||||
u64 pf_cap_flags1;
|
||||
|
||||
struct workqueue_struct *service_wq;
|
||||
};
|
||||
|
||||
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
|
||||
|
|
@ -553,6 +572,7 @@ enum {
|
|||
*/
|
||||
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
|
||||
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
|
||||
#define GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB BIT(4)
|
||||
#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
|
||||
|
||||
/* Driver can handle holes (zeros) in the device list */
|
||||
|
|
@ -707,20 +727,6 @@ struct gdma_query_hwc_timeout_resp {
|
|||
u32 reserved;
|
||||
};
|
||||
|
||||
enum atb_page_size {
|
||||
ATB_PAGE_SIZE_4K,
|
||||
ATB_PAGE_SIZE_8K,
|
||||
ATB_PAGE_SIZE_16K,
|
||||
ATB_PAGE_SIZE_32K,
|
||||
ATB_PAGE_SIZE_64K,
|
||||
ATB_PAGE_SIZE_128K,
|
||||
ATB_PAGE_SIZE_256K,
|
||||
ATB_PAGE_SIZE_512K,
|
||||
ATB_PAGE_SIZE_1M,
|
||||
ATB_PAGE_SIZE_2M,
|
||||
ATB_PAGE_SIZE_MAX,
|
||||
};
|
||||
|
||||
enum gdma_mr_access_flags {
|
||||
GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0),
|
||||
GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1),
|
||||
|
|
@ -815,6 +821,8 @@ enum gdma_mr_type {
|
|||
* address that is set up in the MST
|
||||
*/
|
||||
GDMA_MR_TYPE_GVA = 2,
|
||||
/* Guest zero-based address MRs */
|
||||
GDMA_MR_TYPE_ZBVA = 4,
|
||||
};
|
||||
|
||||
struct gdma_create_mr_params {
|
||||
|
|
@ -826,6 +834,10 @@ struct gdma_create_mr_params {
|
|||
u64 virtual_address;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} gva;
|
||||
struct {
|
||||
u64 dma_region_handle;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} zbva;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
@ -841,7 +853,10 @@ struct gdma_create_mr_request {
|
|||
u64 virtual_address;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} gva;
|
||||
|
||||
struct {
|
||||
u64 dma_region_handle;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} zbva;
|
||||
};
|
||||
u32 reserved_2;
|
||||
};/* HW DATA */
|
||||
|
|
@ -893,4 +908,6 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
|
|||
void mana_register_debugfs(void);
|
||||
void mana_unregister_debugfs(void);
|
||||
|
||||
int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
|
||||
|
||||
#endif /* _GDMA_H */
|
||||
|
|
|
|||
|
|
@ -49,6 +49,15 @@ union hwc_init_type_data {
|
|||
};
|
||||
}; /* HW DATA */
|
||||
|
||||
union hwc_init_soc_service_type {
|
||||
u32 as_uint32;
|
||||
|
||||
struct {
|
||||
u32 value : 28;
|
||||
u32 type : 4;
|
||||
};
|
||||
}; /* HW DATA */
|
||||
|
||||
struct hwc_rx_oob {
|
||||
u32 type : 6;
|
||||
u32 eom : 1;
|
||||
|
|
|
|||
|
|
@ -489,6 +489,9 @@ int mana_detach(struct net_device *ndev, bool from_close);
|
|||
int mana_probe(struct gdma_dev *gd, bool resuming);
|
||||
void mana_remove(struct gdma_dev *gd, bool suspending);
|
||||
|
||||
int mana_rdma_probe(struct gdma_dev *gd);
|
||||
void mana_rdma_remove(struct gdma_dev *gd);
|
||||
|
||||
void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
|
||||
int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
|
||||
u32 flags);
|
||||
|
|
|
|||
|
|
@ -480,23 +480,12 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
|
|||
const void *private_data,
|
||||
u8 private_data_len);
|
||||
|
||||
#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */
|
||||
|
||||
/**
|
||||
* ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
|
||||
* message.
|
||||
* ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a
|
||||
*   connection message in case duplicates are received.
|
||||
* @cm_id: Connection identifier associated with the connection message.
|
||||
* @service_timeout: The lower 5-bits specify the maximum time required for
|
||||
* the sender to reply to the connection message. The upper 3-bits
|
||||
* specify additional control flags.
|
||||
* @private_data: Optional user-defined private data sent with the
|
||||
* message receipt acknowledgement.
|
||||
* @private_data_len: Size of the private data buffer, in bytes.
|
||||
*/
|
||||
int ib_send_cm_mra(struct ib_cm_id *cm_id,
|
||||
u8 service_timeout,
|
||||
const void *private_data,
|
||||
u8 private_data_len);
|
||||
int ib_prepare_cm_mra(struct ib_cm_id *cm_id);
|
||||
|
||||
/**
|
||||
* ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
|
||||
|
|
|
|||
|
|
@ -8,23 +8,17 @@
|
|||
|
||||
#include <rdma/ib_umem.h>
|
||||
#include <rdma/ib_verbs.h>
|
||||
#include <linux/hmm-dma.h>
|
||||
|
||||
struct ib_umem_odp {
|
||||
struct ib_umem umem;
|
||||
struct mmu_interval_notifier notifier;
|
||||
struct pid *tgid;
|
||||
|
||||
/* An array of the pfns included in the on-demand paging umem. */
|
||||
unsigned long *pfn_list;
|
||||
struct hmm_dma_map map;
|
||||
|
||||
/*
|
||||
* An array with DMA addresses mapped for pfns in pfn_list.
|
||||
* The lower two bits designate access permissions.
|
||||
* See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
|
||||
*/
|
||||
dma_addr_t *dma_list;
|
||||
/*
|
||||
* The umem_mutex protects the page_list and dma_list fields of an ODP
|
||||
* The umem_mutex protects the page_list field of an ODP
|
||||
* umem, allowing only a single thread to map/unmap pages. The mutex
|
||||
* also protects access to the mmu notifier counters.
|
||||
*/
|
||||
|
|
@ -67,19 +61,6 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
|
|||
umem_odp->page_shift;
|
||||
}
|
||||
|
||||
/*
|
||||
* The lower 2 bits of the DMA address signal the R/W permissions for
|
||||
* the entry. To upgrade the permissions, provide the appropriate
|
||||
* bitmask to the map_dma_pages function.
|
||||
*
|
||||
* Be aware that upgrading a mapped address might result in change of
|
||||
* the DMA address for the page.
|
||||
*/
|
||||
#define ODP_READ_ALLOWED_BIT (1<<0ULL)
|
||||
#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
|
||||
|
||||
#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
|
||||
struct ib_umem_odp *
|
||||
|
|
|
|||
|
|
@ -314,17 +314,19 @@ enum ib_atomic_cap {
|
|||
};
|
||||
|
||||
enum ib_odp_general_cap_bits {
|
||||
IB_ODP_SUPPORT = 1 << 0,
|
||||
IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
|
||||
IB_ODP_SUPPORT = IB_UVERBS_ODP_SUPPORT,
|
||||
IB_ODP_SUPPORT_IMPLICIT = IB_UVERBS_ODP_SUPPORT_IMPLICIT,
|
||||
};
|
||||
|
||||
enum ib_odp_transport_cap_bits {
|
||||
IB_ODP_SUPPORT_SEND = 1 << 0,
|
||||
IB_ODP_SUPPORT_RECV = 1 << 1,
|
||||
IB_ODP_SUPPORT_WRITE = 1 << 2,
|
||||
IB_ODP_SUPPORT_READ = 1 << 3,
|
||||
IB_ODP_SUPPORT_ATOMIC = 1 << 4,
|
||||
IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
|
||||
IB_ODP_SUPPORT_SEND = IB_UVERBS_ODP_SUPPORT_SEND,
|
||||
IB_ODP_SUPPORT_RECV = IB_UVERBS_ODP_SUPPORT_RECV,
|
||||
IB_ODP_SUPPORT_WRITE = IB_UVERBS_ODP_SUPPORT_WRITE,
|
||||
IB_ODP_SUPPORT_READ = IB_UVERBS_ODP_SUPPORT_READ,
|
||||
IB_ODP_SUPPORT_ATOMIC = IB_UVERBS_ODP_SUPPORT_ATOMIC,
|
||||
IB_ODP_SUPPORT_SRQ_RECV = IB_UVERBS_ODP_SUPPORT_SRQ_RECV,
|
||||
IB_ODP_SUPPORT_FLUSH = IB_UVERBS_ODP_SUPPORT_FLUSH,
|
||||
IB_ODP_SUPPORT_ATOMIC_WRITE = IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE,
|
||||
};
|
||||
|
||||
struct ib_odp_caps {
|
||||
|
|
|
|||
|
|
@ -388,6 +388,5 @@ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
|
|||
union ib_gid *dgid);
|
||||
|
||||
struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id);
|
||||
struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res);
|
||||
|
||||
#endif /* RDMA_CM_H */
|
||||
|
|
|
|||
|
|
@ -233,6 +233,22 @@ struct ib_uverbs_ex_query_device {
|
|||
__u32 reserved;
|
||||
};
|
||||
|
||||
enum ib_uverbs_odp_general_cap_bits {
|
||||
IB_UVERBS_ODP_SUPPORT = 1 << 0,
|
||||
IB_UVERBS_ODP_SUPPORT_IMPLICIT = 1 << 1,
|
||||
};
|
||||
|
||||
enum ib_uverbs_odp_transport_cap_bits {
|
||||
IB_UVERBS_ODP_SUPPORT_SEND = 1 << 0,
|
||||
IB_UVERBS_ODP_SUPPORT_RECV = 1 << 1,
|
||||
IB_UVERBS_ODP_SUPPORT_WRITE = 1 << 2,
|
||||
IB_UVERBS_ODP_SUPPORT_READ = 1 << 3,
|
||||
IB_UVERBS_ODP_SUPPORT_ATOMIC = 1 << 4,
|
||||
IB_UVERBS_ODP_SUPPORT_SRQ_RECV = 1 << 5,
|
||||
IB_UVERBS_ODP_SUPPORT_FLUSH = 1 << 6,
|
||||
IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7,
|
||||
};
|
||||
|
||||
struct ib_uverbs_odp_caps {
|
||||
__aligned_u64 general_caps;
|
||||
struct {
|
||||
|
|
|
|||
262
mm/hmm.c
262
mm/hmm.c
|
|
@ -10,6 +10,7 @@
|
|||
*/
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/hmm-dma.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/rmap.h>
|
||||
#include <linux/swap.h>
|
||||
|
|
@ -23,6 +24,7 @@
|
|||
#include <linux/sched/mm.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/pci-p2pdma.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/memory_hotplug.h>
|
||||
|
||||
|
|
@ -39,13 +41,21 @@ enum {
|
|||
HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
|
||||
};
|
||||
|
||||
enum {
|
||||
/* These flags are carried from input-to-output */
|
||||
HMM_PFN_INOUT_FLAGS = HMM_PFN_DMA_MAPPED | HMM_PFN_P2PDMA |
|
||||
HMM_PFN_P2PDMA_BUS,
|
||||
};
|
||||
|
||||
static int hmm_pfns_fill(unsigned long addr, unsigned long end,
|
||||
struct hmm_range *range, unsigned long cpu_flags)
|
||||
{
|
||||
unsigned long i = (addr - range->start) >> PAGE_SHIFT;
|
||||
|
||||
for (; addr < end; addr += PAGE_SIZE, i++)
|
||||
range->hmm_pfns[i] = cpu_flags;
|
||||
for (; addr < end; addr += PAGE_SIZE, i++) {
|
||||
range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
|
||||
range->hmm_pfns[i] |= cpu_flags;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -202,8 +212,10 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
|
|||
return hmm_vma_fault(addr, end, required_fault, walk);
|
||||
|
||||
pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
|
||||
hmm_pfns[i] = pfn | cpu_flags;
|
||||
for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
|
||||
hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
|
||||
hmm_pfns[i] |= pfn | cpu_flags;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
|
@ -230,14 +242,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
|
|||
unsigned long cpu_flags;
|
||||
pte_t pte = ptep_get(ptep);
|
||||
uint64_t pfn_req_flags = *hmm_pfn;
|
||||
uint64_t new_pfn_flags = 0;
|
||||
|
||||
if (pte_none_mostly(pte)) {
|
||||
required_fault =
|
||||
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
|
||||
if (required_fault)
|
||||
goto fault;
|
||||
*hmm_pfn = 0;
|
||||
return 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!pte_present(pte)) {
|
||||
|
|
@ -253,16 +265,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
|
|||
cpu_flags = HMM_PFN_VALID;
|
||||
if (is_writable_device_private_entry(entry))
|
||||
cpu_flags |= HMM_PFN_WRITE;
|
||||
*hmm_pfn = swp_offset_pfn(entry) | cpu_flags;
|
||||
return 0;
|
||||
new_pfn_flags = swp_offset_pfn(entry) | cpu_flags;
|
||||
goto out;
|
||||
}
|
||||
|
||||
required_fault =
|
||||
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
|
||||
if (!required_fault) {
|
||||
*hmm_pfn = 0;
|
||||
return 0;
|
||||
}
|
||||
if (!required_fault)
|
||||
goto out;
|
||||
|
||||
if (!non_swap_entry(entry))
|
||||
goto fault;
|
||||
|
|
@ -304,11 +314,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
|
|||
pte_unmap(ptep);
|
||||
return -EFAULT;
|
||||
}
|
||||
*hmm_pfn = HMM_PFN_ERROR;
|
||||
return 0;
|
||||
new_pfn_flags = HMM_PFN_ERROR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*hmm_pfn = pte_pfn(pte) | cpu_flags;
|
||||
new_pfn_flags = pte_pfn(pte) | cpu_flags;
|
||||
out:
|
||||
*hmm_pfn = (*hmm_pfn & HMM_PFN_INOUT_FLAGS) | new_pfn_flags;
|
||||
return 0;
|
||||
|
||||
fault:
|
||||
|
|
@ -448,8 +460,10 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
|||
}
|
||||
|
||||
pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
for (i = 0; i < npages; ++i, ++pfn)
|
||||
hmm_pfns[i] = pfn | cpu_flags;
|
||||
for (i = 0; i < npages; ++i, ++pfn) {
|
||||
hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
|
||||
hmm_pfns[i] |= pfn | cpu_flags;
|
||||
}
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
|
|
@ -507,8 +521,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
|
|||
}
|
||||
|
||||
pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
|
||||
for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
|
||||
range->hmm_pfns[i] = pfn | cpu_flags;
|
||||
for (; addr < end; addr += PAGE_SIZE, i++, pfn++) {
|
||||
range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
|
||||
range->hmm_pfns[i] |= pfn | cpu_flags;
|
||||
}
|
||||
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
|
|
@ -607,3 +623,211 @@ int hmm_range_fault(struct hmm_range *range)
|
|||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(hmm_range_fault);
|
||||
|
||||
/**
|
||||
* hmm_dma_map_alloc - Allocate HMM map structure
|
||||
* @dev: device to allocate structure for
|
||||
* @map: HMM map to allocate
|
||||
* @nr_entries: number of entries in the map
|
||||
* @dma_entry_size: size of the DMA entry in the map
|
||||
*
|
||||
* Allocate the HMM map structure and all the lists it contains.
|
||||
* Return 0 on success, -ENOMEM on failure.
|
||||
*/
|
||||
int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map,
|
||||
size_t nr_entries, size_t dma_entry_size)
|
||||
{
|
||||
bool dma_need_sync = false;
|
||||
bool use_iova;
|
||||
|
||||
WARN_ON_ONCE(!(nr_entries * PAGE_SIZE / dma_entry_size));
|
||||
|
||||
/*
|
||||
* The HMM API violates our normal DMA buffer ownership rules and can't
|
||||
* transfer buffer ownership. The dma_addressing_limited() check is a
|
||||
* best approximation to ensure no swiotlb buffering happens.
|
||||
*/
|
||||
#ifdef CONFIG_DMA_NEED_SYNC
|
||||
dma_need_sync = !dev->dma_skip_sync;
|
||||
#endif /* CONFIG_DMA_NEED_SYNC */
|
||||
if (dma_need_sync || dma_addressing_limited(dev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
map->dma_entry_size = dma_entry_size;
|
||||
map->pfn_list = kvcalloc(nr_entries, sizeof(*map->pfn_list),
|
||||
GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!map->pfn_list)
|
||||
return -ENOMEM;
|
||||
|
||||
use_iova = dma_iova_try_alloc(dev, &map->state, 0,
|
||||
nr_entries * PAGE_SIZE);
|
||||
if (!use_iova && dma_need_unmap(dev)) {
|
||||
map->dma_list = kvcalloc(nr_entries, sizeof(*map->dma_list),
|
||||
GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!map->dma_list)
|
||||
goto err_dma;
|
||||
}
|
||||
return 0;
|
||||
|
||||
err_dma:
|
||||
kvfree(map->pfn_list);
|
||||
return -ENOMEM;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hmm_dma_map_alloc);
|
||||
|
||||
/**
|
||||
* hmm_dma_map_free - iFree HMM map structure
|
||||
* @dev: device to free structure from
|
||||
* @map: HMM map containing the various lists and state
|
||||
*
|
||||
* Free the HMM map structure and all the lists it contains.
|
||||
*/
|
||||
void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map)
|
||||
{
|
||||
if (dma_use_iova(&map->state))
|
||||
dma_iova_free(dev, &map->state);
|
||||
kvfree(map->pfn_list);
|
||||
kvfree(map->dma_list);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hmm_dma_map_free);
|
||||
|
||||
/**
|
||||
* hmm_dma_map_pfn - Map a physical HMM page to DMA address
|
||||
* @dev: Device to map the page for
|
||||
* @map: HMM map
|
||||
* @idx: Index into the PFN and dma address arrays
|
||||
* @p2pdma_state: PCI P2P state.
|
||||
*
|
||||
* Map the page referenced by entry @idx of the PFN list and return its DMA
* address. When the map uses an IOVA range, the page is linked into that
* range at offset @idx * dma_entry_size; otherwise the page is mapped with
* dma_map_page() and, if the device needs unmapping, the address is stored
* in the DMA address list. Bus-address P2P pages are translated directly
* and are not linked or mapped.
*
* An entry that is already marked HMM_PFN_DMA_MAPPED (and is not a P2P
* bus-address mapping) is not linked again when its existing address can
* be recovered.
*
* Return: the DMA address on success, DMA_MAPPING_ERROR on failure.
|
||||
*/
|
||||
dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
|
||||
size_t idx,
|
||||
struct pci_p2pdma_map_state *p2pdma_state)
|
||||
{
|
||||
struct dma_iova_state *state = &map->state;
|
||||
dma_addr_t *dma_addrs = map->dma_list;
|
||||
unsigned long *pfns = map->pfn_list;
|
||||
struct page *page = hmm_pfn_to_page(pfns[idx]);
|
||||
phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]);
|
||||
size_t offset = idx * map->dma_entry_size;
|
||||
unsigned long attrs = 0;
|
||||
dma_addr_t dma_addr;
|
||||
int ret;
|
||||
|
||||
if ((pfns[idx] & HMM_PFN_DMA_MAPPED) &&
|
||||
!(pfns[idx] & HMM_PFN_P2PDMA_BUS)) {
|
||||
/*
|
||||
* We are in this flow when there is a need to resync flags,
|
||||
* for example when page was already linked in prefetch call
|
||||
* with READ flag and now we need to add WRITE flag
|
||||
*
|
||||
* This page was already programmed to HW and we don't want/need
|
||||
* to unlink and link it again just to resync flags.
|
||||
*/
|
||||
if (dma_use_iova(state))
|
||||
return state->addr + offset;
|
||||
|
||||
/*
|
||||
* Without dma_need_unmap, the dma_addrs array is NULL, thus we
|
||||
* need to regenerate the address below even if there already
|
||||
* was a mapping. But !dma_need_unmap implies that the
|
||||
* mapping is stateless, so this is fine.
|
||||
*/
|
||||
if (dma_need_unmap(dev))
|
||||
return dma_addrs[idx];
|
||||
|
||||
/* Continue to remapping */
|
||||
}
|
||||
|
||||
switch (pci_p2pdma_state(p2pdma_state, dev, page)) {
|
||||
case PCI_P2PDMA_MAP_NONE:
|
||||
break;
|
||||
case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
|
||||
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
|
||||
pfns[idx] |= HMM_PFN_P2PDMA;
|
||||
break;
|
||||
case PCI_P2PDMA_MAP_BUS_ADDR:
|
||||
pfns[idx] |= HMM_PFN_P2PDMA_BUS | HMM_PFN_DMA_MAPPED;
|
||||
return pci_p2pdma_bus_addr_map(p2pdma_state, paddr);
|
||||
default:
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
if (dma_use_iova(state)) {
|
||||
ret = dma_iova_link(dev, state, paddr, offset,
|
||||
map->dma_entry_size, DMA_BIDIRECTIONAL,
|
||||
attrs);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
ret = dma_iova_sync(dev, state, offset, map->dma_entry_size);
|
||||
if (ret) {
|
||||
dma_iova_unlink(dev, state, offset, map->dma_entry_size,
|
||||
DMA_BIDIRECTIONAL, attrs);
|
||||
goto error;
|
||||
}
|
||||
|
||||
dma_addr = state->addr + offset;
|
||||
} else {
|
||||
if (WARN_ON_ONCE(dma_need_unmap(dev) && !dma_addrs))
|
||||
goto error;
|
||||
|
||||
dma_addr = dma_map_page(dev, page, 0, map->dma_entry_size,
|
||||
DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(dev, dma_addr))
|
||||
goto error;
|
||||
|
||||
if (dma_need_unmap(dev))
|
||||
dma_addrs[idx] = dma_addr;
|
||||
}
|
||||
pfns[idx] |= HMM_PFN_DMA_MAPPED;
|
||||
return dma_addr;
|
||||
error:
|
||||
pfns[idx] &= ~HMM_PFN_P2PDMA;
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hmm_dma_map_pfn);
|
||||
|
||||
/**
|
||||
* hmm_dma_unmap_pfn - Unmap a physical HMM page from DMA address
|
||||
* @dev: Device to unmap the page from
|
||||
* @map: HMM map
|
||||
* @idx: Index of the PFN to unmap
|
||||
*
|
||||
* Returns true if the PFN was mapped and has been unmapped, false otherwise.
|
||||
*/
|
||||
bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx)
|
||||
{
|
||||
const unsigned long valid_dma = HMM_PFN_VALID | HMM_PFN_DMA_MAPPED;
|
||||
struct dma_iova_state *state = &map->state;
|
||||
dma_addr_t *dma_addrs = map->dma_list;
|
||||
unsigned long *pfns = map->pfn_list;
|
||||
unsigned long attrs = 0;
|
||||
|
||||
if ((pfns[idx] & valid_dma) != valid_dma)
|
||||
return false;
|
||||
|
||||
if (pfns[idx] & HMM_PFN_P2PDMA_BUS)
|
||||
; /* no need to unmap bus address P2P mappings */
|
||||
else if (dma_use_iova(state)) {
|
||||
if (pfns[idx] & HMM_PFN_P2PDMA)
|
||||
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
|
||||
dma_iova_unlink(dev, state, idx * map->dma_entry_size,
|
||||
map->dma_entry_size, DMA_BIDIRECTIONAL, attrs);
|
||||
} else if (dma_need_unmap(dev))
|
||||
dma_unmap_page(dev, dma_addrs[idx], map->dma_entry_size,
|
||||
DMA_BIDIRECTIONAL);
|
||||
|
||||
pfns[idx] &=
|
||||
~(HMM_PFN_DMA_MAPPED | HMM_PFN_P2PDMA | HMM_PFN_P2PDMA_BUS);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hmm_dma_unmap_pfn);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user