diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 79d93d6c7d7a..745bcc25c6f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -483,8 +483,6 @@ struct mlx5e_xdp_mpwqe { struct mlx5e_tx_wqe *wqe; u8 ds_count; u8 pkt_count; - u8 max_ds_count; - u8 complete; u8 inline_on; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index b0b982cf69bb..8cb98326531f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -179,34 +179,22 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; struct mlx5_wq_cyc *wq = &sq->wq; - u8 wqebbs; - u16 pi; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + + if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) + mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs); mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); prefetchw(session->wqe->data); session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; session->pkt_count = 0; - session->complete = 0; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. - */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif - - wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), - MLX5E_XDP_MPW_MAX_WQEBBS); - - session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; - mlx5e_xdp_update_inline_state(sq); stats->mpwqe++; @@ -244,7 +232,7 @@ static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { + MLX5E_XDPSQ_STOP_ROOM))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -285,8 +273,8 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(session->complete || - session->ds_count == session->max_ds_count)) + if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) || + session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS)) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index b90923932668..e0ed7710f5f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -40,6 +40,26 @@ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM) + +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) +#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ + DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) + +/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + +#define MLX5E_XDP_MPW_MAX_NUM_DS \ + (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) + struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, @@ -114,6 +134,30 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) session->inline_on = 1; } +static inline bool +mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session) +{ + return session->inline_on && + session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS; +} + +static inline void +mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->num_wqebbs = 1; + wi->num_pkts = 0; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + + sq->stats->nops += nnops; +} + static inline void mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, @@ -126,20 +170,12 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, session->pkt_count++; -#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) - if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { struct mlx5_wqe_inline_seg *inline_dseg = (struct mlx5_wqe_inline_seg *)dseg; u16 ds_len = sizeof(*inline_dseg) + dma_len; u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); - if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) { - /* Not enough space for inline wqe, send with memory pointer */ - session->complete = true; - goto no_inline; - } - inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); memcpy(inline_dseg->data, xdptxd->data, dma_len); @@ -148,7 +184,6 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, return; } -no_inline: dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 539b4d3656da..6eee3c7d4b06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -74,6 +74,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, @@ -90,6 +91,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, @@ -200,6 +202,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_xdp_tx_xmit += xdpsq_stats->xmit; s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe; s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw; + s->rx_xdp_tx_nops += xdpsq_stats->nops; s->rx_xdp_tx_full += xdpsq_stats->full; s->rx_xdp_tx_err += xdpsq_stats->err; s->rx_xdp_tx_cqe += xdpsq_stats->cqes; @@ -227,6 +230,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_xmit += xdpsq_red_stats->xmit; s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe; s->tx_xdp_inlnw += xdpsq_red_stats->inlnw; + s->tx_xdp_nops += xdpsq_red_stats->nops; s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; @@ -1331,6 +1335,7 @@ static const struct counter_desc rq_xdpsq_stats_desc[] = { { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, @@ -1340,6 +1345,7 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 76ac111e14d0..bf645d42c833 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -81,6 +81,7 @@ struct mlx5e_sw_stats { u64 rx_xdp_tx_xmit; u64 rx_xdp_tx_mpwqe; u64 rx_xdp_tx_inlnw; + u64 rx_xdp_tx_nops; u64 rx_xdp_tx_full; u64 rx_xdp_tx_err; u64 rx_xdp_tx_cqe; @@ -97,6 +98,7 @@ struct mlx5e_sw_stats { u64 tx_xdp_xmit; u64 tx_xdp_mpwqe; u64 tx_xdp_inlnw; + u64 tx_xdp_nops; u64 tx_xdp_full; u64 tx_xdp_err; u64 tx_xdp_cqes; @@ -288,6 +290,7 @@ struct mlx5e_xdpsq_stats { u64 xmit; u64 mpwqe; u64 inlnw; + u64 nops; u64 full; u64 err; /* dirtied @completion */