From ba6c4c094470dc83a7275000bac2fbd46bd5ab69 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 12 Nov 2015 19:35:25 +0200 Subject: [PATCH 1/6] net/mlx5e: Fix inline header size calculation mlx5e_get_inline_hdr_size didn't take into account the vlan insertion into the inline WQE segment. This could lead to a max inline violation in cases where skb_headlen(skb) + VLAN_HLEN > sq->max_inline. Fixes: 3ea4891db8d0 ("net/mlx5e: Fix LSO vlan insertion") Signed-off-by: Saeed Mahameed Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index cd8f85a251d7..f687ebf20d9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -118,8 +118,15 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, */ #define MLX5E_MIN_INLINE ETH_HLEN - if (bf && (skb_headlen(skb) <= sq->max_inline)) - return skb_headlen(skb); + if (bf) { + u16 ihs = skb_headlen(skb); + + if (skb_vlan_tag_present(skb)) + ihs += VLAN_HLEN; + + if (ihs <= sq->max_inline) + return skb_headlen(skb); + } return MLX5E_MIN_INLINE; } From 66189961e986e53ae39822898fc2ce88f44c61bb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 12 Nov 2015 19:35:26 +0200 Subject: [PATCH 2/6] net/mlx5e: Added self loopback prevention Prevent outgoing multicast frames from looping back to the RX queue by introducing a new HW capability, self_lb_en_modifiable, which indicates support for modifying the self_lb_en bit in the modify_tir command. When this capability is set we can prevent TIRs from sending back loopback multicast traffic to their own RQs, by "refreshing TIRs" with the modify_tir command every time new channels (SQs/RQs) are created at device open. 
This is needed since TIRs are static and only allocated once on driver load, and the loopback decision is their responsibility. Fixes issues of the kind: "IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!" The issue is seen since the IPv6 solicitation multicast messages are looped back and the network stack thinks they are coming from another host. Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 48 +++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 24 ++++++---- 2 files changed, 62 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5fc4d2d78cdf..df001754bcd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) return err; } +static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, + u32 tirn) +{ + void *in; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + err = mlx5_core_modify_tir(mdev, tirn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) +{ + int err; + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, + priv->tirn[i]); + if (err) + return err; + } + + return 0; +} + static int mlx5e_set_dev_port_mtu(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -1376,6 +1412,13 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; } + err = 
mlx5e_refresh_tirs_self_loopback_enable(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", + __func__, err); + goto err_close_channels; + } + mlx5e_update_carrier(priv); mlx5e_redirect_rqts(priv); @@ -1383,6 +1426,8 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; +err_close_channels: + mlx5e_close_channels(priv); err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; @@ -1909,6 +1954,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) "Not creating net device, some required device capabilities are missing\n"); return -ENOTSUPP; } + if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) + mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); + return 0; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd2097455a2e..1565324eb620 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_0[0x6]; + u8 reserved_0[0x3]; + u8 self_lb_en_modifiable[0x1]; + u8 reserved_1[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_1[0x4]; + u8 reserved_2[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_2[0x3]; + u8 reserved_3[0x3]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_3[0x2]; + u8 reserved_4[0x2]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_4[0x20]; + u8 reserved_5[0x20]; - u8 reserved_5[0x10]; + u8 reserved_6[0x10]; u8 lro_min_mss_size[0x10]; - u8 reserved_6[0x120]; + u8 reserved_7[0x120]; u8 lro_timer_supported_periods[4][0x20]; - u8 reserved_7[0x600]; + u8 reserved_8[0x600]; }; struct mlx5_ifc_roce_cap_bits { @@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits { }; struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_1[0x1b]; + u8 
self_lb_en[0x1]; + u8 reserved_2[0x3]; u8 lro[0x1]; }; From 50a9eea694ab8e0779069e0a4e0b12e145521468 Mon Sep 17 00:00:00 2001 From: Doron Tsur Date: Thu, 12 Nov 2015 19:35:27 +0200 Subject: [PATCH 3/6] net/mlx5e: Max mtu comparison fix On change mtu the driver compares between hardware queried mtu and software requested mtu. We need to compare between software representation of the queried mtu and the requested mtu. Fixes: facc9699f0fe ('net/mlx5e: Fix HW MTU settings') Signed-off-by: Doron Tsur Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index df001754bcd1..1e52db32c73d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1901,6 +1901,8 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + max_mtu = MLX5E_HW2SW_MTU(max_mtu); + if (new_mtu > max_mtu) { netdev_err(netdev, "%s: Bad MTU (%d) > (%d) Max\n", From d4e28cbd24c8cb004960ddb8b22124953f6c220c Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Thu, 12 Nov 2015 19:35:28 +0200 Subject: [PATCH 4/6] net/mlx5e: Use the right DMA free function on TX path On xmit path we use skb_frag_dma_map() which is using dma_map_page(), while upon completion we dma-unmap the skb fragments using dma_unmap_single() rather than dma_unmap_page(). To fix this, we now save the dma map type on xmit path and use this info to call the right dma unmap method upon TX completion. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 ++- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 69 ++++++++++--------- 2 files changed, 45 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f2ae62dd8c09..22e72bf1ae48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -334,9 +334,15 @@ struct mlx5e_tx_skb_cb { #define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb) +enum mlx5e_dma_map_type { + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + struct mlx5e_sq_dma { - dma_addr_t addr; - u32 size; + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; }; enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f687ebf20d9c..1341b1d3c421 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -61,41 +61,49 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) } } -static void mlx5e_dma_pop_last_pushed(struct mlx5e_sq *sq, dma_addr_t *addr, - u32 *size) +static inline void mlx5e_tx_dma_unmap(struct device *pdev, + struct mlx5e_sq_dma *dma) { - sq->dma_fifo_pc--; - *addr = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr; - *size = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size; + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); + } +} + +static inline void mlx5e_dma_push(struct mlx5e_sq *sq, + dma_addr_t addr, + u32 size, + enum mlx5e_dma_map_type map_type) +{ + sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; + sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; + 
sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + sq->dma_fifo_pc++; +} + +static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) +{ + return &sq->dma_fifo[i & sq->dma_fifo_mask]; } static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb) { - dma_addr_t addr; - u32 size; int i; for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) { - mlx5e_dma_pop_last_pushed(sq, &addr, &size); - dma_unmap_single(sq->pdev, addr, size, DMA_TO_DEVICE); + struct mlx5e_sq_dma *last_pushed_dma = + mlx5e_dma_get(sq, --sq->dma_fifo_pc); + + mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma); } } -static inline void mlx5e_dma_push(struct mlx5e_sq *sq, dma_addr_t addr, - u32 size) -{ - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; - sq->dma_fifo_pc++; -} - -static inline void mlx5e_dma_get(struct mlx5e_sq *sq, u32 i, dma_addr_t *addr, - u32 *size) -{ - *addr = sq->dma_fifo[i & sq->dma_fifo_mask].addr; - *size = sq->dma_fifo[i & sq->dma_fifo_mask].size; -} - u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -225,7 +233,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); - mlx5e_dma_push(sq, dma_addr, headlen); + mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); MLX5E_TX_SKB_CB(skb)->num_dma++; dseg++; @@ -244,7 +252,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); - mlx5e_dma_push(sq, dma_addr, fsz); + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); MLX5E_TX_SKB_CB(skb)->num_dma++; dseg++; @@ -360,13 +368,10 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) } for (j = 0; j < MLX5E_TX_SKB_CB(skb)->num_dma; j++) { - dma_addr_t addr; - u32 size; + struct mlx5e_sq_dma *dma = + 
mlx5e_dma_get(sq, dma_fifo_cc++); - mlx5e_dma_get(sq, dma_fifo_cc, &addr, &size); - dma_fifo_cc++; - dma_unmap_single(sq->pdev, addr, size, - DMA_TO_DEVICE); + mlx5e_tx_dma_unmap(sq->pdev, dma); } npkts++; From f5adbfee72282bb1f456d52b04adacd4fe6ac502 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 12 Nov 2015 19:35:29 +0200 Subject: [PATCH 5/6] net/mlx4_core: Fix sleeping while holding spinlock at rem_slave_counters When cleaning slave's counter resources, we hold a spinlock that protects the slave's counters list. As part of the clean, we call __mlx4_clear_if_stat which calls mlx4_alloc_cmd_mailbox which is a sleepable function. In order to fix this issue, hold the spinlock, and copy all counter indices into a temporary array, and release the spinlock. Afterwards, iterate over this array and free every counter. Repeat this scenario until the original list is empty (a new counter might have been added while releasing the counters from the temporary array). Fixes: b72ca7e96acf ("net/mlx4_core: Reset counters data when freed") Reported-by: Moni Shoua Tested-by: Moni Shoua Signed-off-by: Jack Morgenstein Signed-off-by: Eran Ben Elisha Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlx4/resource_tracker.c | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 9813d34f3e5b..6fec3e993d02 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4952,26 +4952,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave) struct res_counter *counter; struct res_counter *tmp; int err; - int index; + int *counters_arr = NULL; + int i, j; err = move_all_busy(dev, slave, RES_COUNTER); if (err) mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n", slave); - spin_lock_irq(mlx4_tlock(dev)); - list_for_each_entry_safe(counter, tmp, counter_list, com.list) { - if (counter->com.owner == slave) { - index = counter->com.res_id; - rb_erase(&counter->com.node, - &tracker->res_tree[RES_COUNTER]); - list_del(&counter->com.list); - kfree(counter); - __mlx4_counter_free(dev, index); + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return; + + do { + i = 0; + j = 0; + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(counter, tmp, counter_list, com.list) { + if (counter->com.owner == slave) { + counters_arr[i++] = counter->com.res_id; + rb_erase(&counter->com.node, + &tracker->res_tree[RES_COUNTER]); + list_del(&counter->com.list); + kfree(counter); + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + while (j < i) { + __mlx4_counter_free(dev, counters_arr[j++]); mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); } - } - spin_unlock_irq(mlx4_tlock(dev)); + } while (i); + + kfree(counters_arr); } static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) From d49c2197fd70c37d57982804465268440a33183a Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 12 Nov 2015 19:35:30 +0200 Subject: 
[PATCH 6/6] net/mlx4_core: Avoid returning success in case of an error flow The err variable wasn't set with the correct error value in some cases. Fixes: 47605df95398 ('mlx4: Modify proxy/tunnel QP mechanism [..]') Signed-off-by: Noa Osherovich Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 85f1b1e7e505..31c491e02e69 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -892,9 +892,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.port_mask[i] = dev->caps.port_type[i]; dev->caps.phys_port_id[i] = func_cap.phys_port_id; - if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, - &dev->caps.gid_table_len[i], - &dev->caps.pkey_table_len[i])) + err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &dev->caps.gid_table_len[i], + &dev->caps.pkey_table_len[i]); + if (err) goto err_mem; } @@ -906,6 +907,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->persist->pdev, 2)); + err = -ENOMEM; goto err_mem; }