From aeb660171b0663847fa04806a96302ac6112ad26 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 4 Jul 2023 15:06:40 +0800 Subject: [PATCH 01/15] net/mlx5e: fix double free in macsec_fs_tx_create_crypto_table_groups In function macsec_fs_tx_create_crypto_table_groups(), when the ft->g memory is successfully allocated but the 'in' memory fails to be allocated, the memory pointed to by ft->g is released once. And in function macsec_fs_tx_create(), macsec_fs_tx_destroy() is called to release the memory pointed to by ft->g again. This will cause double free problem. Fixes: e467b283ffd5 ("net/mlx5e: Add MACsec TX steering rules") Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 7fc901a6ec5f..414e28584881 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -161,6 +161,7 @@ static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft) if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } From 5dd77585dd9d0e03dd1bceb95f0269a7eaf6b936 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 5 Jul 2023 20:15:27 +0800 Subject: [PATCH 02/15] net/mlx5: DR, fix memory leak in mlx5dr_cmd_create_reformat_ctx when mlx5_cmd_exec failed in mlx5dr_cmd_create_reformat_ctx, the memory pointed by 'in' is not released, which will cause memory leak. Move memory release after mlx5_cmd_exec. 
Fixes: 1d9186476e12 ("net/mlx5: DR, Add direct rule command utilities") Signed-off-by: Zhengchao Shao Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 7491911ebcb5..8c2a34a0d6be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -564,11 +564,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (err) - return err; + goto err_free_in; *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); - kvfree(in); +err_free_in: + kvfree(in); return err; } From c6cf0b6097bf1bf1b2a89b521e9ecd26b581a93a Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 8 Jul 2023 15:13:07 +0800 Subject: [PATCH 03/15] net/mlx5: fix potential memory leak in mlx5e_init_rep_rx The memory pointed to by the priv->rx_res pointer is not freed in the error path of mlx5e_init_rep_rx, which can lead to a memory leak. Fix by freeing the memory in the error path, thereby making the error path identical to mlx5e_cleanup_rep_rx(). 
Fixes: af8bbf730068 ("net/mlx5e: Convert mlx5e_flow_steering member of mlx5e_priv to pointer") Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 152b62138450..0b265a3f9b76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1012,7 +1012,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err = mlx5e_open_drop_rq(priv, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - return err; + goto err_rx_res_free; } err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, @@ -1046,6 +1046,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); +err_rx_res_free: mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; err_free_fs: From e5bcb7564d3bd0c88613c76963c5349be9c511c5 Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Tue, 25 Jul 2023 14:56:55 +0800 Subject: [PATCH 04/15] net/mlx5e: fix return value check in mlx5e_ipsec_remove_trailer() mlx5e_ipsec_remove_trailer() should return an error code if function pskb_trim() returns an unexpected value. 
Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") Signed-off-by: Yuanjun Gong Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index eab5bc718771..8d995e304869 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -58,7 +58,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) trailer_len = alen + plen + 2; - pskb_trim(skb, skb->len - trailer_len); + ret = pskb_trim(skb, skb->len - trailer_len); + if (unlikely(ret)) + return ret; if (skb->protocol == htons(ETH_P_IP)) { ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); ip_send_check(ipv4hdr); From 0507f2c8be0d345fe7014147c027cea6dc1c00a4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 3 Jul 2023 17:34:44 +0300 Subject: [PATCH 05/15] net/mlx5: Honor user input for migratable port fn attr Currently, whenever a user is setting migratable port fn attr, the driver always turns the migratable capability on.
Fix it by honoring the user input. Fixes: e5b9642a33be ("net/mlx5: E-Switch, Implement devlink port function cmds to control migratable") Signed-off-by: Shay Drory Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bdfe609cc9ec..93b2b94d41cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4196,7 +4196,7 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); + MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); From 93a331939d1d1c6c3422bc09ec43cac658594b34 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 29 Jun 2023 11:32:03 +0300 Subject: [PATCH 06/15] net/mlx5e: Don't hold encap tbl lock if there is no encap action The cited commit holds encap tbl lock unconditionally when setting up dests.
But it may cause the following deadlock: PID: 1063722 TASK: ffffa062ca5d0000 CPU: 13 COMMAND: "handler8" #0 [ffffb14de05b7368] __schedule at ffffffffa1d5aa91 #1 [ffffb14de05b7410] schedule at ffffffffa1d5afdb #2 [ffffb14de05b7430] schedule_preempt_disabled at ffffffffa1d5b528 #3 [ffffb14de05b7440] __mutex_lock at ffffffffa1d5d6cb #4 [ffffb14de05b74e8] mutex_lock_nested at ffffffffa1d5ddeb #5 [ffffb14de05b74f8] mlx5e_tc_tun_encap_dests_set at ffffffffc12f2096 [mlx5_core] #6 [ffffb14de05b7568] post_process_attr at ffffffffc12d9fc5 [mlx5_core] #7 [ffffb14de05b75a0] mlx5e_tc_add_fdb_flow at ffffffffc12de877 [mlx5_core] #8 [ffffb14de05b75f0] __mlx5e_add_fdb_flow at ffffffffc12e0eef [mlx5_core] #9 [ffffb14de05b7660] mlx5e_tc_add_flow at ffffffffc12e12f7 [mlx5_core] #10 [ffffb14de05b76b8] mlx5e_configure_flower at ffffffffc12e1686 [mlx5_core] #11 [ffffb14de05b7720] mlx5e_rep_indr_offload at ffffffffc12e3817 [mlx5_core] #12 [ffffb14de05b7730] mlx5e_rep_indr_setup_tc_cb at ffffffffc12e388a [mlx5_core] #13 [ffffb14de05b7740] tc_setup_cb_add at ffffffffa1ab2ba8 #14 [ffffb14de05b77a0] fl_hw_replace_filter at ffffffffc0bdec2f [cls_flower] #15 [ffffb14de05b7868] fl_change at ffffffffc0be6caa [cls_flower] #16 [ffffb14de05b7908] tc_new_tfilter at ffffffffa1ab71f0 [1031218.028143] wait_for_completion+0x24/0x30 [1031218.028589] mlx5e_update_route_decap_flows+0x9a/0x1e0 [mlx5_core] [1031218.029256] mlx5e_tc_fib_event_work+0x1ad/0x300 [mlx5_core] [1031218.029885] process_one_work+0x24e/0x510 Actually no need to hold encap tbl lock if there is no encap action. Fix it by checking if encap action exists or not before holding encap tbl lock. 
Fixes: 37c3b9fa7ccf ("net/mlx5e: Prevent encap offload when neigh update is running") Signed-off-by: Chris Mi Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc_tun_encap.c | 3 --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 21 ++++++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f0c3464f037f..0c88cf47af01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1030,9 +1030,6 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, int out_index; int err = 0; - if (!mlx5e_is_eswitch_flow(flow)) - return 0; - parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8d0a3f69693e..92377632f9e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1725,6 +1725,19 @@ verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) return 0; } +static bool +has_encap_dests(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int out_index; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + return true; + + return false; +} + static int post_process_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, @@ -1737,9 +1750,11 @@ post_process_attr(struct mlx5e_tc_flow *flow, if (err) goto err_out; - err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); - if (err) - goto err_out; + if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) { + err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); + if (err) + goto err_out; + } if 
(attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr); From 3ec43c1b082a8804472430e1253544d75f4b540e Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 30 May 2023 20:11:14 +0300 Subject: [PATCH 07/15] net/mlx5e: Fix crash moving to switchdev mode when ntuple offload is set Moving to switchdev mode with ntuple offload on causes the kernel to crash since fs->arfs is freed during nic profile cleanup flow. Ntuple offload is not supported in switchdev mode and it is already unset by mlx5 fix feature ndo in switchdev mode. Verify fs->arfs is valid before disabling it. trace: [] RIP: 0010:_raw_spin_lock_bh+0x17/0x30 [] arfs_del_rules+0x44/0x1a0 [mlx5_core] [] mlx5e_arfs_disable+0xe/0x20 [mlx5_core] [] mlx5e_handle_feature+0x3d/0xb0 [mlx5_core] [] ? __rtnl_unlock+0x25/0x50 [] mlx5e_set_features+0xfe/0x160 [mlx5_core] [] __netdev_update_features+0x278/0xa50 [] ? netdev_run_todo+0x5e/0x2a0 [] netdev_update_features+0x22/0x70 [] ? _cond_resched+0x15/0x30 [] mlx5e_attach_netdev+0x12a/0x1e0 [mlx5_core] [] mlx5e_netdev_attach_profile+0xa1/0xc0 [mlx5_core] [] mlx5e_netdev_change_profile+0x77/0xe0 [mlx5_core] [] mlx5e_vport_rep_load+0x1ed/0x290 [mlx5_core] [] mlx5_esw_offloads_rep_load+0x88/0xd0 [mlx5_core] [] esw_offloads_load_rep.part.38+0x31/0x50 [mlx5_core] [] esw_offloads_enable+0x6c5/0x710 [mlx5_core] [] mlx5_eswitch_enable_locked+0x1bb/0x290 [mlx5_core] [] mlx5_devlink_eswitch_mode_set+0x14f/0x320 [mlx5_core] [] devlink_nl_cmd_eswitch_set_doit+0x94/0x120 [] genl_family_rcv_msg_doit.isra.17+0x113/0x150 [] genl_family_rcv_msg+0xb7/0x170 [] ? devlink_nl_cmd_port_split_doit+0x100/0x100 [] genl_rcv_msg+0x47/0xa0 [] ? 
genl_family_rcv_msg+0x170/0x170 [] netlink_rcv_skb+0x4c/0x130 [] genl_rcv+0x24/0x40 [] netlink_unicast+0x19a/0x230 [] netlink_sendmsg+0x204/0x3d0 [] sock_sendmsg+0x50/0x60 Fixes: 90b22b9bcd24 ("net/mlx5e: Disable Rx ntuple offload for uplink representor") Signed-off-by: Amir Tzin Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 933a7772a7a3..5aa51d74f8b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -135,6 +135,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs); int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) { + /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile + * cleanup_rx callback and it is not recreated when + * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering() + * is not called by the uplink_rep profile init_rx callback. Thus, if + * ntuple is set, moving to switchdev flow will enter this function + * with fs->arfs nullified. + */ + if (!mlx5e_fs_get_arfs(fs)) + return 0; + arfs_del_rules(fs); return arfs_disable(fs); From d03b6e6f31820b84f7449cca022047f36c42bc3f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 3 Jul 2023 08:28:16 +0000 Subject: [PATCH 08/15] net/mlx5e: Move representor neigh cleanup to profile cleanup_tx For IP tunnel encapsulation in ECMP (Equal-Cost Multipath) mode, as the flow is duplicated to the peer eswitch, the related neighbour information on the peer uplink representor is created as well. In the cited commit, eswitch devcom unpair is moved to uplink unload API, specifically the profile->cleanup_tx. 
If there is an encap rule offloaded in ECMP mode, when one eswitch does unpair (because of unloading the driver, for instance), and the peer rule from the peer eswitch is going to be deleted, the use-after-free error is triggered while accessing neigh info, as it is already cleaned up in uplink's profile->disable, which is before its profile->cleanup_tx. To fix this issue, move the neigh cleanup to profile's cleanup_tx callback, and after mlx5e_cleanup_uplink_rep_tx is called. The neigh init is moved to init_tx for symmetry. [ 2453.376299] BUG: KASAN: slab-use-after-free in mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core] [ 2453.379125] Read of size 4 at addr ffff888127af9008 by task modprobe/2496 [ 2453.381542] CPU: 7 PID: 2496 Comm: modprobe Tainted: G B 6.4.0-rc7+ #15 [ 2453.383386] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 2453.384335] Call Trace: [ 2453.384625] [ 2453.384891] dump_stack_lvl+0x33/0x50 [ 2453.385285] print_report+0xc2/0x610 [ 2453.385667] ? __virt_addr_valid+0xb1/0x130 [ 2453.386091] ? mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core] [ 2453.386757] kasan_report+0xae/0xe0 [ 2453.387123] ? mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core] [ 2453.387798] mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core] [ 2453.388465] mlx5e_rep_encap_entry_detach+0xa6/0xe0 [mlx5_core] [ 2453.389111] mlx5e_encap_dealloc+0xa7/0x100 [mlx5_core] [ 2453.389706] mlx5e_tc_tun_encap_dests_unset+0x61/0xb0 [mlx5_core] [ 2453.390361] mlx5_free_flow_attr_actions+0x11e/0x340 [mlx5_core] [ 2453.391015] ? complete_all+0x43/0xd0 [ 2453.391398] ? free_flow_post_acts+0x38/0x120 [mlx5_core] [ 2453.392004] mlx5e_tc_del_fdb_flow+0x4ae/0x690 [mlx5_core] [ 2453.392618] mlx5e_tc_del_fdb_peers_flow+0x308/0x370 [mlx5_core] [ 2453.393276] mlx5e_tc_clean_fdb_peer_flows+0xf5/0x140 [mlx5_core] [ 2453.393925] mlx5_esw_offloads_unpair+0x86/0x540 [mlx5_core] [ 2453.394546] ?
mlx5_esw_offloads_set_ns_peer.isra.0+0x180/0x180 [mlx5_core] [ 2453.395268] ? down_write+0xaa/0x100 [ 2453.395652] mlx5_esw_offloads_devcom_event+0x203/0x530 [mlx5_core] [ 2453.396317] mlx5_devcom_send_event+0xbb/0x190 [mlx5_core] [ 2453.396917] mlx5_esw_offloads_devcom_cleanup+0xb0/0xd0 [mlx5_core] [ 2453.397582] mlx5e_tc_esw_cleanup+0x42/0x120 [mlx5_core] [ 2453.398182] mlx5e_rep_tc_cleanup+0x15/0x30 [mlx5_core] [ 2453.398768] mlx5e_cleanup_rep_tx+0x6c/0x80 [mlx5_core] [ 2453.399367] mlx5e_detach_netdev+0xee/0x120 [mlx5_core] [ 2453.399957] mlx5e_netdev_change_profile+0x84/0x170 [mlx5_core] [ 2453.400598] mlx5e_vport_rep_unload+0xe0/0xf0 [mlx5_core] [ 2453.403781] mlx5_eswitch_unregister_vport_reps+0x15e/0x190 [mlx5_core] [ 2453.404479] ? mlx5_eswitch_register_vport_reps+0x200/0x200 [mlx5_core] [ 2453.405170] ? up_write+0x39/0x60 [ 2453.405529] ? kernfs_remove_by_name_ns+0xb7/0xe0 [ 2453.405985] auxiliary_bus_remove+0x2e/0x40 [ 2453.406405] device_release_driver_internal+0x243/0x2d0 [ 2453.406900] ? kobject_put+0x42/0x2d0 [ 2453.407284] bus_remove_device+0x128/0x1d0 [ 2453.407687] device_del+0x240/0x550 [ 2453.408053] ? waiting_for_supplier_show+0xe0/0xe0 [ 2453.408511] ? kobject_put+0xfa/0x2d0 [ 2453.408889] ? __kmem_cache_free+0x14d/0x280 [ 2453.409310] mlx5_rescan_drivers_locked.part.0+0xcd/0x2b0 [mlx5_core] [ 2453.409973] mlx5_unregister_device+0x40/0x50 [mlx5_core] [ 2453.410561] mlx5_uninit_one+0x3d/0x110 [mlx5_core] [ 2453.411111] remove_one+0x89/0x130 [mlx5_core] [ 2453.411628] pci_device_remove+0x59/0xf0 [ 2453.412026] device_release_driver_internal+0x243/0x2d0 [ 2453.412511] ? 
parse_option_str+0x14/0x90 [ 2453.412915] driver_detach+0x7b/0xf0 [ 2453.413289] bus_remove_driver+0xb5/0x160 [ 2453.413685] pci_unregister_driver+0x3f/0xf0 [ 2453.414104] mlx5_cleanup+0xc/0x20 [mlx5_core] Fixes: 2be5bd42a5bb ("net/mlx5: Handle pairing of E-switch via uplink un/load APIs") Signed-off-by: Jianbo Liu Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0b265a3f9b76..99b3843396f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1160,6 +1160,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_neigh_init; + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_init_uplink_rep_tx(rpriv); if (err) @@ -1176,6 +1180,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); err_init_tx: + mlx5e_rep_neigh_cleanup(rpriv); +err_neigh_init: mlx5e_destroy_tises(priv); return err; } @@ -1189,22 +1195,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_destroy_tises(priv); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - mlx5e_set_netdev_mtu_boundaries(priv); - mlx5e_rep_neigh_init(rpriv); } static void mlx5e_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - - mlx5e_rep_neigh_cleanup(rpriv); } static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) @@ -1254,7 +1255,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event static void 
mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; u16 max_mtu; @@ -1276,7 +1276,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); - mlx5e_rep_neigh_init(rpriv); mlx5e_rep_bridge_init(priv); netdev->wanted_features |= NETIF_F_HW_TC; @@ -1291,7 +1290,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; rtnl_lock(); @@ -1301,7 +1299,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) rtnl_unlock(); mlx5e_rep_bridge_cleanup(priv); - mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); From e0f52298fee449fec37e3e3c32df60008b509b16 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 18 Jul 2023 11:13:33 +0300 Subject: [PATCH 09/15] net/mlx5e: xsk: Fix invalid buffer access for legacy rq The below crash can be encountered when using xdpsock in rx mode for legacy rq: the buffer gets released in the XDP_REDIRECT path, and then once again in the driver. This fix sets the flag to avoid releasing on the driver side. XSK handling of buffers for legacy rq was relying on the caller to set the skip release flag. But the referenced fix started using fragment counts for pages instead of the skip flag. Crash log: general protection fault, probably for non-canonical address 0xffff8881217e3a: 0000 [#1] SMP CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 6.5.0-rc1+ #31 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:bpf_prog_03b13f331978c78c+0xf/0x28 Code: ... 
RSP: 0018:ffff88810082fc98 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888138404901 RCX: c0ffffc900027cbc RDX: ffffffffa000b514 RSI: 00ffff8881217e32 RDI: ffff888138404901 RBP: ffff88810082fc98 R08: 0000000000091100 R09: 0000000000000006 R10: 0000000000000800 R11: 0000000000000800 R12: ffffc9000027a000 R13: ffff8881217e2dc0 R14: ffff8881217e2910 R15: ffff8881217e2f00 FS: 0000000000000000(0000) GS:ffff88852c800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000564cb2e2cde0 CR3: 000000010e603004 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? die_addr+0x32/0x80 ? exc_general_protection+0x192/0x390 ? asm_exc_general_protection+0x22/0x30 ? 0xffffffffa000b514 ? bpf_prog_03b13f331978c78c+0xf/0x28 mlx5e_xdp_handle+0x48/0x670 [mlx5_core] ? dev_gro_receive+0x3b5/0x6e0 mlx5e_xsk_skb_from_cqe_linear+0x6e/0x90 [mlx5_core] mlx5e_handle_rx_cqe+0x55/0x100 [mlx5_core] mlx5e_poll_rx_cq+0x87/0x6e0 [mlx5_core] mlx5e_napi_poll+0x45e/0x6b0 [mlx5_core] __napi_poll+0x25/0x1a0 net_rx_action+0x28a/0x300 __do_softirq+0xcd/0x279 ? sort_range+0x20/0x20 run_ksoftirqd+0x1a/0x20 smpboot_thread_fn+0xa2/0x130 kthread+0xc9/0xf0 ? 
kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Modules linked in: mlx5_ib mlx5_core rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter overlay zram zsmalloc fuse [last unloaded: mlx5_core] ---[ end trace 0000000000000000 ]--- Fixes: 7abd955a58fb ("net/mlx5e: RX, Fix page_pool page fragment tracking for XDP") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index d97e6df66f45..b8dd74453655 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -323,8 +323,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, net_prefetch(mxbuf->xdp.data); prog = rcu_dereference(rq->xdp_prog); - if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); return NULL; /* page/packet was consumed by XDP */ + } /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse * will be handled by mlx5e_free_rx_wqe. From 39646d9bcd1a65d2396328026626859a1dab59d7 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 24 Apr 2023 18:19:00 +0300 Subject: [PATCH 10/15] net/mlx5e: xsk: Fix crash on regular rq reactivation When the regular rq is reactivated after the XSK socket is closed it could be reading stale cqes which eventually corrupts the rq. This leads to no more traffic being received on the regular rq and a crash on the next close or deactivation of the rq. 
Kal Cutter Conley reported this issue as a crash on the release path when the xdpsock sample program is stopped (killed) and restarted in sequence while traffic is running. This patch flushes all cqes during the rq flush. The cqe flushing is done in the reset state of the rq. mlx5e_rq_to_ready code is moved into the flush function to allow for this. Fixes: 082a9edf12fe ("net/mlx5e: xsk: Flush RQ on XSK activation to save memory") Reported-by: Kal Cutter Conley Closes: https://lore.kernel.org/xdp-newbies/CAHApi-nUAs4TeFWUDV915CZJo07XVg2Vp63-no7UDfj6wur9nQ@mail.gmail.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index defb1efccb78..1c820119e438 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s return err; } -static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq) +{ + struct mlx5_cqwq *cqwq = &rq->cq.wq; + struct mlx5_cqe64 *cqe; + + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) { + while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq))) + mlx5_cqwq_pop(cqwq); + } else { + while ((cqe = mlx5_cqwq_get_cqe(cqwq))) + mlx5_cqwq_pop(cqwq); + } + + mlx5_cqwq_update_db_record(cqwq); +} + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) { struct net_device *dev = rq->netdev; int err; @@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); return err; } + + mlx5e_free_rx_descs(rq); + mlx5e_flush_rq_cq(rq); + err = mlx5e_modify_rq_state(rq,
MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) { netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); @@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) return 0; } -int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) -{ - mlx5e_free_rx_descs(rq); - - return mlx5e_rq_to_ready(rq, curr_state); -} - static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) { struct mlx5_core_dev *mdev = rq->mdev; From eb02b93aad952008f1692cee5c5b13001e908407 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 27 Jun 2023 09:26:56 +0200 Subject: [PATCH 11/15] net/mlx5: Bridge, set debugfs access right to root-only As suggested during code review set the access rights for bridge 'fdb' debugfs file to root-only. Fixes: 791eb78285e8 ("net/mlx5: Bridge, expose FDB state via debugfs") Reported-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20230619120515.5045132a@kernel.org/ Signed-off-by: Vlad Buslov Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c index b6a45eff28f5..dbd7cbe6cbf3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c @@ -64,7 +64,7 @@ void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_ bridge->debugfs_dir = debugfs_create_dir(br_netdev->name, bridge->br_offloads->debugfs_root); - debugfs_create_file("fdb", 0444, bridge->debugfs_dir, bridge, + debugfs_create_file("fdb", 0400, bridge->debugfs_dir, bridge, &mlx5_esw_bridge_debugfs_fops); } From 3e4cf1dd2ce413f4be3e2c9062fb470e2ad2be88 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 8 May 2023 03:36:10 +0000 Subject: [PATCH 12/15] net/mlx5e: kTLS, Fix protection domain in use 
syndrome when devlink reload There are DEK objects cached in DEK pool after kTLS is used, and they are freed only in mlx5e_ktls_cleanup(). mlx5e_destroy_mdev_resources() is called in mlx5e_suspend() to free mdev resources, including protection domain (PD). However, PD is still referenced by the cached DEK objects in this case, because profile->cleanup() (and therefore mlx5e_ktls_cleanup()) is called after mlx5e_suspend() during devlink reload. So the following FW syndrome is generated: mlx5_cmd_out_err:803:(pid 12948): DEALLOC_PD(0x801) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0xef0c8a), err(-22) To avoid this syndrome, move DEK pool destruction to mlx5e_ktls_cleanup_tx(), which is called by profile->cleanup_tx(). And move pool creation to mlx5e_ktls_init_tx() for symmetry. Fixes: f741db1a5171 ("net/mlx5e: kTLS, Improve connection rate by using fast update encryption key") Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ktls.c | 8 ----- .../mellanox/mlx5/core/en_accel/ktls_tx.c | 29 +++++++++++++++++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index cf704f106b7c..984fa04bd331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -188,7 +188,6 @@ static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init(struct mlx5e_priv *priv) { - struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls; if (!mlx5e_is_ktls_device(priv->mdev)) @@ -199,12 +198,6 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv) return -ENOMEM; tls->mdev = priv->mdev; - dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); - if (IS_ERR(dek_pool)) { - kfree(tls); - return PTR_ERR(dek_pool); - } - tls->dek_pool = dek_pool; priv->tls = tls; 
mlx5e_tls_debugfs_init(tls, priv->dfs_root); @@ -222,7 +215,6 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) debugfs_remove_recursive(tls->debugfs.dfs); tls->debugfs.dfs = NULL; - mlx5_crypto_dek_pool_destroy(tls->dek_pool); kfree(priv->tls); priv->tls = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index efb2cf74ad6a..d61be26a4df1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -908,28 +908,51 @@ static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) { + struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls = priv->tls; + int err; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + /* DEK pool could be used by either or both of TX and RX. But we have to + * put the creation here to avoid syndrome when doing devlink reload. + */ + dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); + if (IS_ERR(dek_pool)) + return PTR_ERR(dek_pool); + tls->dek_pool = dek_pool; if (!mlx5e_is_ktls_tx(priv->mdev)) return 0; priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); - if (!priv->tls->tx_pool) - return -ENOMEM; + if (!priv->tls->tx_pool) { + err = -ENOMEM; + goto err_tx_pool_init; + } mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs); return 0; + +err_tx_pool_init: + mlx5_crypto_dek_pool_destroy(dek_pool); + return err; } void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) { if (!mlx5e_is_ktls_tx(priv->mdev)) - return; + goto dek_pool_destroy; debugfs_remove_recursive(priv->tls->debugfs.dfs_tx); priv->tls->debugfs.dfs_tx = NULL; mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); priv->tls->tx_pool = NULL; + +dek_pool_destroy: + if (mlx5e_is_ktls_device(priv->mdev)) + mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool); } From 61eab651f6e96791cfad6db45f1107c398699b2d Mon Sep 17 
00:00:00 2001 From: Chris Mi Date: Mon, 17 Jul 2023 08:32:51 +0300 Subject: [PATCH 13/15] net/mlx5: fs_chains: Fix ft prio if ignore_flow_level is not supported The cited commit sets ft prio to fs_base_prio. But if ignore_flow_level is not supported, ft prio must be set based on tc filter prio. Otherwise, all the ft prio are the same on the same chain. It is invalid if ignore_flow_level is not supported. Fix it by setting ft prio based on tc filter prio and setting fs_base_prio to 0 for fdb. Fixes: 8e80e5648092 ("net/mlx5: fs_chains: Refactor to detach chains from tc usage") Signed-off-by: Chris Mi Reviewed-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 93b2b94d41cd..bc04abb31f9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1436,7 +1436,6 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) esw_init_chains_offload_flags(esw, &attr.flags); attr.ns = MLX5_FLOW_NAMESPACE_FDB; - attr.fs_base_prio = FDB_TC_OFFLOAD; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; attr.mapping = esw->offloads.reg_c0_obj_pool; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index db9df9798ffa..a80ecb672f33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -178,7 +178,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, if (!mlx5_chains_ignore_flow_level_supported(chains) || (chain == 0 && prio == 1 && level == 0)) { ft_attr.level =
chains->fs_base_level; - ft_attr.prio = chains->fs_base_prio; + ft_attr.prio = chains->fs_base_prio + prio - 1; ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? mlx5_get_fdb_sub_ns(chains->dev, chain) : mlx5_get_flow_namespace(chains->dev, chains->ns); From 62752c0bc67f79f064cbe2605054f99d52809e7b Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 14 Jun 2023 09:03:32 +0300 Subject: [PATCH 14/15] net/mlx5: DR, Fix peer domain namespace setting The offending patch is based on the assumption that for PFs, mlx5_get_dev_index() is the same as vhca_id. However, this assumption is wrong in case of DPU (ECPF). Fix it by using vhca_id directly, and switch the array of peers to xarray. Fixes: 6d5b7321d8af ("net/mlx5: DR, handle more than one peer domain") Signed-off-by: Shay Drory Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 14 +++++++------- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 2 +- .../net/ethernet/mellanox/mlx5/core/fs_cmd.h | 2 +- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- .../mellanox/mlx5/core/steering/dr_action.c | 2 +- .../mellanox/mlx5/core/steering/dr_domain.c | 19 +++++++++++++------ .../mellanox/mlx5/core/steering/dr_ste_v0.c | 7 ++++--- .../mellanox/mlx5/core/steering/dr_ste_v1.c | 7 ++++--- .../mellanox/mlx5/core/steering/dr_types.h | 2 +- .../mellanox/mlx5/core/steering/fs_dr.c | 4 ++-- .../mellanox/mlx5/core/steering/mlx5dr.h | 2 +- 12 files changed, 38 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bc04abb31f9c..e59380ee1ead 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2778,9 +2778,9 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw, bool pair) { - u8 
peer_idx = mlx5_get_dev_index(peer_esw->dev); + u16 peer_vhca_id = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + u16 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); struct mlx5_flow_root_namespace *peer_ns; - u8 idx = mlx5_get_dev_index(esw->dev); struct mlx5_flow_root_namespace *ns; int err; @@ -2788,18 +2788,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, ns = esw->dev->priv.steering->fdb_root_ns; if (pair) { - err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_idx); + err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_vhca_id); if (err) return err; - err = mlx5_flow_namespace_set_peer(peer_ns, ns, idx); + err = mlx5_flow_namespace_set_peer(peer_ns, ns, vhca_id); if (err) { - mlx5_flow_namespace_set_peer(ns, NULL, peer_idx); + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); return err; } } else { - mlx5_flow_namespace_set_peer(ns, NULL, peer_idx); - mlx5_flow_namespace_set_peer(peer_ns, NULL, idx); + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); + mlx5_flow_namespace_set_peer(peer_ns, NULL, vhca_id); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 91dcb0dcad10..aab7059bf6e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -140,7 +140,7 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx) + u16 peer_vhca_id) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index b6b9a5a20591..7790ae5531e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -94,7 +94,7 @@ struct mlx5_flow_cmds { int (*set_peer)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 
peer_idx); + u16 peer_vhca_id); int (*create_ns)(struct mlx5_flow_root_namespace *ns); int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4ef04aa28771..b4eb27e7f28b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3621,7 +3621,7 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx) + u16 peer_vhca_id) { if (peer_ns && ns->mode != peer_ns->mode) { mlx5_core_err(ns->dev, @@ -3629,7 +3629,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, return -EINVAL; } - return ns->cmds->set_peer(ns, peer_ns, peer_idx); + return ns->cmds->set_peer(ns, peer_ns, peer_vhca_id); } /* This function should be called only at init stage of the namespace. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 03e64c4c245d..4aed1768b85f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -303,7 +303,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx); + u16 peer_vhca_id); int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, enum mlx5_flow_steering_mode mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index e739ec6cdf90..54bb0866ed72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -2079,7 +2079,7 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, peer_vport = 
vhca_id_valid && mlx5_core_is_pf(dmn->mdev) && (vhca_id != dmn->info.caps.gvmi); - vport_dmn = peer_vport ? dmn->peer_dmn[vhca_id] : dmn; + vport_dmn = peer_vport ? xa_load(&dmn->peer_dmn_xa, vhca_id) : dmn; if (!vport_dmn) { mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 75dc85dc24ef..3d74109f8230 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -475,6 +475,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) mutex_init(&dmn->info.rx.mutex); mutex_init(&dmn->info.tx.mutex); xa_init(&dmn->definers_xa); + xa_init(&dmn->peer_dmn_xa); if (dr_domain_caps_init(mdev, dmn)) { mlx5dr_err(dmn, "Failed init domain, no caps\n"); @@ -507,6 +508,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) uninit_caps: dr_domain_caps_uninit(dmn); def_xa_destroy: + xa_destroy(&dmn->peer_dmn_xa); xa_destroy(&dmn->definers_xa); kfree(dmn); return NULL; @@ -547,6 +549,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) dr_domain_uninit_csum_recalc_fts(dmn); dr_domain_uninit_resources(dmn); dr_domain_caps_uninit(dmn); + xa_destroy(&dmn->peer_dmn_xa); xa_destroy(&dmn->definers_xa); mutex_destroy(&dmn->info.tx.mutex); mutex_destroy(&dmn->info.rx.mutex); @@ -556,17 +559,21 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, - u8 peer_idx) + u16 peer_vhca_id) { + struct mlx5dr_domain *peer; + mlx5dr_domain_lock(dmn); - if (dmn->peer_dmn[peer_idx]) - refcount_dec(&dmn->peer_dmn[peer_idx]->refcount); + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_dec(&peer->refcount); - dmn->peer_dmn[peer_idx] = peer_dmn; + WARN_ON(xa_err(xa_store(&dmn->peer_dmn_xa, 
peer_vhca_id, peer_dmn, GFP_KERNEL))); - if (dmn->peer_dmn[peer_idx]) - refcount_inc(&dmn->peer_dmn[peer_idx]->refcount); + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_inc(&peer->refcount); mlx5dr_domain_unlock(dmn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 69d7a8f3c402..f708b029425a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -1652,17 +1652,18 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); /* Find port GVMI based on the eswitch_owner_vhca_id */ if (id == dmn->info.caps.gvmi) vport_dmn = dmn; - else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] && - (id == dmn->peer_dmn[id]->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn[id]; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index f4ef0b22b991..dd856cde188d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -1984,16 +1984,17 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn); if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); /* Find port GVMI based on the eswitch_owner_vhca_id */ if (id == 
dmn->info.caps.gvmi) vport_dmn = dmn; - else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] && - (id == dmn->peer_dmn[id]->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn[id]; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 1622dbbe6b97..6c59de3e28f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -935,7 +935,6 @@ struct mlx5dr_domain_info { }; struct mlx5dr_domain { - struct mlx5dr_domain *peer_dmn[MLX5_MAX_PORTS]; struct mlx5_core_dev *mdev; u32 pdn; struct mlx5_uars_page *uar; @@ -956,6 +955,7 @@ struct mlx5dr_domain { struct list_head dbg_tbl_list; struct mlx5dr_dbg_dump_info dump_info; struct xarray definers_xa; + struct xarray peer_dmn_xa; /* memory management statistics */ u32 num_buddies[DR_ICM_TYPE_MAX]; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 6aac5f006bf8..feb307fb3440 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -781,14 +781,14 @@ static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx) + u16 peer_vhca_id) { struct mlx5dr_domain *peer_domain = NULL; if (peer_ns) peer_domain = peer_ns->fs_dr_domain.dr_domain; mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, - peer_domain, peer_idx); + peer_domain, peer_vhca_id); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 24cbb33ecd6c..89fced86936f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -49,7 +49,7 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, - u8 peer_idx); + u16 peer_vhca_id); struct mlx5dr_table * mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags, From 53d737dfd3d7b023fa9fa445ea3f3db0ac9da402 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 25 Jun 2023 11:07:38 +0300 Subject: [PATCH 15/15] net/mlx5: Unregister devlink params in case interface is down Currently, in case an interface is down, mlx5 driver doesn't unregister its devlink params, which leads to this WARN[1]. Fix it by unregistering devlink params in that case as well. [1] [ 295.244769 ] WARNING: CPU: 15 PID: 1 at net/core/devlink.c:9042 devlink_free+0x174/0x1fc [ 295.488379 ] CPU: 15 PID: 1 Comm: shutdown Tainted: G S OE 5.15.0-1017.19.3.g0677e61-bluefield #g0677e61 [ 295.509330 ] Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.2.0.12761 Jun 6 2023 [ 295.543096 ] pc : devlink_free+0x174/0x1fc [ 295.551104 ] lr : mlx5_devlink_free+0x18/0x2c [mlx5_core] [ 295.561816 ] sp : ffff80000809b850 [ 295.711155 ] Call trace: [ 295.716030 ] devlink_free+0x174/0x1fc [ 295.723346 ] mlx5_devlink_free+0x18/0x2c [mlx5_core] [ 295.733351 ] mlx5_sf_dev_remove+0x98/0xb0 [mlx5_core] [ 295.743534 ] auxiliary_bus_remove+0x2c/0x50 [ 295.751893 ] __device_release_driver+0x19c/0x280 [ 295.761120 ] device_release_driver+0x34/0x50 [ 295.769649 ] bus_remove_device+0xdc/0x170 [ 295.777656 ] device_del+0x17c/0x3a4 [ 295.784620 ] mlx5_sf_dev_remove+0x28/0xf0 [mlx5_core] [ 295.794800 ] mlx5_sf_dev_table_destroy+0x98/0x110 [mlx5_core] [ 295.806375 ] mlx5_unload+0x34/0xd0 [mlx5_core] [ 295.815339 ] mlx5_unload_one+0x70/0xe4 [mlx5_core] [ 295.824998 ] shutdown+0xb0/0xd8 [mlx5_core] [ 295.833439 ] pci_device_shutdown+0x3c/0xa0 [ 295.841651 ] device_shutdown+0x170/0x340 [ 295.849486 ] 
__do_sys_reboot+0x1f4/0x2a0 [ 295.857322 ] __arm64_sys_reboot+0x2c/0x40 [ 295.865329 ] invoke_syscall+0x78/0x100 [ 295.872817 ] el0_svc_common.constprop.0+0x54/0x184 [ 295.882392 ] do_el0_svc+0x30/0xac [ 295.889008 ] el0_svc+0x48/0x160 [ 295.895278 ] el0t_64_sync_handler+0xa4/0x130 [ 295.903807 ] el0t_64_sync+0x1a4/0x1a8 [ 295.911120 ] ---[ end trace 4f1d2381d00d9dce ]--- Fixes: fe578cbb2f05 ("net/mlx5: Move devlink registration before mlx5_load") Signed-off-by: Shay Drory Reviewed-by: Maher Sanalla Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 88dbea6631d5..f42abc2ea73c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1506,6 +1506,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); mlx5_cleanup_once(dev); goto out; }