From 884abe45a9014d0de2e6edb0630dfd64f23f1d1b Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 28 Jun 2023 08:59:34 +0800 Subject: [PATCH 1/9] net/mlx5e: fix double free in mlx5e_destroy_flow_table In function accel_fs_tcp_create_groups(), when the ft->g memory is successfully allocated but the 'in' memory fails to be allocated, the memory pointed to by ft->g is released once. And in function accel_fs_tcp_create_table, mlx5e_destroy_flow_table is called to release the memory pointed to by ft->g again. This will cause double free problem. Fixes: c062d52ac24c ("net/mlx5e: Receive flow steering framework for accelerated TCP flows") Signed-off-by: Zhengchao Shao Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 88a5aed9d678..c7d191f66ad1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -190,6 +190,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } From 3250affdc658557a41df9c5fb567723e421f8bf2 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:49:02 +0800 Subject: [PATCH 2/9] net/mlx5e: fix memory leak in mlx5e_fs_tt_redirect_any_create The memory pointed to by the fs->any pointer is not freed in the error path of mlx5e_fs_tt_redirect_any_create, which can lead to a memory leak. Fix by freeing the memory in the error path, thereby making the error path identical to mlx5e_fs_tt_redirect_any_destroy(). Fixes: 0f575c20bf06 ("net/mlx5e: Introduce Flow Steering ANY API") Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Reviewed-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 03cb79adf912..be83ad9db82a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -594,7 +594,7 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err = fs_any_create_table(fs); if (err) - return err; + goto err_free_any; err = fs_any_enable(fs); if (err) @@ -606,8 +606,8 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err_destroy_table: fs_any_destroy_table(fs_any); - - kfree(fs_any); +err_free_any: mlx5e_fs_set_any(fs, NULL); + kfree(fs_any); return err; } From d543b649ffe58a0cb4b6948b3305069c5980a1fa Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:49:03 +0800 Subject: [PATCH 3/9] net/mlx5e: fix memory leak in mlx5e_ptp_open When kvzalloc_node or kvzalloc failed in mlx5e_ptp_open, the memory pointed by "c" or "cparams" is not freed, which can lead to a memory leak. Fix by freeing the array in the error path. Fixes: 145e5637d941 ("net/mlx5e: Add TX PTP port object support") Signed-off-by: Zhengchao Shao Reviewed-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 3cbebfba582b..b0b429a0321e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -729,8 +729,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); - if (!c || !cparams) - return -ENOMEM; + if (!c || !cparams) { + err = -ENOMEM; + goto err_free; + } c->priv = priv; c->mdev = priv->mdev; From 2e2d1965794d22fbe86df45bf4f933216743577d Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 22 May 2023 21:18:53 +0300 Subject: [PATCH 4/9] net/mlx5e: RX, Fix flush and close release flow of regular rq for legacy rq Regular (non-XSK) RQs get flushed on XSK setup and re-activated on XSK close. If the same regular RQ is closed (a config change for example) soon after the XSK close, a double release occurs because the missing wqes get released a second time. Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 704b022cd1f0..a9575219e455 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -390,10 +390,18 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); - if (rq->xsk_pool) + if (rq->xsk_pool) { mlx5e_xsk_free_rx_wqe(wi); - else + } else { mlx5e_free_rx_wqe(rq, wi); + + /* Avoid a second release of the wqe pages: dealloc is called + * for the same missing wqes on regular RQ flush and on regular + * RQ close. This happens when XSK RQs come into play. + */ + for (int i = 0; i < rq->wqe.info.num_frags; i++, wi++) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } } static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) From 631079e08aa4a20b73e70de4cf457886194f029f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Jun 2023 20:36:41 -0700 Subject: [PATCH 5/9] net/mlx5: Register a unique thermal zone per device Prior to this patch only one "mlx5" thermal zone could have been registered regardless of the number of individual mlx5 devices in the system. To fix this setup a unique name per device to register its own thermal zone. In order to not register a thermal zone for a virtual device (VF/SF) add a check for PF device type. The new name is a concatenation between "mlx5_" and "", which will also help associating a thermal zone with its PCI device. $ lspci | grep ConnectX 00:04.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx] 00:05.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx] $ cat /sys/devices/virtual/thermal/thermal_zone0/type mlx5_0000:00:04.0 $ cat /sys/devices/virtual/thermal/thermal_zone1/type mlx5_0000:00:05.0 Fixes: c1fef618d611 ("net/mlx5: Implement thermal zone") CC: Sandipan Patra Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/thermal.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c index 20bb5eb266c1..52199d39657e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c @@ -68,14 +68,19 @@ static struct thermal_zone_device_ops mlx5_thermal_ops = { int mlx5_thermal_init(struct mlx5_core_dev *mdev) { + char data[THERMAL_NAME_LENGTH]; struct mlx5_thermal *thermal; - struct thermal_zone_device *tzd; - const char *data = "mlx5"; + int err; - tzd = thermal_zone_get_zone_by_name(data); - if (!IS_ERR(tzd)) + if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev)) return 0; + err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device)); + if (err < 0 || err >= sizeof(data)) { + mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err); + return -EINVAL; + } + thermal = kzalloc(sizeof(*thermal), GFP_KERNEL); if (!thermal) return -ENOMEM; @@ -89,10 +94,10 @@ int mlx5_thermal_init(struct mlx5_core_dev *mdev) &mlx5_thermal_ops, NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); if (IS_ERR(thermal->tzdev)) { - dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n", - data, PTR_ERR(thermal->tzdev)); + err = PTR_ERR(thermal->tzdev); + mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err); kfree(thermal); - return -EINVAL; + return err; } mdev->thermal = thermal; From 65e64640e97c0f223e77f9ea69b5a46186b93470 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 8 Jun 2023 09:32:10 +0200 Subject: [PATCH 6/9] net/mlx5e: Check for NOT_READY flag state after locking Currently the check for NOT_READY flag is performed before obtaining the necessary lock. This opens a possibility for race condition when the flow is concurrently removed from unready_flows list by the workqueue task, which causes a double-removal from the list and a crash[0]. Fix the issue by moving the flag check inside the section protected by uplink_priv->unready_flows_lock mutex. [0]: [44376.389654] general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP [44376.391665] CPU: 7 PID: 59123 Comm: tc Not tainted 6.4.0-rc4+ #1 [44376.392984] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [44376.395342] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.396857] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.399167] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.399680] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.400337] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.401001] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 [44376.401663] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.402342] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.402999] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.403787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.404343] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.405004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [44376.405665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [44376.406339] Call Trace: [44376.406651] [44376.406939] ? die_addr+0x33/0x90 [44376.407311] ? exc_general_protection+0x192/0x390 [44376.407795] ? asm_exc_general_protection+0x22/0x30 [44376.408292] ? mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.408876] __mlx5e_tc_del_fdb_peer_flow+0xbc/0xe0 [mlx5_core] [44376.409482] mlx5e_tc_del_flow+0x42/0x210 [mlx5_core] [44376.410055] mlx5e_flow_put+0x25/0x50 [mlx5_core] [44376.410529] mlx5e_delete_flower+0x24b/0x350 [mlx5_core] [44376.411043] tc_setup_cb_reoffload+0x22/0x80 [44376.411462] fl_reoffload+0x261/0x2f0 [cls_flower] [44376.411907] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.412481] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.413044] tcf_block_playback_offloads+0x76/0x170 [44376.413497] tcf_block_unbind+0x7b/0xd0 [44376.413881] tcf_block_setup+0x17d/0x1c0 [44376.414269] tcf_block_offload_cmd.isra.0+0xf1/0x130 [44376.414725] tcf_block_offload_unbind+0x43/0x70 [44376.415153] __tcf_block_put+0x82/0x150 [44376.415532] ingress_destroy+0x22/0x30 [sch_ingress] [44376.415986] qdisc_destroy+0x3b/0xd0 [44376.416343] qdisc_graft+0x4d0/0x620 [44376.416706] tc_get_qdisc+0x1c9/0x3b0 [44376.417074] rtnetlink_rcv_msg+0x29c/0x390 [44376.419978] ? rep_movs_alternative+0x3a/0xa0 [44376.420399] ? rtnl_calcit.isra.0+0x120/0x120 [44376.420813] netlink_rcv_skb+0x54/0x100 [44376.421192] netlink_unicast+0x1f6/0x2c0 [44376.421573] netlink_sendmsg+0x232/0x4a0 [44376.421980] sock_sendmsg+0x38/0x60 [44376.422328] ____sys_sendmsg+0x1d0/0x1e0 [44376.422709] ? copy_msghdr_from_user+0x6d/0xa0 [44376.423127] ___sys_sendmsg+0x80/0xc0 [44376.423495] ? ___sys_recvmsg+0x8b/0xc0 [44376.423869] __sys_sendmsg+0x51/0x90 [44376.424226] do_syscall_64+0x3d/0x90 [44376.424587] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [44376.425046] RIP: 0033:0x7f045134f887 [44376.425403] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [44376.426914] RSP: 002b:00007ffd63a82b98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [44376.427592] RAX: ffffffffffffffda RBX: 000000006481955f RCX: 00007f045134f887 [44376.428195] RDX: 0000000000000000 RSI: 00007ffd63a82c00 RDI: 0000000000000003 [44376.428796] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [44376.429404] R10: 00007f0451208708 R11: 0000000000000246 R12: 0000000000000001 [44376.430039] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400 [44376.430644] [44376.430907] Modules linked in: mlx5_ib mlx5_core act_mirred act_tunnel_key cls_flower vxlan dummy sch_ingress openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_g ss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: mlx5_core] [44376.433936] ---[ end trace 0000000000000000 ]--- [44376.434373] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.434951] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.436452] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.436924] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.437530] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.438179] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 [44376.438786] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.439393] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.439998] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.440714] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.441225] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.441843] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [44376.442471] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: ad86755b18d5 ("net/mlx5e: Protect unready flows with dedicated lock") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 41dc26800f48..8d0a3f69693e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1639,7 +1639,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) uplink_priv = &rpriv->uplink_priv; mutex_lock(&uplink_priv->unready_flows_lock); - unready_flow_del(flow); + if (flow_flag_test(flow, NOT_READY)) + unready_flow_del(flow); mutex_unlock(&uplink_priv->unready_flows_lock); } @@ -1932,8 +1933,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); - if (flow_flag_test(flow, NOT_READY)) - remove_unready_flow(flow); + remove_unready_flow(flow); if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) From f7a485115ad4cfc560833942014bf791abf1f827 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 4 Jun 2023 12:45:38 +0300 Subject: [PATCH 7/9] net/mlx5e: TC, CT: Offload ct clear only once Non-clear CT action causes a flow rule split, while CT clear action doesn't and is just a header-rewrite to the current flow rule. But ct offload is done in post_parse and is per ct action instance, so ct clear offload is parsed multiple times, while its deleted once. Fix this by post_parsing the ct action only once per flow attribute (which is per flow rule) by using a offloaded ct_attr flag. Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action") Signed-off-by: Paul Blakey Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 14 +++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a254e728ac95..fadfa8b50beb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1545,7 +1545,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ attr->ct_attr.zone = act->ct.zone; - attr->ct_attr.nf_ft = act->ct.flow_table; + if (!(act->ct.action & TCA_CT_ACT_CLEAR)) + attr->ct_attr.nf_ft = act->ct.flow_table; attr->ct_attr.act_miss_cookie = act->miss_cookie; return 0; @@ -1990,6 +1991,9 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att if (!priv) return -EOPNOTSUPP; + if (attr->ct_attr.offloaded) + return 0; + if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts, 0, 0, 0, 0); @@ -1999,11 +2003,15 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ + if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */ + attr->ct_attr.offloaded = true; return 0; + } mutex_lock(&priv->control_lock); err = __mlx5_tc_ct_flow_offload(priv, attr); + if (!err) + attr->ct_attr.offloaded = true; mutex_unlock(&priv->control_lock); return err; @@ -2021,7 +2029,7 @@ void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) { - if (!attr->ct_attr.ft) /* no ct action, return */ + if (!attr->ct_attr.offloaded) /* no ct action, return */ return; if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 8e9316fa46d4..b66c5f98067f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -29,6 +29,7 @@ struct mlx5_ct_attr { u32 ct_labels_id; u32 act_miss_mapping; u64 act_miss_cookie; + bool offloaded; struct mlx5_ct_ft *ft; }; From 6496357aa5f710eec96f91345b9da1b37c3231f6 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 20 Jun 2023 14:07:03 +0300 Subject: [PATCH 8/9] net/mlx5: Query hca_cap_2 only when supported On vport enable, where fw's hca caps are queried, the driver queries hca_caps_2 without checking if fw truly supports them, causing a false failure of vfs vport load and blocking SRIOV enablement on old devices such as CX4 where hca_caps_2 support is missing. Thus, add a check for the said caps support before accessing them. Fixes: e5b9642a33be ("net/mlx5: E-Switch, Implement devlink port function cmds to control migratable") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index faec7d7a4400..243c455f1029 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -807,6 +807,9 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2)) + goto out_free; + memset(query_ctx, 0, query_out_sz); err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, MLX5_CAP_GENERAL_2); From 7abd955a58fb0fcd4e756fa2065c03ae488fcfa7 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 31 May 2023 21:18:49 +0300 Subject: [PATCH 9/9] net/mlx5e: RX, Fix page_pool page fragment tracking for XDP Currently mlx5e releases pages directly to the page_pool for XDP_TX and does page fragment counting for XDP_REDIRECT. RX pages from the page_pool are leaking on XDP_REDIRECT because the xdp core will release only one fragment out of MLX5E_PAGECNT_BIAS_MAX and subsequently the page is marked as "skip release" which avoids the driver release. A fix would be to take an extra fragment for XDP_REDIRECT and not set the "skip release" bit so that the release on the driver side can handle the remaining bias fragments. But this would be a shortsighted solution. Instead, this patch converges the two XDP paths (XDP_TX and XDP_REDIRECT) to always do fragment tracking. The "skip release" bit is no longer necessary for XDP. Fixes: 6f5742846053 ("net/mlx5e: RX, Enable skb page recycling through the page_pool") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 3 +- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 32 +++++++------------ 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index f0e6095809fa..40589cebb773 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -662,8 +662,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) * as we know this is a page_pool page. */ - page_pool_put_defragged_page(page->pp, - page, -1, true); + page_pool_recycle_direct(page->pp, page); } while (++n < num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a9575219e455..41d37159e027 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1751,11 +1751,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi prog = rcu_dereference(rq->xdp_prog); if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { - if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_wqe_frag_info *pwi; for (pwi = head_wi; pwi < wi; pwi++) - pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + pwi->frag_page->frags++; } return NULL; /* page/packet was consumed by XDP */ } @@ -1825,12 +1825,8 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - /* do not return page to cache, - * it will be returned on XDP_TX completion. - */ - wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); - } + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; goto wq_cyc_pop; } @@ -1876,12 +1872,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - /* do not return page to cache, - * it will be returned on XDP_TX completion. - */ - wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); - } + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; goto wq_cyc_pop; } @@ -2060,12 +2052,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - int i; + struct mlx5e_frag_page *pfp; - for (i = 0; i < sinfo->nr_frags; i++) - /* non-atomic */ - __set_bit(page_idx + i, wi->skip_release_bitmap); - return NULL; + for (pfp = head_page; pfp < frag_page; pfp++) + pfp->frags++; + + wi->linear_page.frags++; } mlx5e_page_release_fragmented(rq, &wi->linear_page); return NULL; /* page/packet was consumed by XDP */ @@ -2163,7 +2155,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) - __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ + frag_page->frags++; return NULL; /* page/packet was consumed by XDP */ }