From e0ee6891174c7db210e6c55cab69656c40956a6f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 18 Aug 2021 14:17:33 +0300 Subject: [PATCH 01/15] net/mlx5e: Specify SQ stats struct for mlx5e_open_txqsq() Let the caller of mlx5e_open_txqsq() directly pass the SQ stats structure pointer. This replaces logic involving the qos_queue_group_id parameter, and helps generalizing its role in the next patch. Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 038b5c285730..e06c8b816a32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1002,7 +1002,8 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p); int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, struct mlx5e_params *params, struct mlx5e_sq_param *param, - struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid); + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, + struct mlx5e_sq_stats *sq_stats); void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index e8a8d78e3e4d..17a607541af6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -238,7 +238,8 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs if (err) goto err_free_sq; err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, - ¶m_sq, sq, 0, node->hw_id, node->qid); + ¶m_sq, sq, 0, node->hw_id, + priv->htb.qos_sq_stats[node->qid]); if (err) goto err_close_cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 005eb3c92506..9f6685d3b17b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1300,7 +1300,8 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev, int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, struct mlx5e_params *params, struct mlx5e_sq_param *param, - struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid) + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, + struct mlx5e_sq_stats *sq_stats) { struct mlx5e_create_sq_param csp = {}; u32 tx_rate; @@ -1310,10 +1311,7 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, if (err) return err; - if (qos_queue_group_id) - sq->stats = c->priv->htb.qos_sq_stats[qos_qid]; - else - sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + sq->stats = sq_stats; csp.tisn = tisn; csp.tis_lst_sz = 1; @@ -1717,7 +1715,8 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, int txq_ix = c->ix + tc * params->num_channels; err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, - params, &cparam->txq_sq, &c->sq[tc], tc, 0, 0); + params, &cparam->txq_sq, &c->sq[tc], tc, 0, + &c->priv->channel_stats[c->ix].sq[tc]); if (err) goto err_close_sqs; } From 80743c4f8d34fcfdb3b6a35926316207813659ed Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 29 Sep 2021 16:30:38 +0300 Subject: [PATCH 02/15] net/mlx5e: Add TX max rate support for MQPRIO channel mode Add driver max_rate support for the MQPRIO bw_rlimit shaper in channel mode. Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 + .../net/ethernet/mellanox/mlx5/core/en/qos.c | 99 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/qos.h | 9 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 97 ++++++++++++++++-- 4 files changed, 203 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e06c8b816a32..a3a4fece0cac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -251,6 +251,9 @@ struct mlx5e_params { u16 mode; u8 num_tc; struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + struct { + struct mlx5e_mqprio_rl *rl; + } channel; } mqprio; bool rx_cqe_compress_def; bool tunneled_offload_en; @@ -877,6 +880,7 @@ struct mlx5e_priv { #endif struct mlx5e_scratchpad scratchpad; struct mlx5e_htb htb; + struct mlx5e_mqprio_rl *mqprio_rl; }; struct mlx5e_rx_handlers { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 17a607541af6..50977f01a050 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -7,6 +7,21 @@ #define BYTES_IN_MBIT 125000 +int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes) +{ + if (nbytes < BYTES_IN_MBIT) { + qos_warn(mdev, "Input rate (%llu Bytes/sec) below minimum supported (%u Bytes/sec)\n", + nbytes, BYTES_IN_MBIT); + return -EINVAL; + } + return 0; +} + +static u32 mlx5e_qos_bytes2mbits(struct mlx5_core_dev *mdev, u64 nbytes) +{ + return div_u64(nbytes, BYTES_IN_MBIT); +} + int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) { return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); @@ -980,3 +995,87 @@ int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ce return err; } + +struct mlx5e_mqprio_rl { + struct mlx5_core_dev *mdev; + u32 root_id; + u32 *leaves_id; + u8 num_tc; +}; + +struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_mqprio_rl), GFP_KERNEL); +} + +void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl) +{ + kvfree(rl); +} + +int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc, + u64 max_rate[]) +{ + int err; + int tc; + + if (!mlx5_qos_is_supported(mdev)) { + qos_warn(mdev, "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + if (num_tc > mlx5e_qos_max_leaf_nodes(mdev)) + return -EINVAL; + + rl->mdev = mdev; + rl->num_tc = num_tc; + rl->leaves_id = kvcalloc(num_tc, sizeof(*rl->leaves_id), GFP_KERNEL); + if (!rl->leaves_id) + return -ENOMEM; + + err = mlx5_qos_create_root_node(mdev, &rl->root_id); + if (err) + goto err_free_leaves; + + qos_dbg(mdev, "Root created, id %#x\n", rl->root_id); + + for (tc = 0; tc < num_tc; tc++) { + u32 max_average_bw; + + max_average_bw = mlx5e_qos_bytes2mbits(mdev, max_rate[tc]); + err = mlx5_qos_create_leaf_node(mdev, rl->root_id, 0, max_average_bw, + &rl->leaves_id[tc]); + if (err) + goto err_destroy_leaves; + + qos_dbg(mdev, "Leaf[%d] created, id %#x, max average bw %u Mbits/sec\n", + tc, rl->leaves_id[tc], max_average_bw); + } + return 0; + +err_destroy_leaves: + while (--tc >= 0) + mlx5_qos_destroy_node(mdev, rl->leaves_id[tc]); + mlx5_qos_destroy_node(mdev, rl->root_id); +err_free_leaves: + kvfree(rl->leaves_id); + return err; +} + +void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl) +{ + int tc; + + for (tc = 0; tc < rl->num_tc; tc++) + mlx5_qos_destroy_node(rl->mdev, rl->leaves_id[tc]); + mlx5_qos_destroy_node(rl->mdev, rl->root_id); + kvfree(rl->leaves_id); +} + +int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id) +{ + if (tc >= rl->num_tc) + return -EINVAL; + + *hw_id = rl->leaves_id[tc]; + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h index 757682b7c0e0..b7558907ba20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -12,6 +12,7 @@ struct mlx5e_priv; struct mlx5e_channels; struct mlx5e_channel; +int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes); int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv); @@ -41,4 +42,12 @@ int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, struct netlink_ext_ack *extack); +/* MQPRIO TX rate limit */ +struct mlx5e_mqprio_rl; +struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void); +void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl); +int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc, + u64 max_rate[]); +void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl); +int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9f6685d3b17b..e81e5505207c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1705,6 +1705,36 @@ static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) mlx5e_close_cq(&c->sq[tc].cq); } +static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count) + return tc; + + WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq); + return -ENOENT; +} + +static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix, + u32 *hw_id) +{ + int tc; + + if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL || + !params->mqprio.channel.rl) { + *hw_id = 0; + return 0; + } + + tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix); + if (tc < 0) + return tc; + + return mlx5e_mqprio_rl_get_node_hw_id(params->mqprio.channel.rl, tc, hw_id); +} + static int mlx5e_open_sqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1713,9 +1743,15 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) { int txq_ix = c->ix + tc * params->num_channels; + u32 qos_queue_group_id; + + err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id); + if (err) + goto err_close_sqs; err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, - params, &cparam->txq_sq, &c->sq[tc], tc, 0, + params, &cparam->txq_sq, &c->sq[tc], tc, + qos_queue_group_id, &c->priv->channel_stats[c->ix].sq[tc]); if (err) goto err_close_sqs; @@ -2341,6 +2377,13 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); goto err_txqs; } + if (priv->mqprio_rl != priv->channels.params.mqprio.channel.rl) { + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + } + priv->mqprio_rl = priv->channels.params.mqprio.channel.rl; + } return 0; @@ -2902,15 +2945,18 @@ static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc) { params->mqprio.mode = TC_MQPRIO_MODE_DCB; params->mqprio.num_tc = num_tc; + params->mqprio.channel.rl = NULL; mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc, params->num_channels); } static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params, - struct tc_mqprio_qopt *qopt) + struct tc_mqprio_qopt *qopt, + struct mlx5e_mqprio_rl *rl) { params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL; params->mqprio.num_tc = qopt->num_tc; + params->mqprio.channel.rl = rl; mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, qopt); } @@ -2970,9 +3016,13 @@ static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, netdev_err(netdev, "Min tx rate is not supported\n"); return -EINVAL; } + if (mqprio->max_rate[i]) { - netdev_err(netdev, "Max tx rate is not supported\n"); - return -EINVAL; + int err; + + err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]); + if (err) + return err; } if (mqprio->qopt.offset[i] != agg_count) { @@ -2991,11 +3041,22 @@ static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, return 0; } +static bool mlx5e_mqprio_rate_limit(struct tc_mqprio_qopt_offload *mqprio) +{ + int tc; + + for (tc = 0; tc < mqprio->qopt.num_tc; tc++) + if (mqprio->max_rate[tc]) + return true; + return false; +} + static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, struct tc_mqprio_qopt_offload *mqprio) { mlx5e_fp_preactivate preactivate; struct mlx5e_params new_params; + struct mlx5e_mqprio_rl *rl; bool nch_changed; int err; @@ -3003,13 +3064,32 @@ static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, if (err) return err; + rl = NULL; + if (mlx5e_mqprio_rate_limit(mqprio)) { + rl = mlx5e_mqprio_rl_alloc(); + if (!rl) + return -ENOMEM; + err = mlx5e_mqprio_rl_init(rl, priv->mdev, mqprio->qopt.num_tc, + mqprio->max_rate); + if (err) { + mlx5e_mqprio_rl_free(rl); + return err; + } + } + new_params = priv->channels.params; - mlx5e_params_mqprio_channel_set(&new_params, &mqprio->qopt); + mlx5e_params_mqprio_channel_set(&new_params, &mqprio->qopt, rl); nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1; preactivate = nch_changed ? mlx5e_num_channels_changed_ctx : mlx5e_update_netdev_queues_ctx; - return mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true); + err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true); + if (err && rl) { + mlx5e_mqprio_rl_cleanup(rl); + mlx5e_mqprio_rl_free(rl); + } + + return err; } static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, @@ -4809,6 +4889,11 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) kfree(priv->htb.qos_sq_stats[i]); kvfree(priv->htb.qos_sq_stats); + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + } + memset(priv, 0, sizeof(*priv)); } From 61c6f0d19084578975f6344712cd1bffd5326851 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 14 Sep 2021 11:32:51 +0300 Subject: [PATCH 03/15] net/mlx5e: TC, Refactor sample offload error flow Refactor sample unoffload to be symmetric to sample offload. Use the existing del_post_rule() to release the post rule. Also mlx5e_tc_sample_unoffload() should not return post_rule which is NULL when post actions are supported. Sample offload works with this NULL because many places of the code use IS_ERR() instead of IS_ERR_OR_NULL() to check rule is valid and when rule is detected as sample offload the code is not using the rule. Let's be persistent and avoid returning NULL anyway and return the pre rule, like in CT case, which is not NULL. Signed-off-by: Roi Dayan Reviewed-by: Chris Mi Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc/sample.c | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index 6552ecee3f9b..d1d7e4b9f7ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -602,7 +602,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, } sample_flow->pre_attr = pre_attr; - return sample_flow->post_rule; + return sample_flow->pre_rule; err_pre_offload_rule: kfree(pre_attr); @@ -613,7 +613,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, err_obj_id: sampler_put(tc_psample, sample_flow->sampler); err_sampler: - if (!post_act_handle) + if (sample_flow->post_rule) del_post_rule(esw, sample_flow, attr); err_post_rule: if (post_act_handle) @@ -628,9 +628,7 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_handle *rule, struct mlx5_flow_attr *attr) { - struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5e_sample_flow *sample_flow; - struct mlx5_vport_tbl_attr tbl_attr; struct mlx5_eswitch *esw; if (IS_ERR_OR_NULL(tc_psample)) @@ -650,23 +648,14 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, */ sample_flow = attr->sample_attr->sample_flow; mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); - if (!sample_flow->post_act_handle) - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, - sample_flow->post_attr); sample_restore_put(tc_psample, sample_flow->restore); mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); sampler_put(tc_psample, sample_flow->sampler); - if (sample_flow->post_act_handle) { + if (sample_flow->post_act_handle) mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); - } else { - tbl_attr.chain = attr->chain; - tbl_attr.prio = attr->prio; - tbl_attr.vport = esw_attr->in_rep->vport; - tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; - mlx5_esw_vporttbl_put(esw, &tbl_attr); - kfree(sample_flow->post_attr); - } + else + del_post_rule(esw, sample_flow, attr); kfree(sample_flow->pre_attr); kfree(sample_flow); From d9581e2fa73fadba187b2e62e05306e24e8a1ded Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 12 Aug 2021 15:46:34 +0300 Subject: [PATCH 04/15] net/mlx5e: Move mod hdr allocation to a single place Move mod hdr allocation chunk from parse_tc_fdb_actions() and parse_tc_nic_actions() to a shared function. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 87 +++++++++++-------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0e03cefc5eeb..f39589fdd48f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3354,10 +3354,50 @@ static int validate_goto_chain(struct mlx5e_priv *priv, return 0; } -static int parse_tc_nic_actions(struct mlx5e_priv *priv, - struct flow_action *flow_action, +static int +actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + enum mlx5_flow_namespace_type ns_type; + int err; + + if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits && + !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) + return 0; + + ns_type = get_flow_name_space(flow); + + err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs, + &attr->action, extack); + if (err) + return err; + + /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ + if (parse_attr->mod_hdr_acts.num_actions > 0) + return 0; + + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + + if (ns_type != MLX5_FLOW_NAMESPACE_FDB) + return 0; + + if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) + attr->esw_attr->split_count = 0; + + return 0; +} + +static int +parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; @@ -3467,21 +3507,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } } - if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || - hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { - err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, hdrs, &action, extack); - if (err) - return err; - /* in case all pedit actions are skipped, remove the MOD_HDR - * flag. - */ - if (parse_attr->mod_hdr_acts.num_actions == 0) { - action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); - } - } - attr->action = action; if (attr->dest_chain && parse_attr->mirred_ifindex[0]) { @@ -3489,6 +3514,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); + if (err) + return err; + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; @@ -4043,26 +4072,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } - if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || - hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { - err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, hdrs, &action, extack); - if (err) - return err; - /* in case all pedit actions are skipped, remove the MOD_HDR - * flag. we might have set split_count either by pedit or - * pop/push. if there is no pop/push either, reset it too. - */ - if (parse_attr->mod_hdr_acts.num_actions == 0) { - action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); - if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || - (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) - esw_attr->split_count = 0; - } - } - attr->action = action; + + err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); + if (err) + return err; + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; From 9c1d3511a2c2fd30c991a20c670991ece4ef27c1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 15 Aug 2021 12:53:13 +0300 Subject: [PATCH 05/15] net/mlx5e: Split actions_match_supported() into a sub function There will probably be more checks, some for nic flows, some for fdb flows and some are shared checks. Split it for fdb and nic to avoid the function getting too big. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 65 +++++++++++-------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f39589fdd48f..24b0c0e3c573 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3169,19 +3169,41 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, return true; } -static bool actions_match_supported(struct mlx5e_priv *priv, - struct flow_action *flow_action, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +static bool +actions_match_supported_fdb(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { - bool ct_flow = false, ct_clear = false; - u32 actions; + bool ct_flow, ct_clear; - ct_clear = flow->attr->ct_attr.ct_action & - TCA_CT_ACT_CLEAR; + ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; + ct_flow = flow_flag_test(flow, CT) && !ct_clear; + + if (flow->attr->esw_attr->split_count && ct_flow && + !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { + /* All registers used by ct are cleared when using + * split rules. + */ + NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct"); + return false; + } + + return true; +} + +static bool +actions_match_supported(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + u32 actions = flow->attr->action; + bool ct_flow, ct_clear; + + ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; ct_flow = flow_flag_test(flow, CT) && !ct_clear; - actions = flow->attr->action; if (!(actions & (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { @@ -3189,23 +3211,14 @@ static bool actions_match_supported(struct mlx5e_priv *priv, return false; } - if (mlx5e_is_eswitch_flow(flow)) { - if (flow->attr->esw_attr->split_count && ct_flow && - !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { - /* All registers used by ct are cleared when using - * split rules. - */ - NL_SET_ERR_MSG_MOD(extack, - "Can't offload mirroring with action ct"); - return false; - } - } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + !modify_header_match_supported(priv, &parse_attr->spec, flow_action, + actions, ct_flow, ct_clear, extack)) + return false; - if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return modify_header_match_supported(priv, &parse_attr->spec, - flow_action, actions, - ct_flow, ct_clear, - extack); + if (mlx5e_is_eswitch_flow(flow) && + !actions_match_supported_fdb(priv, parse_attr, flow, extack)) + return false; return true; } From d4f401d9ab189b8283e661e57b1ac148bec147fe Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 15 Aug 2021 13:36:01 +0300 Subject: [PATCH 06/15] net/mlx5e: Move parse fdb check into actions_match_supported_fdb() The parse fdb/nic actions funcs parse the actions and then call actions_match_supported() for final check. Move related check in parse_tc_fdb_actions() into actions_match_supported_fdb() for more organized code. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 24b0c0e3c573..606a7757bb04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3175,13 +3175,14 @@ actions_match_supported_fdb(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; bool ct_flow, ct_clear; ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; ct_flow = flow_flag_test(flow, CT) && !ct_clear; - if (flow->attr->esw_attr->split_count && ct_flow && - !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { + if (esw_attr->split_count && ct_flow && + !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) { /* All registers used by ct are cleared when using * split rules. */ @@ -3189,6 +3190,14 @@ actions_match_supported_fdb(struct mlx5e_priv *priv, return false; } + if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "current firmware doesn't support split rule for port mirroring"); + netdev_warn_once(priv->netdev, + "current firmware doesn't support split rule for port mirroring\n"); + return false; + } + return true; } @@ -4108,13 +4117,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { - NL_SET_ERR_MSG_MOD(extack, - "current firmware doesn't support split rule for port mirroring"); - netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); - return -EOPNOTSUPP; - } - /* Allocate sample attribute only when there is a sample action and * no errors after parsing. */ From 3222efd4b3a37b68068fb1c7470248eea7123f19 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 1 Sep 2021 18:22:37 +0300 Subject: [PATCH 07/15] net/mlx5e: Reserve a value from TC tunnel options mapping Reserve one more value from TC tunnel options range to be used by bridge offload in following patches. Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 606a7757bb04..dc21d28a4333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5035,9 +5035,11 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) } uplink_priv->tunnel_mapping = mapping; - /* 0xFFF is reserved for stack devices slow path table mark */ + /* Two last values are reserved for stack devices slow path table mark + * and bridge ingress push mark. + */ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, - sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); + sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true); if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_enc_opts_mapping; From 2f8ec867b6c3dd4c236414e61d5a67e09f77ab4a Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Sun, 26 Sep 2021 17:17:49 +0800 Subject: [PATCH 08/15] net/mlx5e: Specify out ifindex when looking up encap route There is a use case that the local and remote VTEPs are in the same host. Currently, the out ifindex is not specified when looking up the encap route for offloads. So in this case, a local route is returned and the route dev is lo. Actual tunnel interface can be created with a parameter "dev" [1], which specifies the physical device to use for tunnel endpoint communication. Pass this parameter to driver when looking up encap route for offloads. So that a unicast route will be returned. [1] ip link add name vxlan1 type vxlan id 100 dev enp4s0f0 remote 1.1.1.1 dstport 4789 Signed-off-by: Chris Mi Reviewed-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 8 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 9 +++++++++ 3 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index b4e986818794..cc7d7b895b80 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -118,6 +118,11 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; + } else { + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev); + + if (tunnel && tunnel->get_remote_ifindex) + attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(mirred_dev); } rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4); @@ -435,12 +440,15 @@ static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_tc_tun_route_attr *attr) { + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev); struct net_device *route_dev; struct net_device *out_dev; struct dst_entry *dst; struct neighbour *n; int ret; + if (tunnel && tunnel->get_remote_ifindex) + attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(mirred_dev); dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6, NULL); if (IS_ERR(dst)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 9350ca05ce65..aa092eaeaec3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -51,6 +51,7 @@ struct mlx5e_tc_tunnel { void *headers_v); bool (*encap_info_equal)(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b); + int (*get_remote_ifindex)(struct net_device *mirred_dev); }; extern struct mlx5e_tc_tunnel vxlan_tunnel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 4267f3a1059e..fd07c4cbfd1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -141,6 +141,14 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return 0; } +static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev) +{ + const struct vxlan_dev *vxlan = netdev_priv(mirred_dev); + const struct vxlan_rdst *dst = &vxlan->default_dst; + + return dst->remote_ifindex; +} + struct mlx5e_tc_tunnel vxlan_tunnel = { .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, .match_level = MLX5_MATCH_L4, @@ -151,4 +159,5 @@ struct mlx5e_tc_tunnel vxlan_tunnel = { .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, .parse_tunnel = mlx5e_tc_tun_parse_vxlan, .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, + .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex, }; From 6ba2e2b33df853b73c8494758a1da7067d144f7e Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 7 Sep 2021 18:47:02 +0300 Subject: [PATCH 09/15] net/mlx5e: Support accept action Support TC generic 'accept' action in mlx5 by introducing MLX5_ESW_ATTR_FLAG_ACCEPT attribute flag. Flag has similar semantics to existing MLX5_ESW_ATTR_FLAG_SLOW_PATH flag, however, dedicated flag is required because existing 'slow path' flag can be flipped by tunneling subsystem when neighbor changes state. Introduce new helper function mlx5_esw_attr_flags_skip() to check whether attribute flags for 'slow path' or 'accept' action are set and use it in eswitch code instead of direct bit manipulation. Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 ++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +++--- .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 2 +- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dc21d28a4333..d92ee2f37c22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3810,6 +3810,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { + case FLOW_ACTION_ACCEPT: + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; + break; case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2c7444101bb9..7461aafb321e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -447,8 +447,16 @@ enum { MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3), MLX5_ESW_ATTR_FLAG_SAMPLE = BIT(4), + MLX5_ESW_ATTR_FLAG_ACCEPT = BIT(5), }; +/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ +static inline bool +mlx5_esw_attr_flags_skip(u32 attr_flags) +{ + return attr_flags & (MLX5_ESW_ATTR_FLAG_SLOW_PATH | MLX5_ESW_ATTR_FLAG_ACCEPT); +} + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4e1628f25265..ca7e31a1a431 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -440,7 +440,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest, } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; - } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + } else if (mlx5_esw_attr_flags_skip(attr->flags)) { esw_setup_slow_path_dest(dest, flow_act, chains, *i); (*i)++; } else if (attr->dest_chain) { @@ -467,7 +467,7 @@ esw_cleanup_dests(struct mlx5_eswitch *esw, if (attr->dest_ft) { esw_cleanup_decap_indir(esw, attr); - } else if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { + } else if (!mlx5_esw_attr_flags_skip(attr->flags)) { if (attr->dest_chain) esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); else if (esw_is_indir_table(esw, attr)) @@ -678,7 +678,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { + if (!mlx5_esw_attr_flags_skip(attr->flags)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { if (esw_attr->dests[i].termtbl) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index b45954905845..879d78e46e47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -219,7 +219,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || - attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH || + mlx5_esw_attr_flags_skip(attr->flags) || !mlx5_eswitch_offload_is_uplink_port(esw, spec)) return false; From a1a6e7217eacf2c19a518d3adbe8ae3b4fd93a73 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 2 Sep 2021 11:42:36 +0300 Subject: [PATCH 10/15] net/mlx5: Bridge, refactor eswitch instance usage Several functions in bridge.c excessively obtain pointer to parent eswitch instance by dereferencing br_offloads->esw on every usage and following patches in this series add even more usages of eswitch. Introduce local variable 'esw' and use it instead. Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/esw/bridge.c | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 7e221038df8d..6a36c5aef74c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -292,38 +292,39 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_flow_group *mac_fg, *filter_fg, *vlan_fg; struct mlx5_flow_table *ingress_ft, *skip_ft; + struct mlx5_eswitch *esw = br_offloads->esw; int err; - if (!mlx5_eswitch_vport_match_metadata_enabled(br_offloads->esw)) + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) return -EOPNOTSUPP; ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE, MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, - br_offloads->esw); + esw); if (IS_ERR(ingress_ft)) return PTR_ERR(ingress_ft); skip_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE, MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, - br_offloads->esw); + esw); if (IS_ERR(skip_ft)) { err = PTR_ERR(skip_ft); goto err_skip_tbl; } - vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(br_offloads->esw, ingress_ft); + vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(esw, ingress_ft); if (IS_ERR(vlan_fg)) { err = PTR_ERR(vlan_fg); goto err_vlan_fg; } - filter_fg = mlx5_esw_bridge_ingress_filter_fg_create(br_offloads->esw, ingress_ft); + filter_fg = mlx5_esw_bridge_ingress_filter_fg_create(esw, ingress_ft); if (IS_ERR(filter_fg)) { err = PTR_ERR(filter_fg); goto err_filter_fg; } - mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(br_offloads->esw, ingress_ft); + mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(esw, ingress_ft); if (IS_ERR(mac_fg)) { err = PTR_ERR(mac_fg); goto err_mac_fg; @@ -366,23 +367,24 @@ static int mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_esw_bridge *bridge) { + struct mlx5_eswitch *esw = br_offloads->esw; struct mlx5_flow_group *mac_fg, *vlan_fg; struct mlx5_flow_table *egress_ft; int err; egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE, MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, - br_offloads->esw); + esw); if (IS_ERR(egress_ft)) return PTR_ERR(egress_ft); - vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(br_offloads->esw, egress_ft); + vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(esw, egress_ft); if (IS_ERR(vlan_fg)) { err = PTR_ERR(vlan_fg); goto err_vlan_fg; } - mac_fg = mlx5_esw_bridge_egress_mac_fg_create(br_offloads->esw, egress_ft); + mac_fg = mlx5_esw_bridge_egress_mac_fg_create(esw, egress_ft); if (IS_ERR(mac_fg)) { err = PTR_ERR(mac_fg); goto err_mac_fg; @@ -886,6 +888,7 @@ static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port, static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge *bridge) { + struct mlx5_eswitch *esw = bridge->br_offloads->esw; struct mlx5_esw_bridge_fdb_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) { @@ -894,9 +897,9 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, } if (vlan->pkt_reformat_pop) - mlx5_esw_bridge_vlan_pop_cleanup(vlan, bridge->br_offloads->esw); + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); if (vlan->pkt_reformat_push) - mlx5_esw_bridge_vlan_push_cleanup(vlan, bridge->br_offloads->esw); + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); } static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port, From 64fc4b358941fc2e9ee38b8b870ea0dac7639494 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 2 Sep 2021 11:12:16 +0300 Subject: [PATCH 11/15] net/mlx5: Bridge, extract VLAN pop code to dedicated functions Following patches in series need to pop VLAN when packet misses on egress. To reuse existing bridge VLAN pop handling code, extract it to dedicated helpers mlx5_esw_bridge_pkt_reformat_vlan_pop_supported() and mlx5_esw_bridge_pkt_reformat_vlan_pop_create(). Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/esw/bridge.c | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 6a36c5aef74c..8361dfc0bf1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -86,6 +86,26 @@ mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry) SWITCHDEV_FDB_DEL_TO_BRIDGE); } +static bool mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(struct mlx5_eswitch *esw) +{ + return BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) && + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) >= sizeof(struct vlan_hdr) && + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) >= + offsetof(struct vlan_ethhdr, h_vlan_proto); +} + +static struct mlx5_pkt_reformat * +mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw) +{ + struct mlx5_pkt_reformat_params reformat_params = {}; + + reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR; + reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; + reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); + reformat_params.size = sizeof(struct vlan_hdr); + return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); +} + static struct mlx5_flow_table * mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) { @@ -800,24 +820,14 @@ mlx5_esw_bridge_vlan_push_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5 static int mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) { - struct mlx5_pkt_reformat_params reformat_params = {}; struct mlx5_pkt_reformat *pkt_reformat; - if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) || - MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) < sizeof(struct vlan_hdr) || - MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) < - offsetof(struct vlan_ethhdr, h_vlan_proto)) { + if (!mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) { esw_warn(esw->dev, "Packet reformat REMOVE_HEADER is not supported\n"); return -EOPNOTSUPP; } - reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR; - reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; - reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); - reformat_params.size = sizeof(struct vlan_hdr); - pkt_reformat = mlx5_packet_reformat_alloc(esw->dev, - &reformat_params, - MLX5_FLOW_NAMESPACE_FDB); + pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); if (IS_ERR(pkt_reformat)) { esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", PTR_ERR(pkt_reformat)); From 5249001d69a223811ad654884c6115d55158fd51 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 1 Sep 2021 19:05:02 +0300 Subject: [PATCH 12/15] net/mlx5: Bridge, mark reg_c1 when pushing VLAN On ingress VLAN push also assign value 0x7FE to reg_c1 tunnel id+opts bits (tunnel id 0, which is not a valid tunnel id, and option 0x7FE which was reserved by one of previous patches in the series). In following patch the reg value is matched on egress miss to restore the packet to its original state by removing the VLAN before passing it to the software data path. Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/esw/bridge.c | 40 ++++++++++++++++++- .../mellanox/mlx5/core/esw/bridge_priv.h | 1 + include/linux/mlx5/eswitch.h | 9 +++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 8361dfc0bf1a..439b67b4380f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -465,8 +465,10 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); if (vlan && vlan->pkt_reformat_push) { - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; flow_act.pkt_reformat = vlan->pkt_reformat_push; + flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark; } else if (vlan) { MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.cvlan_tag); @@ -845,6 +847,33 @@ mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_ vlan->pkt_reformat_pop = NULL; } +static int +mlx5_esw_bridge_vlan_push_mark_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_modify_hdr *pkt_mod_hdr; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(set_action_in, action, offset, 8); + MLX5_SET(set_action_in, action, length, ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS); + MLX5_SET(set_action_in, action, data, ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN); + + pkt_mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, 1, action); + if (IS_ERR(pkt_mod_hdr)) + return PTR_ERR(pkt_mod_hdr); + + vlan->pkt_mod_hdr_push_mark = pkt_mod_hdr; + return 0; +} + +static void +mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_modify_header_dealloc(esw->dev, vlan->pkt_mod_hdr_push_mark); + vlan->pkt_mod_hdr_push_mark = NULL; +} + static struct mlx5_esw_bridge_vlan * mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port, struct mlx5_eswitch *esw) @@ -864,6 +893,10 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por err = mlx5_esw_bridge_vlan_push_create(vlan, esw); if (err) goto err_vlan_push; + + err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw); + if (err) + goto err_vlan_push_mark; } if (flags & BRIDGE_VLAN_INFO_UNTAGGED) { err = mlx5_esw_bridge_vlan_pop_create(vlan, esw); @@ -882,6 +915,9 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por if (vlan->pkt_reformat_pop) mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); err_vlan_pop: + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); +err_vlan_push_mark: if (vlan->pkt_reformat_push) mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); err_vlan_push: @@ -908,6 +944,8 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, if (vlan->pkt_reformat_pop) mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); if (vlan->pkt_reformat_push) mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index 52964a82d6a6..878311fe950a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -49,6 +49,7 @@ struct mlx5_esw_bridge_vlan { struct list_head fdb_list; struct mlx5_pkt_reformat *pkt_reformat_push; struct mlx5_pkt_reformat *pkt_reformat_pop; + struct mlx5_modify_hdr *pkt_mod_hdr_push_mark; }; struct mlx5_esw_bridge_port { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 4ab5c1fc1270..97afcea39a7b 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -130,11 +130,20 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, #define ESW_TUN_OPTS_MASK GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, ESW_TUN_OPTS_OFFSET) #define ESW_TUN_MASK GENMASK(31 - ESW_RESERVED_BITS, ESW_TUN_OFFSET) #define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */ +#define ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT /* 0x7FF is a reserved mapping */ #define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT GENMASK(ESW_TUN_OPTS_BITS - 1, 0) #define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \ ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT) #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK +/* 0x7FE is a reserved mapping for bridge ingress push vlan mark */ +#define ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN (ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT - 1) +#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN ((ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN << \ + ESW_TUN_OPTS_BITS) | \ + ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN) +#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK \ + GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \ + ESW_TUN_OPTS_OFFSET + 1) u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); From 575baa92fd463cb202447fe14770532cae55715a Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 8 Sep 2021 18:22:12 +0300 Subject: [PATCH 13/15] net/mlx5: Bridge, pop VLAN on egress table miss Create lowest priority flow group in egress table with single rule that matches on special reg_c1 value that is set on ingress VLAN push with single action that pops VLAN. The flow destination is skip table that is used to skip any further processing of packet in FDB bridge priority. Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/esw/bridge.c | 128 +++++++++++++++++- 1 file changed, 126 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 439b67b4380f..ed72246d1d83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -28,7 +28,10 @@ #define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE / 2 - 1) #define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1) -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 2) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1) #define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0 @@ -61,6 +64,9 @@ struct mlx5_esw_bridge { struct mlx5_flow_table *egress_ft; struct mlx5_flow_group *egress_vlan_fg; struct mlx5_flow_group *egress_mac_fg; + struct mlx5_flow_group *egress_miss_fg; + struct mlx5_pkt_reformat *egress_miss_pkt_reformat; + struct mlx5_flow_handle *egress_miss_handle; unsigned long ageing_time; u32 flags; }; @@ -307,6 +313,36 @@ mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_ return fg; } +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create bridge egress table miss flow group (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + static int mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) { @@ -383,12 +419,19 @@ mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloa br_offloads->ingress_ft = NULL; } +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, + struct mlx5_flow_table *skip_ft, + struct mlx5_pkt_reformat *pkt_reformat); + static int mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_esw_bridge *bridge) { + struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg; + struct mlx5_pkt_reformat *miss_pkt_reformat = NULL; + struct mlx5_flow_handle *miss_handle = NULL; struct mlx5_eswitch *esw = br_offloads->esw; - struct mlx5_flow_group *mac_fg, *vlan_fg; struct mlx5_flow_table *egress_ft; int err; @@ -410,9 +453,48 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, goto err_mac_fg; } + if (mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) { + miss_fg = mlx5_esw_bridge_egress_miss_fg_create(esw, egress_ft); + if (IS_ERR(miss_fg)) { + esw_warn(esw->dev, "Failed to create miss flow group (err=%ld)\n", + PTR_ERR(miss_fg)); + miss_fg = NULL; + goto skip_miss_flow; + } + + miss_pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); + if (IS_ERR(miss_pkt_reformat)) { + esw_warn(esw->dev, + "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", + PTR_ERR(miss_pkt_reformat)); + miss_pkt_reformat = NULL; + mlx5_destroy_flow_group(miss_fg); + miss_fg = NULL; + goto skip_miss_flow; + } + + miss_handle = mlx5_esw_bridge_egress_miss_flow_create(egress_ft, + br_offloads->skip_ft, + miss_pkt_reformat); + if (IS_ERR(miss_handle)) { + esw_warn(esw->dev, "Failed to create miss flow (err=%ld)\n", + PTR_ERR(miss_handle)); + miss_handle = NULL; + mlx5_packet_reformat_dealloc(esw->dev, miss_pkt_reformat); + miss_pkt_reformat = NULL; + mlx5_destroy_flow_group(miss_fg); + miss_fg = NULL; + goto skip_miss_flow; + } + } +skip_miss_flow: + bridge->egress_ft = egress_ft; bridge->egress_vlan_fg = vlan_fg; bridge->egress_mac_fg = mac_fg; + bridge->egress_miss_fg = miss_fg; + bridge->egress_miss_pkt_reformat = miss_pkt_reformat; + bridge->egress_miss_handle = miss_handle; return 0; err_mac_fg: @@ -425,6 +507,13 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, static void mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) { + if (bridge->egress_miss_handle) + mlx5_del_flow_rules(bridge->egress_miss_handle); + if (bridge->egress_miss_pkt_reformat) + mlx5_packet_reformat_dealloc(bridge->br_offloads->esw->dev, + bridge->egress_miss_pkt_reformat); + if (bridge->egress_miss_fg) + mlx5_destroy_flow_group(bridge->egress_miss_fg); mlx5_destroy_flow_group(bridge->egress_mac_fg); mlx5_destroy_flow_group(bridge->egress_vlan_fg); mlx5_destroy_flow_table(bridge->egress_ft); @@ -623,6 +712,41 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const u return handle; } +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, + struct mlx5_flow_table *skip_ft, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, + .flags = FLOW_ACT_NO_APPEND, + .pkt_reformat = pkt_reformat, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_1, + ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK); + + handle = mlx5_add_flow_rules(egress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads) { From 3663ad34bc707fc85492f4d83a313f5df84718d4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 19 Aug 2021 16:18:57 +0300 Subject: [PATCH 14/15] net/mlx5: Shift control IRQ to the last index Control IRQ is the first IRQ vector. This complicates handling of completion irqs as we need to offset them by one. in the next patch, there are scenarios where completion and control EQs will share the same irq. for example: functions with single IRQ. To ease such scenarios, we shift control IRQ to the end of the irq array. Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 2 -- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 10 +++++----- include/linux/mlx5/driver.h | 2 ++ 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index d0d98e584ebc..81147d774dd2 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1559,6 +1559,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { + .irq_index = MLX5_IRQ_EQ_CTRL, .nent = MLX5_IB_NUM_PF_EQE, }; param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 605c8ecc3610..792e0d6aa861 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -632,6 +632,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_eq_notifier_register(dev, &table->cq_err_nb); param = (struct mlx5_eq_param) { + .irq_index = MLX5_IRQ_EQ_CTRL, .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; @@ -644,6 +645,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); param = (struct mlx5_eq_param) { + .irq_index = MLX5_IRQ_EQ_CTRL, .nent = MLX5_NUM_ASYNC_EQE, }; @@ -653,6 +655,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) goto err2; param = (struct mlx5_eq_param) { + .irq_index = MLX5_IRQ_EQ_CTRL, .nent = /* TODO: sriov max_vf + */ 1, .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; @@ -806,8 +809,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; for (i = 0; i < ncomp_eqs; i++) { - int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; + int vecidx = i; eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -953,9 +956,7 @@ static int set_rmap(struct mlx5_core_dev *mdev) goto err_out; } - vecidx = MLX5_IRQ_VEC_COMP_BASE; - for (; vecidx < eq_table->num_comp_eqs + MLX5_IRQ_VEC_COMP_BASE; - vecidx++) { + for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) { err = irq_cpu_rmap_add(eq_table->rmap, pci_irq_vector(mdev->pdev, vecidx)); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index abd024173c42..8116815663a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -8,8 +8,6 @@ #define MLX5_COMP_EQS_PER_SF 8 -#define MLX5_IRQ_EQ_CTRL (0) - struct mlx5_irq; int mlx5_irq_table_init(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 763c83a02380..a66144b54fc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -194,15 +194,14 @@ static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx); } -static void irq_set_name(char *name, int vecidx) +static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) { - if (vecidx == 0) { + if (vecidx == pool->xa_num_irqs.max) { snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx); return; } - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", - vecidx - MLX5_IRQ_VEC_COMP_BASE); + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); } static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) @@ -217,7 +216,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) return ERR_PTR(-ENOMEM); irq->irqn = pci_irq_vector(dev->pdev, i); if (!pool->name[0]) - irq_set_name(name, i); + irq_set_name(pool, name, i); else irq_sf_set_name(pool, name, i); ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); @@ -440,6 +439,7 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, } pf_irq: pool = irq_table->pf_pool; + vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx; irq = irq_pool_request_vector(pool, vecidx, affinity); out: if (IS_ERR(irq)) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e23417424373..0ca719c00824 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -59,6 +59,8 @@ #define MLX5_ADEV_NAME "mlx5_core" +#define MLX5_IRQ_EQ_CTRL (U8_MAX) + enum { MLX5_BOARD_ID_LEN = 64, }; From f891b7cdbdcda116fd26bbd706f91bd58567aa17 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 1 Aug 2021 12:08:49 +0300 Subject: [PATCH 15/15] net/mlx5: Enable single IRQ for PCI Function Prior to this patch the driver requires two IRQs to function properly, one required IRQ for control and at least one required IRQ for IO. This requirement can be relaxed to one as the driver now allows sharing of IRQs, so control and IO EQs can share the same irq. This is needed for high scale amount of VFs. Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 26 ++++++++++++++----- include/linux/mlx5/eq.h | 1 - 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index a66144b54fc8..830444f927d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -196,6 +196,12 @@ static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) { + if (!pool->xa_num_irqs.max) { + /* in case we only have a single irq for the device */ + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx); + return; + } + if (vecidx == pool->xa_num_irqs.max) { snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx); return; @@ -204,6 +210,11 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); } +static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) +{ + return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); +} + static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) { struct mlx5_core_dev *dev = pool->dev; @@ -215,7 +226,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) if (!irq) return ERR_PTR(-ENOMEM); irq->irqn = pci_irq_vector(dev->pdev, i); - if (!pool->name[0]) + if (!irq_pool_is_sf_pool(pool)) irq_set_name(pool, name, i); else irq_sf_set_name(pool, name, i); @@ -385,6 +396,9 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, if (IS_ERR(irq) || !affinity) goto unlock; cpumask_copy(irq->mask, affinity); + if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && + cpumask_empty(irq->mask)) + cpumask_set_cpu(0, irq->mask); irq_set_affinity_hint(irq->irqn, irq->mask); unlock: mutex_unlock(&pool->lock); @@ -577,6 +591,8 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table) { + if (!table->pf_pool->xa_num_irqs.max) + return 1; return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min; } @@ -592,19 +608,15 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) if (mlx5_core_is_sf(dev)) return 0; - pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_IRQ_VEC_COMP_BASE; + pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1; pf_vec = min_t(int, pf_vec, num_eqs); - if (pf_vec <= MLX5_IRQ_VEC_COMP_BASE) - return -ENOMEM; total_vec = pf_vec; if (mlx5_sf_max_functions(dev)) total_vec += MLX5_IRQ_CTRL_SF_MAX + MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev); - total_vec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, - total_vec, PCI_IRQ_MSIX); + total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX); if (total_vec < 0) return total_vec; pf_vec = min(pf_vec, total_vec); diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index cea6ecb4b73e..ea3ff5a8ced3 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -4,7 +4,6 @@ #ifndef MLX5_CORE_EQ_H #define MLX5_CORE_EQ_H -#define MLX5_IRQ_VEC_COMP_BASE 1 #define MLX5_NUM_CMD_EQE (32) #define MLX5_NUM_ASYNC_EQE (0x1000) #define MLX5_NUM_SPARE_EQE (0x80)