From b407b4b804cd6193f45599aff82307c9e2a81225 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 10 Mar 2025 23:26:53 +0200 Subject: [PATCH 1/4] net/mlx5: Rename devlink rate parent set function for leaf nodes Rename `mlx5_esw_devlink_rate_parent_set()` to `mlx5_esw_devlink_rate_leaf_parent_set()` to distinguish setting a parent for leafs from nodes, which is not yet supported. Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741642016-44918-2-git-send-email-tariqt@nvidia.com Reviewed-by: Jacob Keller Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index a2cf3e79693d..2a9e3a43c1c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -327,7 +327,7 @@ static const struct devlink_ops mlx5_devlink_ops = { .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, - .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, + .rate_leaf_parent_set = mlx5_esw_devlink_rate_leaf_parent_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 823c1ba456cd..c56027838a57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -986,10 +986,10 @@ int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_s return err; } -int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, - struct devlink_rate *parent, - void *priv, void *parent_priv, - struct netlink_ext_ack *extack) +int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *node; struct mlx5_vport *vport = priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 6eb8f6a648c8..43a40bda7d19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -29,10 +29,10 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack); -int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, - struct devlink_rate *parent, - void *priv, void *parent_priv, - struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); #endif #endif From 498bd79cb92be572258a587a4abee67d464ba1de Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 10 Mar 2025 23:26:54 +0200 Subject: [PATCH 2/4] net/mlx5: Introduce hierarchy level tracking on scheduling nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a `level` field to `mlx5_esw_sched_node` to track the hierarchy depth of each scheduling node. This allows enforcement of the scheduling depth constraints based on `log_esw_max_sched_depth`. Modify `esw_qos_node_set_parent()` and `__esw_qos_alloc_node()` to correctly assign hierarchy levels. Ensure that nodes inherit their parent’s level incrementally. Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741642016-44918-3-git-send-email-tariqt@nvidia.com Reviewed-by: Jacob Keller Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index c56027838a57..959e4446327d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -90,8 +90,22 @@ struct mlx5_esw_sched_node { struct list_head children; /* Valid only if this node is associated with a vport. */ struct mlx5_vport *vport; + /* Level in the hierarchy. The root node level is 1. */ + u8 level; }; +static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) +{ + if (!node->parent) { + /* Root children are assigned a depth level of 2. */ + node->level = 2; + list_add_tail(&node->entry, &node->esw->qos.domain->nodes); + } else { + node->level = node->parent->level + 1; + list_add_tail(&node->entry, &node->parent->children); + } +} + static void esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent) { @@ -99,6 +113,7 @@ esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_ node->parent = parent; list_add_tail(&node->entry, &parent->children); node->esw = parent->esw; + node->level = parent->level + 1; } void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport) @@ -358,7 +373,6 @@ static struct mlx5_esw_sched_node * __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type, struct mlx5_esw_sched_node *parent) { - struct list_head *parent_children; struct mlx5_esw_sched_node *node; node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -370,8 +384,7 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type node->type = type; node->parent = parent; INIT_LIST_HEAD(&node->children); - parent_children = parent ? &parent->children : &esw->qos.domain->nodes; - list_add_tail(&node->entry, parent_children); + esw_qos_node_attach_to_parent(node); return node; } From f88c349c75e3784a3f5463f5b403ff28dd823782 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 10 Mar 2025 23:26:55 +0200 Subject: [PATCH 3/4] net/mlx5: Preserve rate settings when creating a rate node Modify `esw_qos_create_node_sched_elem()` to receive max_rate and bw_share values while maintaining the previous configuration. This change is essential for the upcoming patch that will modify rate nodes and requires the existing settings to be preserved unless explicitly changed. Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741642016-44918-4-git-send-email-tariqt@nvidia.com Reviewed-by: Jacob Keller Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 959e4446327d..3c850efb4ca3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -320,8 +320,9 @@ static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node, return 0; } -static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, - u32 *tsar_ix) +static int +esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, + u32 max_rate, u32 bw_share, u32 *tsar_ix) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; void *attr; @@ -338,6 +339,8 @@ static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent_element_id); + MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share); attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); @@ -409,7 +412,8 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch u32 tsar_ix; int err; - err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, &tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0, + 0, &tsar_ix); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed"); return ERR_PTR(err); @@ -476,7 +480,8 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return -EOPNOTSUPP; - err = esw_qos_create_node_sched_elem(esw->dev, 0, &esw->qos.root_tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0, + &esw->qos.root_tsar_ix); if (err) { esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); return err; From 9c7bbf4c3304f139faad4b753241963f875b4195 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 10 Mar 2025 23:26:56 +0200 Subject: [PATCH 4/4] net/mlx5: Add support for setting parent of nodes Introduce `mlx5_esw_devlink_rate_node_parent_set()` to allow assigning a parent to scheduling nodes. Implement `mlx5_esw_qos_node_update_parent()` and `mlx5_esw_qos_node_validate_set_parent()` to enforce constraints on node reassignment. Don't allow reassignment of nodes with active rate objects. Update `esw_qos_node_set_parent()` to handle cases where the parent is NULL. A NULL parent indicates that the scheduling element is attached to the root scheduling element, and since only rate nodes can be connected to the root, this update is now necessary. Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1741642016-44918-5-git-send-email-tariqt@nvidia.com Reviewed-by: Jacob Keller Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 1 + .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 108 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/esw/qos.h | 4 + 3 files changed, 110 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 2a9e3a43c1c8..73cd74644378 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -328,6 +328,7 @@ static const struct devlink_ops mlx5_devlink_ops = { .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, .rate_leaf_parent_set = mlx5_esw_devlink_rate_leaf_parent_set, + .rate_node_parent_set = mlx5_esw_devlink_rate_node_parent_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 3c850efb4ca3..b6ae384396b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -111,9 +111,9 @@ esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_ { list_del_init(&node->entry); node->parent = parent; - list_add_tail(&node->entry, &parent->children); - node->esw = parent->esw; - node->level = parent->level + 1; + if (parent) + node->esw = parent->esw; + esw_qos_node_attach_to_parent(node); } void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport) @@ -1018,3 +1018,105 @@ int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, node = parent_priv; return mlx5_esw_qos_vport_update_parent(vport, node, extack); } + +static int +mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + u8 new_level, max_level; + + if (parent && parent->esw != node->esw) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot assign node to another E-Switch"); + return -EOPNOTSUPP; + } + + if (!list_empty(&node->children)) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot reassign a node that contains rate objects"); + return -EOPNOTSUPP; + } + + new_level = parent ? parent->level + 1 : 2; + max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); + if (new_level > max_level) { + NL_SET_ERR_MSG_MOD(extack, + "Node hierarchy depth exceeds the maximum supported level"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent = node->parent; + struct mlx5_eswitch *esw = node->esw; + u32 parent_ix; + int err; + + parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix; + mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + node->ix); + err = esw_qos_create_node_sched_elem(esw->dev, parent_ix, + node->max_rate, 0, &node->ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to create a node under the new hierarchy."); + if (esw_qos_create_node_sched_elem(esw->dev, curr_parent->ix, + node->max_rate, + node->bw_share, + &node->ix)) + esw_warn(esw->dev, "Node restore QoS failed\n"); + + return err; + } + esw_qos_node_set_parent(node, parent); + + return 0; +} + +static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent; + struct mlx5_eswitch *esw = node->esw; + int err; + + err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack); + if (err) + return err; + + esw_qos_lock(esw); + curr_parent = node->parent; + err = esw_qos_vports_node_update_parent(node, parent, extack); + if (err) + goto out; + + esw_qos_normalize_min_rate(esw, curr_parent, extack); + esw_qos_normalize_min_rate(esw, parent, extack); + +out: + esw_qos_unlock(esw); + + return err; +} + +int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *node = priv, *parent_node; + + if (!parent) + return mlx5_esw_qos_node_update_parent(node, NULL, extack); + + parent_node = parent_priv; + return mlx5_esw_qos_node_update_parent(node, parent_node, extack); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 43a40bda7d19..ed40ec8f027e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -33,6 +33,10 @@ int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, struct devlink_rate *parent, void *priv, void *parent_priv, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); #endif #endif