From 0470e5e38c9d97e7ce1e804cd54cf59accdf3ac2 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 22 Jul 2019 14:34:42 -0400 Subject: [PATCH 01/24] net/mlx5: fix -Wtype-limits compilation warnings The commit b9a7ba556207 ("net/mlx5: Use event mask based on device capabilities") introduced a few compilation warnings due to it bumps MLX5_EVENT_TYPE_MAX from 0x27 to 0x100 which is always greater than an "struct {mlx5_eqe|mlx5_nb}.type" that is an "u8". drivers/net/ethernet/mellanox/mlx5/core/eq.c: In function 'mlx5_eq_notifier_register': drivers/net/ethernet/mellanox/mlx5/core/eq.c:948:21: warning: comparison is always false due to limited range of data type [-Wtype-limits] if (nb->event_type >= MLX5_EVENT_TYPE_MAX) ^~ drivers/net/ethernet/mellanox/mlx5/core/eq.c: In function 'mlx5_eq_notifier_unregister': drivers/net/ethernet/mellanox/mlx5/core/eq.c:959:21: warning: comparison is always false due to limited range of data type [-Wtype-limits] if (nb->event_type >= MLX5_EVENT_TYPE_MAX) Fix them by removing unnecessary checkings. Fixes: b9a7ba556207 ("net/mlx5: Use event mask based on device capabilities") Signed-off-by: Qian Cai Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 41f25ea2e8d9..2df9aaa421c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -215,11 +215,7 @@ static int mlx5_eq_async_int(struct notifier_block *nb, */ dma_rmb(); - if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) - atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); - else - mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); - + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; @@ -945,9 +941,6 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { struct mlx5_eq_table *eqt = dev->priv.eq_table; - if (nb->event_type >= MLX5_EVENT_TYPE_MAX) - return -EINVAL; - return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); } EXPORT_SYMBOL(mlx5_eq_notifier_register); @@ -956,9 +949,6 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { struct mlx5_eq_table *eqt = dev->priv.eq_table; - if (nb->event_type >= MLX5_EVENT_TYPE_MAX) - return -EINVAL; - return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } EXPORT_SYMBOL(mlx5_eq_notifier_unregister); From 6f06e04b67baa1c9da61c8b15b1335a1dbb98bcb Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Mon, 29 Jul 2019 21:12:52 +0000 Subject: [PATCH 02/24] net/mlx5: Refactor and optimize flow counter bulk query Towards introducing the ability to allocate bulks of flow counters, refactor the flow counter bulk query process, removing functions and structs whose names indicated being used for flow counter bulk allocation FW commands, despite them actually only being used to support bulk querying, and migrate their functionality to correctly named functions in their natural location, fs_counters.c. Additionally, optimize the bulk query process by: * Extracting the memory used for the query to mlx5_fc_stats so that it is only allocated once, and not for each bulk query. * Querying all the counters in one function call. Signed-off-by: Gavi Teitz Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 61 ++------- .../net/ethernet/mellanox/mlx5/core/fs_cmd.h | 13 +- .../ethernet/mellanox/mlx5/core/fs_counters.c | 129 ++++++++++-------- include/linux/mlx5/driver.h | 1 + 4 files changed, 83 insertions(+), 121 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 7ac1249eadc3..51f6972f4c70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -615,67 +615,24 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, return 0; } -struct mlx5_cmd_fc_bulk { - u32 id; - int num; - int outlen; - u32 out[0]; -}; - -struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len) { - struct mlx5_cmd_fc_bulk *b; - int outlen = - MLX5_ST_SZ_BYTES(query_flow_counter_out) + - MLX5_ST_SZ_BYTES(traffic_counter) * num; - - b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); - if (!b) - return NULL; - - b->id = id; - b->num = num; - b->outlen = outlen; - - return b; + return MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len; } -void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) -{ - kfree(b); -} - -int -mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out) { + int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len); u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; MLX5_SET(query_flow_counter_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_COUNTER); MLX5_SET(query_flow_counter_in, in, op_mod, 0); - MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); - MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); - return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen); -} - -void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u32 id, - u64 *packets, u64 *bytes) -{ - int index = id - b->id; - void *stats; - - if (index < 0 || index >= b->num) { - mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n", - id, b->id, b->id + b->num - 1); - return; - } - - stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, - flow_statistics[index]); - *packets = MLX5_GET64(traffic_counter, stats, packets); - *bytes = MLX5_GET64(traffic_counter, stats, octets); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index e340f9af2f5a..db49eabba98d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -82,16 +82,9 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); -struct mlx5_cmd_fc_bulk; - -struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); -void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); -int -mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); -void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u32 id, - u64 *packets, u64 *bytes); +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len); +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out); const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index b3762123a69c..067a4b56498b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -75,7 +75,7 @@ struct mlx5_fc { * access to counter list: * - create (user context) * - mlx5_fc_create() only adds to an addlist to be used by - * mlx5_fc_stats_query_work(). addlist is a lockless single linked list + * mlx5_fc_stats_work(). addlist is a lockless single linked list * that doesn't require any additional synchronization when adding single * node. * - spawn thread to do the actual destroy @@ -136,72 +136,69 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, spin_unlock(&fc_stats->counters_idr_lock); } -/* The function returns the last counter that was queried so the caller - * function can continue calling it till all counters are queried. - */ -static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev, - struct mlx5_fc *first, - u32 last_id) +static int get_max_bulk_query_len(struct mlx5_core_dev *dev) +{ + return min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} + +static void update_counter_cache(int index, u32 *bulk_raw_data, + struct mlx5_fc_cache *cache) +{ + void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data, + flow_statistics[index]); + u64 packets = MLX5_GET64(traffic_counter, stats, packets); + u64 bytes = MLX5_GET64(traffic_counter, stats, octets); + + if (cache->packets == packets) + return; + + cache->packets = packets; + cache->bytes = bytes; + cache->lastuse = jiffies; +} + +static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u32 last_id) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - struct mlx5_fc *counter = NULL; - struct mlx5_cmd_fc_bulk *b; - bool more = false; - u32 afirst_id; - int num; + bool query_more_counters = (first->id <= last_id); + int max_bulk_len = get_max_bulk_query_len(dev); + u32 *data = fc_stats->bulk_query_out; + struct mlx5_fc *counter = first; + u32 bulk_base_id; + int bulk_len; int err; - int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, - (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); + while (query_more_counters) { + /* first id must be aligned to 4 when using bulk query */ + bulk_base_id = counter->id & ~0x3; - /* first id must be aligned to 4 when using bulk query */ - afirst_id = first->id & ~0x3; + /* number of counters to query inc. the last counter */ + bulk_len = min_t(int, max_bulk_len, + ALIGN(last_id - bulk_base_id + 1, 4)); - /* number of counters to query inc. the last counter */ - num = ALIGN(last_id - afirst_id + 1, 4); - if (num > max_bulk) { - num = max_bulk; - last_id = afirst_id + num - 1; - } - - b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num); - if (!b) { - mlx5_core_err(dev, "Error allocating resources for bulk query\n"); - return NULL; - } - - err = mlx5_cmd_fc_bulk_query(dev, b); - if (err) { - mlx5_core_err(dev, "Error doing bulk query: %d\n", err); - goto out; - } - - counter = first; - list_for_each_entry_from(counter, &fc_stats->counters, list) { - struct mlx5_fc_cache *c = &counter->cache; - u64 packets; - u64 bytes; - - if (counter->id > last_id) { - more = true; - break; + err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, + data); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + return; } + query_more_counters = false; - mlx5_cmd_fc_bulk_get(dev, b, - counter->id, &packets, &bytes); + list_for_each_entry_from(counter, &fc_stats->counters, list) { + int counter_index = counter->id - bulk_base_id; + struct mlx5_fc_cache *cache = &counter->cache; - if (c->packets == packets) - continue; + if (counter->id >= bulk_base_id + bulk_len) { + query_more_counters = true; + break; + } - c->packets = packets; - c->bytes = bytes; - c->lastuse = jiffies; + update_counter_cache(counter_index, data, cache); + } } - -out: - mlx5_cmd_fc_bulk_free(b); - - return more ? counter : NULL; } static void mlx5_free_fc(struct mlx5_core_dev *dev, @@ -244,8 +241,8 @@ static void mlx5_fc_stats_work(struct work_struct *work) counter = list_first_entry(&fc_stats->counters, struct mlx5_fc, list); - while (counter) - counter = mlx5_fc_stats_query(dev, counter, last->id); + if (counter) + mlx5_fc_stats_query_counter_range(dev, counter, last->id); fc_stats->next_query = now + fc_stats->sampling_interval; } @@ -324,6 +321,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy); int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int max_bulk_len; + int max_out_len; spin_lock_init(&fc_stats->counters_idr_lock); idr_init(&fc_stats->counters_idr); @@ -331,14 +330,24 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) init_llist_head(&fc_stats->addlist); init_llist_head(&fc_stats->dellist); + max_bulk_len = get_max_bulk_query_len(dev); + max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); + fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL); + if (!fc_stats->bulk_query_out) + return -ENOMEM; + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); if (!fc_stats->wq) - return -ENOMEM; + goto err_wq_create; fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); return 0; + +err_wq_create: + kfree(fc_stats->bulk_query_out); + return -ENOMEM; } void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) @@ -352,6 +361,8 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; + kfree(fc_stats->bulk_query_out); + idr_destroy(&fc_stats->counters_idr); tmplist = llist_del_all(&fc_stats->addlist); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0e6da1840c7d..267b2bc0ca4a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -488,6 +488,7 @@ struct mlx5_fc_stats { struct delayed_work work; unsigned long next_query; unsigned long sampling_interval; /* jiffies */ + u32 *bulk_query_out; }; struct mlx5_events; From 8536a6bf2ea1f3bf4e3159b590fbecce4d854796 Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Mon, 29 Jul 2019 21:12:54 +0000 Subject: [PATCH 03/24] net/mlx5: Add flow counter bulk allocation hardware bits and command Add a handle to invoke the new FW capability of allocating a bulk of flow counters. Signed-off-by: Gavi Teitz Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 10 ++++++++- .../net/ethernet/mellanox/mlx5/core/fs_cmd.h | 3 +++ include/linux/mlx5/mlx5_ifc.h | 21 +++++++++++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 51f6972f4c70..b84a225bbe86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -566,7 +566,9 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id) { u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; @@ -574,6 +576,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) MLX5_SET(alloc_flow_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -581,6 +584,11 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) return err; } +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +{ + return mlx5_cmd_fc_bulk_alloc(dev, 0, id); +} + int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) { u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index db49eabba98d..bc4606306009 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -78,6 +78,9 @@ struct mlx5_flow_cmds { }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b3d5752657d9..196987f14a3f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1040,6 +1040,21 @@ enum { MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, }; +#define MLX5_FC_BULK_SIZE_FACTOR 128 + +enum mlx5_fc_bulk_alloc_bitmask { + MLX5_FC_BULK_128 = (1 << 0), + MLX5_FC_BULK_256 = (1 << 1), + MLX5_FC_BULK_512 = (1 << 2), + MLX5_FC_BULK_1024 = (1 << 3), + MLX5_FC_BULK_2048 = (1 << 4), + MLX5_FC_BULK_4096 = (1 << 5), + MLX5_FC_BULK_8192 = (1 << 6), + MLX5_FC_BULK_16384 = (1 << 7), +}; + +#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1244,7 +1259,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2e0[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_at_300[0x18]; + u8 reserved_at_300[0x10]; + u8 flow_counter_bulk_alloc[0x8]; u8 log_max_mcg[0x8]; u8 reserved_at_320[0x3]; @@ -7815,7 +7831,8 @@ struct mlx5_ifc_alloc_flow_counter_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x38]; + u8 flow_counter_bulk[0x8]; }; struct mlx5_ifc_add_vxlan_udp_dport_out_bits { From 7761f9eef3f09f2f4c579313e0c536770b5ecff4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Jul 2019 21:12:56 +0000 Subject: [PATCH 04/24] net/mlx5: Fix offset of tisc bits reserved field First reserved field is off by one instead of reserved_at_1 it should be reserved_at_2, fix that. Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures") Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 196987f14a3f..9265c84ad353 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2782,7 +2782,7 @@ struct mlx5_ifc_traffic_counter_bits { struct mlx5_ifc_tisc_bits { u8 strict_lag_tx_port_affinity[0x1]; u8 tls_en[0x1]; - u8 reserved_at_1[0x2]; + u8 reserved_at_2[0x2]; u8 lag_tx_port_affinity[0x04]; u8 reserved_at_8[0x4]; From 0000a5f2507deef8668d23d6406e9d19ba371def Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jul 2019 21:12:58 +0000 Subject: [PATCH 05/24] net/mlx5: Make load_one() and unload_one() symmetric Currently mlx5_load_one() perform device registration using mlx5_register_device(). But mlx5_unload_one() doesn't unregister. Make them symmetric by doing device unregistration in mlx5_unload_one(). Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b15b27a497fc..fa0e991f1983 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1217,8 +1217,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { int err = 0; - if (cleanup) + if (cleanup) { + mlx5_unregister_device(dev); mlx5_drain_health_wq(dev); + } mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1369,7 +1371,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); - mlx5_unregister_device(dev); if (mlx5_unload_one(dev, true)) { mlx5_core_err(dev, "mlx5_unload_one failed\n"); From 6cedde4513990af7191afa43528533f80e92c989 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 29 Jul 2019 21:13:00 +0000 Subject: [PATCH 06/24] net/mlx5: E-Switch, Verify support QoS element type Check if firmware supports the requested element type before attempting to create the element type. In addition, explicitly specify the request element type and tsar type. Signed-off-by: Eli Cohen Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 31 +++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 7 +++++ 2 files changed, 38 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1f3891fde2eb..2927fa1da92f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1393,19 +1393,50 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, return err; } +static bool element_type_supported(struct mlx5_eswitch *esw, int type) +{ + struct mlx5_core_dev *dev = esw->dev = esw->dev; + + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + /* Vport QoS management */ static int esw_create_tsar(struct mlx5_eswitch *esw) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return 0; + if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return 0; + if (esw->qos.enabled) return -EEXIST; + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, tsar_ctx, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9265c84ad353..30d15e80bcc7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2957,6 +2957,13 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, }; +enum { + ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0, + ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, + ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, +}; + struct mlx5_ifc_scheduling_context_bits { u8 element_type[0x8]; u8 reserved_at_8[0x18]; From 332bd3a5b931eb67deb370db62d59f9cc7f76dac Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jul 2019 21:13:02 +0000 Subject: [PATCH 07/24] net/mlx5: E-switch, Combine metadata enable/disable functionality Except bit toggling code, rest of the code is same to enable/disable metadata passing functionality. Hence, combine them to single function and control using enable flag. Also instead of checking metadata supported at multiple places, fold into the helper function. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 50 +++++-------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 089ae4d48a82..4be19890f725 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -587,13 +587,16 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) { u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; u8 fdb_to_vport_reg_c_id; int err; + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, out, sizeof(out)); if (err) @@ -602,33 +605,10 @@ static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, esw_vport_context.fdb_to_vport_reg_c_id); - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; - MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); - - MLX5_SET(modify_esw_vport_context_in, in, - field_select.fdb_to_vport_reg_c_id, 1); - - return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, - in, sizeof(in)); -} - -static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; - u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; - u8 fdb_to_vport_reg_c_id; - int err; - - err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, - out, sizeof(out)); - if (err) - return err; - - fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.fdb_to_vport_reg_c_id); - - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + if (enable) + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + else + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); @@ -2138,11 +2118,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw) if (err) return err; - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - err = mlx5_eswitch_enable_passing_vport_metadata(esw); - if (err) - goto err_vport_metadata; - } + err = esw_set_passing_vport_metadata(esw, true); + if (err) + goto err_vport_metadata; err = esw_offloads_load_all_reps(esw); if (err) @@ -2156,8 +2134,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw) return 0; err_reps: - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - mlx5_eswitch_disable_passing_vport_metadata(esw); + esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; @@ -2187,8 +2164,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw); - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - mlx5_eswitch_disable_passing_vport_metadata(esw); + esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } From 610090ebce92ab6a3e1e623344be5a9dd72a8b6d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jul 2019 21:13:04 +0000 Subject: [PATCH 08/24] net/mlx5: E-switch, Initialize TSAR Qos hardware block before its user vports First enable TSAR Qos hardware block in device before enabling its user vports. This refactor is needed so that vports can be enabled before their representor netdevice can be created. While at it, esw_create_tsar() returns error code which was used only to print error. However esw_create_tsar() already prints warning if it hits an error. Hence, remove the redundant warning. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2927fa1da92f..820970911f8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1415,7 +1415,7 @@ static bool element_type_supported(struct mlx5_eswitch *esw, int type) } /* Vport QoS management */ -static int esw_create_tsar(struct mlx5_eswitch *esw) +static void esw_create_tsar(struct mlx5_eswitch *esw) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_core_dev *dev = esw->dev; @@ -1423,13 +1423,13 @@ static int esw_create_tsar(struct mlx5_eswitch *esw) int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return 0; + return; if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) - return 0; + return; if (esw->qos.enabled) - return -EEXIST; + return; MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); @@ -1443,11 +1443,10 @@ static int esw_create_tsar(struct mlx5_eswitch *esw) &esw->qos.root_tsar_id); if (err) { esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err); - return err; + return; } esw->qos.enabled = true; - return 0; } static void esw_destroy_tsar(struct mlx5_eswitch *esw) @@ -1819,6 +1818,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "engress ACL is not supported by FW\n"); + esw_create_tsar(esw); + esw->mode = mode; mlx5_lag_update(esw->dev); @@ -1836,10 +1837,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (err) goto abort; - err = esw_create_tsar(esw); - if (err) - esw_warn(esw->dev, "Failed to create eswitch TSAR"); - enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; @@ -1899,13 +1896,13 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); - esw_destroy_tsar(esw); - if (esw->mode == MLX5_ESWITCH_LEGACY) esw_destroy_legacy_table(esw); else if (esw->mode == MLX5_ESWITCH_OFFLOADS) esw_offloads_cleanup(esw); + esw_destroy_tsar(esw); + old_mode = esw->mode; esw->mode = MLX5_ESWITCH_NONE; From 5019833d661f5edb6bd63abd3da064d2517966b4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jul 2019 21:13:06 +0000 Subject: [PATCH 09/24] net/mlx5: E-switch, Introduce helper function to enable/disable vports vports needs to be enabled in switchdev and legacy mode. In switchdev mode, vports should be enabled after initializing the FDB tables and before creating their represntors so that representor works on an initialized vport object. Prepare a helper function which can be called when enabling either of the eswitch modes. Similarly, have disable vports helper function. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 96 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 19 +++- 2 files changed, 73 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 820970911f8b..6d82aefae6e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -58,20 +58,9 @@ struct vport_addr { bool mc_promisc; }; -enum { - UC_ADDR_CHANGE = BIT(0), - MC_ADDR_CHANGE = BIT(1), - PROMISC_CHANGE = BIT(3), -}; - static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw); static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); -/* Vport context events */ -#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ - MC_ADDR_CHANGE | \ - PROMISC_CHANGE) - struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { @@ -108,13 +97,13 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); - if (events_mask & UC_ADDR_CHANGE) + if (events_mask & MLX5_VPORT_UC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_uc_address_change, 1); - if (events_mask & MC_ADDR_CHANGE) + if (events_mask & MLX5_VPORT_MC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_mc_address_change, 1); - if (events_mask & PROMISC_CHANGE) + if (events_mask & MLX5_VPORT_PROMISC_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_promisc_change, 1); @@ -901,21 +890,21 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport) esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); - if (vport->enabled_events & UC_ADDR_CHANGE) { + if (vport->enabled_events & MLX5_VPORT_UC_ADDR_CHANGE) { esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); } - if (vport->enabled_events & MC_ADDR_CHANGE) + if (vport->enabled_events & MLX5_VPORT_MC_ADDR_CHANGE) esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); - if (vport->enabled_events & PROMISC_CHANGE) { + if (vport->enabled_events & MLX5_VPORT_PROMISC_CHANGE) { esw_update_vport_rx_mode(esw, vport); if (!IS_ERR_OR_NULL(vport->allmulti_rule)) esw_update_vport_mc_promisc(esw, vport); } - if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) + if (vport->enabled_events & (MLX5_VPORT_PROMISC_CHANGE | MLX5_VPORT_MC_ADDR_CHANGE)) esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); @@ -1649,7 +1638,7 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) } static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - int enable_events) + enum mlx5_eswitch_vport_event enabled_events) { u16 vport_num = vport->vport; @@ -1671,7 +1660,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); /* Sync with current vport context */ - vport->enabled_events = enable_events; + vport->enabled_events = enabled_events; vport->enabled = true; /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well @@ -1800,11 +1789,51 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +/* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs + * whichever are present on the eswitch. + */ +void +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events) { struct mlx5_vport *vport; + int i; + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_enable_vport(esw, vport, enabled_events); + + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + esw_enable_vport(esw, vport, enabled_events); +} + +/* mlx5_eswitch_disable_pf_vf_vports() disables vports of PF, ECPF and VFs + * whichever are previously enabled on the eswitch. + */ +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + mlx5_esw_for_all_vports_reverse(esw, i, vport) + esw_disable_vport(esw, vport); +} + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +{ + int enabled_events; int err; - int i, enabled_events; if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { @@ -1837,22 +1866,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (err) goto abort; - enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS : - UC_ADDR_CHANGE; + enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? MLX5_LEGACY_SRIOV_VPORT_EVENTS : + MLX5_VPORT_UC_ADDR_CHANGE; - /* Enable PF vport */ - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_enable_vport(esw, vport, enabled_events); - - /* Enable ECPF vports */ - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - esw_enable_vport(esw, vport, enabled_events); - } - - /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) - esw_enable_vport(esw, vport, enabled_events); + mlx5_eswitch_enable_pf_vf_vports(esw, enabled_events); mlx5_eswitch_event_handlers_register(esw); @@ -1876,9 +1893,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; - struct mlx5_vport *vport; int old_mode; - int i; if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) return; @@ -1890,8 +1905,7 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) mc_promisc = &esw->mc_promisc; mlx5_eswitch_event_handlers_unregister(esw); - mlx5_esw_for_all_vports(esw, i, vport) - esw_disable_vport(esw, vport); + mlx5_eswitch_disable_pf_vf_vports(esw); if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a38e8a3c7c9a..3103a34c619c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -101,6 +101,13 @@ struct mlx5_vport_info { bool trusted; }; +/* Vport context events */ +enum mlx5_eswitch_vport_event { + MLX5_VPORT_UC_ADDR_CHANGE = BIT(0), + MLX5_VPORT_MC_ADDR_CHANGE = BIT(1), + MLX5_VPORT_PROMISC_CHANGE = BIT(3), +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -122,7 +129,7 @@ struct mlx5_vport { } qos; bool enabled; - u16 enabled_events; + enum mlx5_eswitch_vport_event enabled_events; }; enum offloads_fdb_flags { @@ -513,6 +520,11 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); (vport) = &(esw)->vports[i], \ (i) < (esw)->total_vports; (i)++) +#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \ + for ((i) = (esw)->total_vports - 1; \ + (vport) = &(esw)->vports[i], \ + (i) >= MLX5_VPORT_PF; (i)--) + #define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ for ((i) = MLX5_VPORT_FIRST_VF; \ (vport) = &(esw)->vports[(i)], \ @@ -574,6 +586,11 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); +void +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } From 9ddb830a14dbd88308354d27cd17009fc97d3a6f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Jul 2019 21:13:08 +0000 Subject: [PATCH 10/24] net/mlx5: E-Switch, remove redundant error handling We don't need to handle error flow of esw_create_legacy_table() in the same branch, it is already being handled directly after the if statement, for both legacy and switchdev modes in one place. Signed-off-by: Saeed Mahameed Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6d82aefae6e1..17fb982b3489 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1855,8 +1855,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (mode == MLX5_ESWITCH_LEGACY) { err = esw_create_legacy_table(esw); - if (err) - goto abort; } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); From 131ce7014043087fbeddbcb3b8dac8891cd0e0fe Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jul 2019 21:13:10 +0000 Subject: [PATCH 11/24] net/mlx5: E-Switch, Remove redundant mc_promisc NULL check mc_promisc pointer points to an instance of struct esw_mc_addr allocated as part of the esw structure. Hence it cannot be NULL. Removed such redundant check and assign where it is actually used. While at it, add comment around legacy mode fields and move mc_promisc close to other legacy mode structures to improve code redability. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 17fb982b3489..90d150be237b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1900,12 +1900,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); - mc_promisc = &esw->mc_promisc; mlx5_eswitch_event_handlers_unregister(esw); mlx5_eswitch_disable_pf_vf_vports(esw); - if (mc_promisc && mc_promisc->uplink_rule) + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); if (esw->mode == MLX5_ESWITCH_LEGACY) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3103a34c619c..51b6d29466f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -214,8 +214,11 @@ enum { struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; + /* legacy data structures */ struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct esw_mc_addr mc_promisc; + /* end of legacy */ struct workqueue_struct *work_queue; struct mlx5_vport *vports; u32 flags; @@ -225,7 +228,6 @@ struct mlx5_eswitch { * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr mc_promisc; struct { bool enabled; From 5896b97296a7928035590ff3f477629774dce250 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jul 2019 21:13:12 +0000 Subject: [PATCH 12/24] net/mlx5: E-switch, Tide up eswitch config sequence Currently for PF and ECPF vports, representors are created before their eswitch hardware ports are initialized in below flow. mlx5_eswitch_enable() esw_offloads_init() esw_offloads_load_all_reps() [..] esw_enable_vport() However for VFs, vports are initialized before creating their respective netdev represnetors in event handling context. Similarly while disabling eswitch, first hardware vports are disabled, followed by destroying their representors. Here while underlying vports gets destroyed but its respective user facing netdevice can still exist on which user can continue to perform more offload operations. Instead, its more accurate to do enable_eswitch switchdev mode: 1. perform FDB tables initialization 2. initialize hw vport 3. create and publish representor for this vport disable_eswitch switchdev mode: 1. destroy user facing representor for the vport 2. disable hw vport 3. perform FDB tables cleanup Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 54 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 +- .../mellanox/mlx5/core/eswitch_offloads.c | 8 ++- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 90d150be237b..d4465dd18c11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -452,6 +452,22 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw) return err; } +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +static int esw_legacy_enable(struct mlx5_eswitch *esw) +{ + int ret; + + ret = esw_create_legacy_table(esw); + if (ret) + return ret; + + mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + return 0; +} + static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) { esw_cleanup_vepa_rules(esw); @@ -459,6 +475,19 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) esw_destroy_legacy_vepa_table(esw); } +static void esw_legacy_disable(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + + mlx5_eswitch_disable_pf_vf_vports(esw); + + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_legacy_table(esw); +} + /* E-Switch vport UC/MC lists management */ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, struct vport_addr *vaddr); @@ -1826,13 +1855,8 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) esw_disable_vport(esw, vport); } -#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ - MLX5_VPORT_MC_ADDR_CHANGE | \ - MLX5_VPORT_PROMISC_CHANGE) - int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { - int enabled_events; int err; if (!ESW_ALLOWED(esw) || @@ -1854,21 +1878,16 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) mlx5_lag_update(esw->dev); if (mode == MLX5_ESWITCH_LEGACY) { - err = esw_create_legacy_table(esw); + err = esw_legacy_enable(esw); } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw); + err = esw_offloads_enable(esw); } if (err) goto abort; - enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? MLX5_LEGACY_SRIOV_VPORT_EVENTS : - MLX5_VPORT_UC_ADDR_CHANGE; - - mlx5_eswitch_enable_pf_vf_vports(esw, enabled_events); - mlx5_eswitch_event_handlers_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", @@ -1890,7 +1909,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { - struct esw_mc_addr *mc_promisc; int old_mode; if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) @@ -1902,16 +1920,10 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) mlx5_eswitch_event_handlers_unregister(esw); - mlx5_eswitch_disable_pf_vf_vports(esw); - - mc_promisc = &esw->mc_promisc; - if (mc_promisc->uplink_rule) - mlx5_del_flow_rules(mc_promisc->uplink_rule); - if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_destroy_legacy_table(esw); + esw_legacy_disable(esw); else if (esw->mode == MLX5_ESWITCH_OFFLOADS) - esw_offloads_cleanup(esw); + esw_offloads_disable(esw); esw_destroy_tsar(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 51b6d29466f1..d447e1e44d59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -242,8 +242,8 @@ struct mlx5_eswitch { struct mlx5_esw_functions esw_funcs; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw); +void esw_offloads_disable(struct mlx5_eswitch *esw); +int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4be19890f725..db01b8ee9385 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2104,7 +2104,7 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw) +int esw_offloads_enable(struct mlx5_eswitch *esw) { int err; @@ -2122,6 +2122,8 @@ int esw_offloads_init(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; + mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + err = esw_offloads_load_all_reps(esw); if (err) goto err_reps; @@ -2134,6 +2136,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw) return 0; err_reps: + mlx5_eswitch_disable_pf_vf_vports(esw); esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); @@ -2159,11 +2162,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw) +void esw_offloads_disable(struct mlx5_eswitch *esw) { mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; From fcb64c0f5640e629bd77c2cb088f9fd70ff5bde7 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 8 May 2019 11:44:56 +0300 Subject: [PATCH 13/24] net/mlx5: E-Switch, add ingress rate support Use the scheduling elements to implement ingress rate limiter on an eswitch ports ingress traffic. Since the ingress of eswitch port is the egress of VF port, we control eswitch ingress by controlling VF egress. Configuration is done using the ports' representor net devices. Please note that burst size configuration is not supported by devices ConnectX-5 and earlier generations. Configuration examples: tc: tc filter add dev enp59s0f0_0 root protocol ip matchall action police rate 1mbit burst 20k ovs: ovs-vsctl set interface eth0 ingress_policing_rate=1000 Signed-off-by: Eli Cohen Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 19 ++++ .../net/ethernet/mellanox/mlx5/core/en_rep.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 100 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_tc.h | 7 ++ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 16 +++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + 6 files changed, 145 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6edf0aeb1e26..bf6f4835457e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1156,6 +1156,23 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, } } +static +int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + switch (ma->command) { + case TC_CLSMATCHALL_REPLACE: + return mlx5e_tc_configure_matchall(priv, ma); + case TC_CLSMATCHALL_DESTROY: + return mlx5e_tc_delete_matchall(priv, ma); + case TC_CLSMATCHALL_STATS: + mlx5e_tc_stats_matchall(priv, ma); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { @@ -1165,6 +1182,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); + case TC_SETUP_CLSMATCHALL: + return mlx5e_rep_setup_tc_cls_matchall(priv, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 10fafd5fa17b..43eeebe9c8d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -88,6 +88,7 @@ struct mlx5e_rep_priv { struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct rtnl_link_stats64 prev_vf_vport_stats; struct devlink_port dl_port; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f3ed028d5017..dc5fc3350b65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3638,6 +3638,106 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, return err; } +static int apply_police_params(struct mlx5e_priv *priv, u32 rate, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u16 vport_num; + u32 rate_mbps; + int err; + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. + */ + rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + vport_num = rpriv->rep->vport; + + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; +} + +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + } + + return 0; +} + +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + int prio = TC_H_MAJ(ma->common.prio) >> 16; + + if (prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); +} + +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); +} + +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + cur_stats = priv->stats.vf_vport; + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); +} + static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 1cb66bf76997..20f045e96c92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -63,6 +63,13 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags); +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma); + struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f4ace5f8e884..5fbebee7254d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1585,6 +1585,22 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, return 0; } +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps) +{ + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW); +} + static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) { ((u8 *)node_guid)[7] = mac[0]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4a03fdadb47e..804912e38dee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -261,6 +261,8 @@ void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); From 5d8a02536545080a555fe37064d24a402fd00d6a Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Thu, 27 Jun 2019 13:58:56 +0300 Subject: [PATCH 14/24] net/mlx5: Add flow counter bulk infrastructure Add infrastructure to track bulks of flow counters, providing the means to allocate and deallocate bulks, and to acquire and release individual counters from the bulks. Signed-off-by: Gavi Teitz Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fs_counters.c | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 067a4b56498b..3e734e62a6cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -58,6 +58,7 @@ struct mlx5_fc { u64 lastpackets; u64 lastbytes; + struct mlx5_fc_bulk *bulk; u32 id; bool aging; @@ -412,3 +413,107 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, fc_stats->sampling_interval = min_t(unsigned long, interval, fc_stats->sampling_interval); } + +/* Flow counter bluks */ + +struct mlx5_fc_bulk { + u32 base_id; + int bulk_len; + unsigned long *bitmask; + struct mlx5_fc fcs[0]; +}; + +static void +mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, u32 id) +{ + counter->bulk = bulk; + counter->id = id; +} + +static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) +{ + return bitmap_weight(bulk->bitmask, bulk->bulk_len); +} + +static struct mlx5_fc_bulk __attribute__((unused)) +*mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +{ + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; + struct mlx5_fc_bulk *bulk; + int err = -ENOMEM; + int bulk_len; + u32 base_id; + int i; + + alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); + bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; + + bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc), + GFP_KERNEL); + if (!bulk) + goto err_alloc_bulk; + + bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); + if (!bulk->bitmask) + goto err_alloc_bitmask; + + err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id); + if (err) + goto err_mlx5_cmd_bulk_alloc; + + bulk->base_id = base_id; + bulk->bulk_len = bulk_len; + for (i = 0; i < bulk_len; i++) { + mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i); + set_bit(i, bulk->bitmask); + } + + return bulk; + +err_mlx5_cmd_bulk_alloc: + kfree(bulk->bitmask); +err_alloc_bitmask: + kfree(bulk); +err_alloc_bulk: + return ERR_PTR(err); +} + +static int __attribute__((unused)) +mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) +{ + if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { + mlx5_core_err(dev, "Freeing bulk before all counters were released\n"); + return -EBUSY; + } + + mlx5_cmd_fc_free(dev, bulk->base_id); + kfree(bulk->bitmask); + kfree(bulk); + + return 0; +} + +static struct mlx5_fc __attribute__((unused)) +*mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +{ + int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); + + if (free_fc_index >= bulk->bulk_len) + return ERR_PTR(-ENOSPC); + + clear_bit(free_fc_index, bulk->bitmask); + return &bulk->fcs[free_fc_index]; +} + +static int __attribute__((unused)) +mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) +{ + int fc_index = fc->id - bulk->base_id; + + if (test_bit(fc_index, bulk->bitmask)) + return -EINVAL; + + set_bit(fc_index, bulk->bitmask); + return 0; +} From 558101f1b9807b34d8eeefb352d11e642b7e98dd Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Thu, 27 Jun 2019 20:53:03 +0300 Subject: [PATCH 15/24] net/mlx5: Add flow counter pool Add a pool of flow counters, based on flow counter bulks, removing the need to allocate a new counter via a costly FW command during the flow creation process. The time it takes to acquire/release a flow counter is cut from ~50 [us] to ~50 [ns]. The pool is part of the mlx5 driver instance, and provides flow counters for aging flows. mlx5_fc_create() was modified to provide counters for aging flows from the pool by default, and mlx5_destroy_fc() was modified to release counters back to the pool for later reuse. If bulk allocation is not supported or fails, and for non-aging flows, the fallback behavior is to allocate and free individual counters. The pool is comprised of three lists of flow counter bulks, one of fully used bulks, one of partially used bulks, and one of unused bulks. Counters are provided from the partially used bulks first, to help limit bulk fragmentation. The pool maintains a threshold, and strives to maintain the amount of available counters below it. The pool is increased in size when a counter acquisition request is made and there are no available counters, and it is decreased in size when the last counter in a bulk is released and there are more available counters than the threshold. All pool size changes are done in the context of the acquiring/releasing process. The value of the threshold is directly correlated to the amount of used counters the pool is providing, while constrained by a hard maximum, and is recalculated every time a bulk is allocated/freed. This ensures that the pool only consumes large amounts of memory for available counters if the pool is being used heavily. When fully populated and at the hard maximum, the buffer of available counters consumes ~40 [MB]. Signed-off-by: Gavi Teitz Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fs_counters.c | 231 ++++++++++++++++-- include/linux/mlx5/driver.h | 12 + 2 files changed, 218 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 3e734e62a6cd..51f1736c455d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -40,6 +40,8 @@ #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) +#define MLX5_FC_POOL_USED_BUFF_RATIO 10 struct mlx5_fc_cache { u64 packets; @@ -65,6 +67,11 @@ struct mlx5_fc { struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); +static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); +static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); + /* locking scheme: * * It is the responsibility of the user to prevent concurrent calls or bad @@ -202,13 +209,22 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, } } -static void mlx5_free_fc(struct mlx5_core_dev *dev, - struct mlx5_fc *counter) +static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { mlx5_cmd_fc_free(dev, counter->id); kfree(counter); } +static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (counter->bulk) + mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); + else + mlx5_fc_free(dev, counter); +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, @@ -232,7 +248,7 @@ static void mlx5_fc_stats_work(struct work_struct *work) llist_for_each_entry_safe(counter, tmp, dellist, dellist) { mlx5_fc_stats_remove(dev, counter); - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); } if (time_before(now, fc_stats->next_query) || @@ -248,26 +264,56 @@ static void mlx5_fc_stats_work(struct work_struct *work) fc_stats->next_query = now + fc_stats->sampling_interval; } -struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; struct mlx5_fc *counter; int err; counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&counter->list); err = mlx5_cmd_fc_alloc(dev, &counter->id); - if (err) - goto err_out; + if (err) { + kfree(counter); + return ERR_PTR(err); + } + + return counter; +} + +static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + + if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) { + counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool); + if (!IS_ERR(counter)) + return counter; + } + + return mlx5_fc_single_alloc(dev); +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int err; + + if (IS_ERR(counter)) + return counter; + + INIT_LIST_HEAD(&counter->list); + counter->aging = aging; if (aging) { u32 id = counter->id; counter->cache.lastuse = jiffies; - counter->aging = true; + counter->lastbytes = counter->cache.bytes; + counter->lastpackets = counter->cache.packets; idr_preload(GFP_KERNEL); spin_lock(&fc_stats->counters_idr_lock); @@ -288,10 +334,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) return counter; err_out_alloc: - mlx5_cmd_fc_free(dev, counter->id); -err_out: - kfree(counter); - + mlx5_fc_release(dev, counter); return ERR_PTR(err); } EXPORT_SYMBOL(mlx5_fc_create); @@ -315,7 +358,7 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) return; } - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); } EXPORT_SYMBOL(mlx5_fc_destroy); @@ -344,6 +387,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + mlx5_fc_pool_init(&fc_stats->fc_pool, dev); return 0; err_wq_create: @@ -358,6 +402,7 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) struct mlx5_fc *counter; struct mlx5_fc *tmp; + mlx5_fc_pool_cleanup(&fc_stats->fc_pool); cancel_delayed_work_sync(&dev->priv.fc_stats.work); destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; @@ -368,10 +413,10 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) tmplist = llist_del_all(&fc_stats->addlist); llist_for_each_entry_safe(counter, tmp, tmplist, addlist) - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list) - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); } int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, @@ -417,14 +462,15 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, /* Flow counter bluks */ struct mlx5_fc_bulk { + struct list_head pool_list; u32 base_id; int bulk_len; unsigned long *bitmask; struct mlx5_fc fcs[0]; }; -static void -mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, u32 id) +static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, + u32 id) { counter->bulk = bulk; counter->id = id; @@ -435,8 +481,7 @@ static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) return bitmap_weight(bulk->bitmask, bulk->bulk_len); } -static struct mlx5_fc_bulk __attribute__((unused)) -*mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) { enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; struct mlx5_fc_bulk *bulk; @@ -479,7 +524,7 @@ static struct mlx5_fc_bulk __attribute__((unused)) return ERR_PTR(err); } -static int __attribute__((unused)) +static int mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) { if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { @@ -494,8 +539,7 @@ mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) return 0; } -static struct mlx5_fc __attribute__((unused)) -*mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) { int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); @@ -506,8 +550,7 @@ static struct mlx5_fc __attribute__((unused)) return &bulk->fcs[free_fc_index]; } -static int __attribute__((unused)) -mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) +static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) { int fc_index = fc->id - bulk->base_id; @@ -517,3 +560,141 @@ mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) set_bit(fc_index, bulk->bitmask); return 0; } + +/* Flow counters pool API */ + +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) +{ + fc_pool->dev = dev; + mutex_init(&fc_pool->pool_lock); + INIT_LIST_HEAD(&fc_pool->fully_used); + INIT_LIST_HEAD(&fc_pool->partially_used); + INIT_LIST_HEAD(&fc_pool->unused); + fc_pool->available_fcs = 0; + fc_pool->used_fcs = 0; + fc_pool->threshold = 0; +} + +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk; + struct mlx5_fc_bulk *tmp; + + list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); +} + +static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool) +{ + fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, + fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO); +} + +static struct mlx5_fc_bulk * +mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *new_bulk; + + new_bulk = mlx5_fc_bulk_create(dev); + if (!IS_ERR(new_bulk)) + fc_pool->available_fcs += new_bulk->bulk_len; + mlx5_fc_pool_update_threshold(fc_pool); + return new_bulk; +} + +static void +mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + + fc_pool->available_fcs -= bulk->bulk_len; + mlx5_fc_bulk_destroy(dev, bulk); + mlx5_fc_pool_update_threshold(fc_pool); +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_from_list(struct list_head *src_list, + struct list_head *next_list, + bool move_non_full_bulk) +{ + struct mlx5_fc_bulk *bulk; + struct mlx5_fc *fc; + + if (list_empty(src_list)) + return ERR_PTR(-ENODATA); + + bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list); + fc = mlx5_fc_bulk_acquire_fc(bulk); + if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0) + list_move(&bulk->pool_list, next_list); + return fc; +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_fc_bulk *new_bulk; + struct mlx5_fc *fc; + + mutex_lock(&fc_pool->pool_lock); + + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used, + &fc_pool->fully_used, false); + if (IS_ERR(fc)) + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused, + &fc_pool->partially_used, + true); + if (IS_ERR(fc)) { + new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool); + if (IS_ERR(new_bulk)) { + fc = ERR_CAST(new_bulk); + goto out; + } + fc = mlx5_fc_bulk_acquire_fc(new_bulk); + list_add(&new_bulk->pool_list, &fc_pool->partially_used); + } + fc_pool->available_fcs--; + fc_pool->used_fcs++; + +out: + mutex_unlock(&fc_pool->pool_lock); + return fc; +} + +static void +mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk = fc->bulk; + int bulk_free_fcs_amount; + + mutex_lock(&fc_pool->pool_lock); + + if (mlx5_fc_bulk_release_fc(bulk, fc)) { + mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); + goto unlock; + } + + fc_pool->available_fcs++; + fc_pool->used_fcs--; + + bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk); + if (bulk_free_fcs_amount == 1) + list_move_tail(&bulk->pool_list, &fc_pool->partially_used); + if (bulk_free_fcs_amount == bulk->bulk_len) { + list_del(&bulk->pool_list); + if (fc_pool->available_fcs > fc_pool->threshold) + mlx5_fc_pool_free_bulk(fc_pool, bulk); + else + list_add(&bulk->pool_list, &fc_pool->unused); + } + +unlock: + mutex_unlock(&fc_pool->pool_lock); +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 267b2bc0ca4a..d8f348ef9c33 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -477,6 +477,17 @@ struct mlx5_core_sriov { u16 max_vfs; }; +struct mlx5_fc_pool { + struct mlx5_core_dev *dev; + struct mutex pool_lock; /* protects pool lists */ + struct list_head fully_used; + struct list_head partially_used; + struct list_head unused; + int available_fcs; + int used_fcs; + int threshold; +}; + struct mlx5_fc_stats { spinlock_t counters_idr_lock; /* protects counters_idr */ struct idr counters_idr; @@ -489,6 +500,7 @@ struct mlx5_fc_stats { unsigned long next_query; unsigned long sampling_interval; /* jiffies */ u32 *bulk_query_out; + struct mlx5_fc_pool fc_pool; }; struct mlx5_events; From 68865419ba1bf502a5bd279a500deda64000249d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 11 Jul 2019 11:20:22 +0300 Subject: [PATCH 16/24] net/mlx5e: Tx, Strict the room needed for SQ edge NOPs We use NOPs to populate the WQ fragment edge if the WQE does not fit in frag, to avoid WQEs crossing a page boundary (or wrap-around the WQ). The upper bound on the needed number of NOPs is one WQEBB less than the largest possible WQE, for otherwise the WQE would certainly fit. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index ddfe19adb3d9..7da22b413a48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -6,7 +6,7 @@ #include "en.h" -#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1) #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) From 6c085a8aab5183d8658c9a692bcfda3e24195b7a Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Sun, 12 May 2019 18:28:27 +0300 Subject: [PATCH 17/24] net/mlx5e: XDP, Close TX MPWQE session when no room for inline packet left In MPWQE mode, when transmitting packets with XDP, a packet that is smaller than a certain size (set to 256 bytes) would be sent inline within its WQE TX descriptor (mem-copied), in case the hardware tx queue is congested beyond a pre-defined water-mark. If a MPWQE cannot contain an additional inline packet, we close this MPWQE session, and send the packet inlined within the next MPWQE. To save some MPWQE session close+open operations, we don't open MPWQE sessions that are contiguously smaller than certain size (set to the HW MPWQE maximum size). If there isn't enough contiguous room in the send queue, we fill it with NOPs and wrap the send queue index around. This way, qualified packets are always sent inline. Perf tests: Tested packet rate for UDP 64Byte multi-stream over two dual port ConnectX-5 100Gbps NICs. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz XDP_TX: With 24 channels: | ------ | bounced packets | inlined packets | inline ratio | | before | 113.6Mpps | 96.3Mpps | 84% | | after | 115Mpps | 99.5Mpps | 86% | With one channel: | ------ | bounced packets | inlined packets | inline ratio | | before | 6.7Mpps | 0pps | 0% | | after | 6.8Mpps | 0pps | 0% | As we can see, there is improvement in both inline ratio and overall packet rate for 24 channels. Also, we see no degradation for the one-channel case. Signed-off-by: Shay Agroskin Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 - .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 32 ++++------- .../net/ethernet/mellanox/mlx5/core/en/xdp.h | 53 +++++++++++++++---- .../ethernet/mellanox/mlx5/core/en_stats.c | 6 +++ .../ethernet/mellanox/mlx5/core/en_stats.h | 3 ++ 5 files changed, 63 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 79d93d6c7d7a..745bcc25c6f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -483,8 +483,6 @@ struct mlx5e_xdp_mpwqe { struct mlx5e_tx_wqe *wqe; u8 ds_count; u8 pkt_count; - u8 max_ds_count; - u8 complete; u8 inline_on; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index b0b982cf69bb..8cb98326531f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -179,34 +179,22 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; struct mlx5_wq_cyc *wq = &sq->wq; - u8 wqebbs; - u16 pi; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + + if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) + mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs); mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); prefetchw(session->wqe->data); session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; session->pkt_count = 0; - session->complete = 0; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. - */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif - - wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), - MLX5E_XDP_MPW_MAX_WQEBBS); - - session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; - mlx5e_xdp_update_inline_state(sq); stats->mpwqe++; @@ -244,7 +232,7 @@ static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { + MLX5E_XDPSQ_STOP_ROOM))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -285,8 +273,8 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(session->complete || - session->ds_count == session->max_ds_count)) + if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) || + session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS)) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index b90923932668..e0ed7710f5f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -40,6 +40,26 @@ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM) + +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) +#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ + DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) + +/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + +#define MLX5E_XDP_MPW_MAX_NUM_DS \ + (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) + struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, @@ -114,6 +134,30 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) session->inline_on = 1; } +static inline bool +mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session) +{ + return session->inline_on && + session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS; +} + +static inline void +mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->num_wqebbs = 1; + wi->num_pkts = 0; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + + sq->stats->nops += nnops; +} + static inline void mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, @@ -126,20 +170,12 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, session->pkt_count++; -#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) - if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { struct mlx5_wqe_inline_seg *inline_dseg = (struct mlx5_wqe_inline_seg *)dseg; u16 ds_len = sizeof(*inline_dseg) + dma_len; u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); - if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) { - /* Not enough space for inline wqe, send with memory pointer */ - session->complete = true; - goto no_inline; - } - inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); memcpy(inline_dseg->data, xdptxd->data, dma_len); @@ -148,7 +184,6 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, return; } -no_inline: dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 539b4d3656da..6eee3c7d4b06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -74,6 +74,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, @@ -90,6 +91,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, @@ -200,6 +202,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_xdp_tx_xmit += xdpsq_stats->xmit; s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe; s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw; + s->rx_xdp_tx_nops += xdpsq_stats->nops; s->rx_xdp_tx_full += xdpsq_stats->full; s->rx_xdp_tx_err += xdpsq_stats->err; s->rx_xdp_tx_cqe += xdpsq_stats->cqes; @@ -227,6 +230,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_xmit += xdpsq_red_stats->xmit; s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe; s->tx_xdp_inlnw += xdpsq_red_stats->inlnw; + s->tx_xdp_nops += xdpsq_red_stats->nops; s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; @@ -1331,6 +1335,7 @@ static const struct counter_desc rq_xdpsq_stats_desc[] = { { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, @@ -1340,6 +1345,7 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 76ac111e14d0..bf645d42c833 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -81,6 +81,7 @@ struct mlx5e_sw_stats { u64 rx_xdp_tx_xmit; u64 rx_xdp_tx_mpwqe; u64 rx_xdp_tx_inlnw; + u64 rx_xdp_tx_nops; u64 rx_xdp_tx_full; u64 rx_xdp_tx_err; u64 rx_xdp_tx_cqe; @@ -97,6 +98,7 @@ struct mlx5e_sw_stats { u64 tx_xdp_xmit; u64 tx_xdp_mpwqe; u64 tx_xdp_inlnw; + u64 tx_xdp_nops; u64 tx_xdp_full; u64 tx_xdp_err; u64 tx_xdp_cqes; @@ -288,6 +290,7 @@ struct mlx5e_xdpsq_stats { u64 xmit; u64 mpwqe; u64 inlnw; + u64 nops; u64 full; u64 err; /* dirtied @completion */ From 7cf6f811b72aced0c48e1065fe059d604ef6363d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 14 Jul 2019 17:50:51 +0300 Subject: [PATCH 18/24] net/mlx5e: XDP, Slight enhancement for WQE fetch function Instead of passing an output param, let function return the WQE pointer. In addition, pass &pi so it gets its value in the function, and save the redundant assignment that comes after it. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 13 ++++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 8cb98326531f..1ed5c33e022f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -187,14 +187,12 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs); - mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); + session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi); prefetchw(session->wqe->data); session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; session->pkt_count = 0; - pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - mlx5e_xdp_update_inline_state(sq); stats->mpwqe++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index e0ed7710f5f1..36ac1e3816b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -190,14 +190,17 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, session->ds_count++; } -static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, - struct mlx5e_tx_wqe **wqe) +static inline struct mlx5e_tx_wqe * +mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi) { struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe *wqe; - *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - memset(*wqe, 0, sizeof(**wqe)); + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(wqe, 0, sizeof(*wqe)); + + return wqe; } static inline void From b431302e92f00b7acd5617a4d289f8006394bfc2 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 1 Jul 2019 12:08:08 +0300 Subject: [PATCH 19/24] net/mlx5e: Tx, Soften inline mode VLAN dependencies If capable, use zero inline mode in TX WQE for non-VLAN packets. For VLAN ones, keep the enforcement of at least L2 inline mode, unless the WQE VLAN insertion offload cap is on. Performance: Tested single core packet rate of 64Bytes. NIC: ConnectX-5 CPU: Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz pktgen: Before: 12.46 Mpps After: 14.65 Mpps (+17.5%) XDP_TX: The MPWQE flow is not affected, as it already has this optimization. So we test with priv-flag xdp_tx_mpwqe: off. Before: 9.90 Mpps After: 10.20 Mpps (+3%) Signed-off-by: Tariq Toukan Tested-by: Noam Stolero Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 22 +++++++++++++++++-- .../ethernet/mellanox/mlx5/core/en_common.c | 12 ---------- .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 7 +++--- .../net/ethernet/mellanox/mlx5/core/vport.c | 7 +++--- 7 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 745bcc25c6f8..30f13f81c965 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -359,6 +359,7 @@ enum { MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, + MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, }; struct mlx5e_sq_wqe_info { @@ -1132,7 +1133,6 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7da22b413a48..87be96747902 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -117,9 +117,27 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, mlx5_write64((__be32 *)ctrl, uar_map); } -static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe) +static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg) { - return !!wqe->ctrl.tisn; + return cseg && !!cseg->tisn; +} + +static inline u8 +mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, + struct sk_buff *skb) +{ + u8 mode; + + if (mlx5e_transport_inline_tx_wqe(cseg)) + return MLX5_INLINE_MODE_TCP_UDP; + + mode = sq->min_inline_mode; + + if (skb_vlan_tag_present(skb) && + test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); + + return mode; } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 1539cf3de5dc..f7890e0ce96c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -180,15 +180,3 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) return err; } - -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev) -{ - u8 min_inline_mode; - - mlx5_query_min_inline(mdev, &min_inline_mode); - if (min_inline_mode == MLX5_INLINE_MODE_NONE && - !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - min_inline_mode = MLX5_INLINE_MODE_L2; - - return min_inline_mode; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 8dd31b5c740c..01f2918063af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1101,7 +1101,7 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, struct mlx5e_params *params) { - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev); + mlx5_query_min_inline(priv->mdev, ¶ms->tx_min_inline_mode); if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b2618dd6dd10..e75cb18c2256 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1131,6 +1131,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) { @@ -4777,7 +4779,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); /* TX inline */ - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); /* RSS */ mlx5e_build_rss_params(rss_params, params->num_channels); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index acf25cc38fa1..d3a67a9b4eba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -292,8 +292,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { - u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ? - MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode; + u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb); opcode = MLX5_OPCODE_SEND; mss = 0; @@ -608,9 +607,11 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { + u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb); + opcode = MLX5_OPCODE_SEND; mss = 0; - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + ihs = mlx5e_calc_min_inline(mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); stats->packets++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index c912d82ca64b..30f7848a6f88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -122,12 +122,13 @@ void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline_mode) { switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode)) + break; + /* fall through */ case MLX5_CAP_INLINE_MODE_L2: *min_inline_mode = MLX5_INLINE_MODE_L2; break; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); - break; case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: *min_inline_mode = MLX5_INLINE_MODE_NONE; break; From 8c7698d5caa7852bebae0cf7402b7d3a1f30423b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 3 May 2019 15:12:46 -0700 Subject: [PATCH 20/24] net/mlx5e: Rx, checksum handling refactoring Move vlan checksum fixup flow into mlx5e_skb_padding_csum(), which is supposed to fixup SKB checksum if needed. And rename mlx5e_skb_padding_csum() to mlx5e_skb_csum_fixup(). Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ac6e586d403d..60570b442fff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -859,13 +859,24 @@ tail_padding_csum(struct sk_buff *skb, int offset, } static void -mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, - struct mlx5e_rq_stats *stats) +mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) { struct ipv6hdr *ip6; struct iphdr *ip4; int pkt_len; + /* Fixup vlan headers, if any */ + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. + */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); + + /* Fixup tail padding, if any */ switch (proto) { case htons(ETH_P_IP): ip4 = (struct iphdr *)(skb->data + network_depth); @@ -931,16 +942,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; /* CQE csum covers all received bytes */ /* csum might need some fixups ...*/ - if (network_depth > ETH_HLEN) - /* CQE csum is calculated from the IP header and does - * not cover VLAN headers (if present). This will add - * the checksum manually. - */ - skb->csum = csum_partial(skb->data + ETH_HLEN, - network_depth - ETH_HLEN, - skb->csum); - - mlx5e_skb_padding_csum(skb, network_depth, proto, stats); + mlx5e_skb_csum_fixup(skb, network_depth, proto, stats); return; } From 7f7cc235c2dfc2a9208a743492b80d6cdfee50a6 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 3 Jul 2019 09:16:52 +0300 Subject: [PATCH 21/24] net/mlx5e: Fix mlx5e_tx_reporter_create return value Return error when failing to create a reporter in devlink. Since NET_DEVLINK mandatory to MLX5_CORE in Kconfig, returned pointer can't be NULL and can only hold an error in bad path. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index f3d98748b211..383ecfd85d8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -296,11 +296,13 @@ int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, true, priv); - if (IS_ERR(priv->tx_reporter)) + if (IS_ERR(priv->tx_reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", PTR_ERR(priv->tx_reporter)); - return IS_ERR_OR_NULL(priv->tx_reporter); + return PTR_ERR(priv->tx_reporter); + } + return 0; } void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) From baf6dfdb10e9695637d72429159fd26fc36d30c3 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 24 Jun 2019 19:34:42 +0300 Subject: [PATCH 22/24] net/mlx5e: Set tx reporter only on successful creation When failing to create tx reporter, don't set the reporter's pointer. Creating a reporter is not mandatory for driver load, avoid garbage/error pointer. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 14 ++++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 383ecfd85d8a..f1c652f75718 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -117,7 +117,7 @@ static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, char *err_str, struct mlx5e_tx_err_ctx *err_ctx) { - if (IS_ERR_OR_NULL(tx_reporter)) { + if (!tx_reporter) { netdev_err(err_ctx->sq->channel->netdev, err_str); return err_ctx->recover(err_ctx->sq); } @@ -289,25 +289,27 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) { + struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; struct devlink *devlink = priv_to_devlink(mdev); - priv->tx_reporter = + reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, true, priv); - if (IS_ERR(priv->tx_reporter)) { + if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", - PTR_ERR(priv->tx_reporter)); - return PTR_ERR(priv->tx_reporter); + PTR_ERR(reporter)); + return PTR_ERR(reporter); } + priv->tx_reporter = reporter; return 0; } void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->tx_reporter)) + if (!priv->tx_reporter) return; devlink_health_reporter_destroy(priv->tx_reporter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e75cb18c2256..4db595a7eb03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2325,7 +2325,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } - if (!IS_ERR_OR_NULL(priv->tx_reporter)) + if (priv->tx_reporter) devlink_health_reporter_state_update(priv->tx_reporter, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); From c9e6c7209a9a26a0281b311c6880b9e2382ad635 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 24 Jun 2019 20:33:52 +0300 Subject: [PATCH 23/24] net/mlx5e: TX reporter cleanup Remove redundant include files. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h index e78e92753d73..ed7a3881d2c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h @@ -4,7 +4,6 @@ #ifndef __MLX5E_EN_REPORTER_H #define __MLX5E_EN_REPORTER_H -#include #include "en.h" int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index f1c652f75718..6e54fefea410 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2019 Mellanox Technologies. */ -#include #include "reporter.h" #include "lib/eq.h" From 6830b468259b45e3b73070474b8cec9388aa8c11 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Thu, 1 Aug 2019 16:40:59 +0800 Subject: [PATCH 24/24] net/mlx5e: Allow dropping specific tunnel packets In some case, we don't want to allow specific tunnel packets to host that can avoid to take up high CPU (e.g network attacks). But other tunnel packets which not matched in hardware will be sent to host too. $ tc filter add dev vxlan_sys_4789 \ protocol ip chain 0 parent ffff: prio 1 handle 1 \ flower dst_ip 1.1.1.100 ip_proto tcp dst_port 80 \ enc_dst_ip 2.2.2.100 enc_key_id 100 enc_dst_port 4789 \ action tunnel_key unset pipe action drop Signed-off-by: Tonghao Zhang Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dc5fc3350b65..c5d75e2ecf54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2485,7 +2485,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv, if (flow_flag_test(flow, EGRESS) && !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP))) + (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)