mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 23:52:08 +02:00
net/mlx5: Propagate LAG effective max_tx_speed to vports
Currently, vports report only their parent's uplink speed, which in LAG setups does not reflect the true aggregated bandwidth. This makes it hard for upper-layer software to optimize load balancing decisions based on accurate bandwidth information. Fix the issue by calculating the possible maximum speed of a LAG as the sum of speeds of all active uplinks that are part of the LAG. Propagate this effective max speed to vports associated with the LAG whenever a relevant event occurs, such as physical port link state changes or LAG creation/modification. With this change, upper-layer components receive accurate bandwidth information corresponding to the active members of the LAG and can make better load balancing decisions. Signed-off-by: Or Har-Toov <ohartoov@nvidia.com> Reviewed-by: Maher Sanalla <msanalla@nvidia.com> Reviewed-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Edward Srouji <edwards@nvidia.com> Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
parent
3df5dd46fc
commit
50f1d188c5
|
|
@ -996,6 +996,126 @@ static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
|
|||
ldev->mode != MLX5_LAG_MODE_MPESW;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MLX5_ESWITCH
|
||||
static int
|
||||
mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
|
||||
int (*get_speed)(struct mlx5_core_dev *, u32 *))
|
||||
{
|
||||
struct mlx5_core_dev *pf_mdev;
|
||||
int pf_idx;
|
||||
u32 speed;
|
||||
int ret;
|
||||
|
||||
*sum_speed = 0;
|
||||
mlx5_ldev_for_each(pf_idx, 0, ldev) {
|
||||
pf_mdev = ldev->pf[pf_idx].dev;
|
||||
if (!pf_mdev)
|
||||
continue;
|
||||
|
||||
ret = get_speed(pf_mdev, &speed);
|
||||
if (ret) {
|
||||
mlx5_core_dbg(pf_mdev,
|
||||
"Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
|
||||
get_speed, dev_name(pf_mdev->device),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
*sum_speed += speed;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Sum the values reported by mlx5_port_max_linkspeed() over the devices of
 * @ldev and store the total in @max_speed.
 *
 * Return: the result of mlx5_lag_sum_devices_speed().
 */
static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
{
	int err;

	err = mlx5_lag_sum_devices_speed(ldev, max_speed,
					 mlx5_port_max_linkspeed);
	return err;
}
|
||||
|
||||
static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
|
||||
u32 speed)
|
||||
{
|
||||
u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
|
||||
struct mlx5_eswitch *esw = mdev->priv.eswitch;
|
||||
struct mlx5_vport *vport;
|
||||
unsigned long i;
|
||||
int ret;
|
||||
|
||||
if (!esw)
|
||||
return;
|
||||
|
||||
if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
|
||||
return;
|
||||
|
||||
mlx5_esw_for_each_vport(esw, i, vport) {
|
||||
if (!vport)
|
||||
continue;
|
||||
|
||||
if (vport->vport == MLX5_VPORT_UPLINK)
|
||||
continue;
|
||||
|
||||
ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
|
||||
vport->vport, true, speed);
|
||||
if (ret)
|
||||
mlx5_core_dbg(mdev,
|
||||
"Failed to set vport %d speed %d, err=%d\n",
|
||||
vport->vport, speed, ret);
|
||||
}
|
||||
}
|
||||
|
||||
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
|
||||
{
|
||||
struct mlx5_core_dev *mdev;
|
||||
u32 speed;
|
||||
int pf_idx;
|
||||
|
||||
speed = ldev->tracker.bond_speed_mbps;
|
||||
|
||||
if (speed == SPEED_UNKNOWN)
|
||||
return;
|
||||
|
||||
/* If speed is not set, use the sum of max speeds of all PFs */
|
||||
if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
|
||||
return;
|
||||
|
||||
speed = speed / MLX5_MAX_TX_SPEED_UNIT;
|
||||
|
||||
mlx5_ldev_for_each(pf_idx, 0, ldev) {
|
||||
mdev = ldev->pf[pf_idx].dev;
|
||||
if (!mdev)
|
||||
continue;
|
||||
|
||||
mlx5_lag_modify_device_vports_speed(mdev, speed);
|
||||
}
|
||||
}
|
||||
|
||||
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
|
||||
{
|
||||
struct mlx5_core_dev *mdev;
|
||||
u32 speed;
|
||||
int pf_idx;
|
||||
int ret;
|
||||
|
||||
mlx5_ldev_for_each(pf_idx, 0, ldev) {
|
||||
mdev = ldev->pf[pf_idx].dev;
|
||||
if (!mdev)
|
||||
continue;
|
||||
|
||||
ret = mlx5_port_oper_linkspeed(mdev, &speed);
|
||||
if (ret) {
|
||||
mlx5_core_dbg(mdev,
|
||||
"Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
|
||||
dev_name(mdev->device), ret);
|
||||
continue;
|
||||
}
|
||||
|
||||
speed = speed / MLX5_MAX_TX_SPEED_UNIT;
|
||||
mlx5_lag_modify_device_vports_speed(mdev, speed);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void mlx5_do_bond(struct mlx5_lag *ldev)
|
||||
{
|
||||
int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
|
||||
|
|
@ -1083,9 +1203,12 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
|
|||
ndev);
|
||||
dev_put(ndev);
|
||||
}
|
||||
mlx5_lag_set_vports_agg_speed(ldev);
|
||||
} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
|
||||
mlx5_modify_lag(ldev, &tracker);
|
||||
mlx5_lag_set_vports_agg_speed(ldev);
|
||||
} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
|
||||
mlx5_lag_reset_vports_speed(ldev);
|
||||
mlx5_disable_lag(ldev);
|
||||
}
|
||||
}
|
||||
|
|
@ -1286,6 +1409,38 @@ static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
|
||||
struct net_device *ndev)
|
||||
{
|
||||
struct ethtool_link_ksettings lksettings;
|
||||
struct net_device *bond_dev;
|
||||
int err;
|
||||
|
||||
if (netif_is_lag_master(ndev))
|
||||
bond_dev = ndev;
|
||||
else
|
||||
bond_dev = netdev_master_upper_dev_get(ndev);
|
||||
|
||||
if (!bond_dev) {
|
||||
tracker->bond_speed_mbps = SPEED_UNKNOWN;
|
||||
return;
|
||||
}
|
||||
|
||||
err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
|
||||
if (err) {
|
||||
netdev_dbg(bond_dev,
|
||||
"Failed to get speed for bond dev %s, err=%d\n",
|
||||
bond_dev->name, err);
|
||||
tracker->bond_speed_mbps = SPEED_UNKNOWN;
|
||||
return;
|
||||
}
|
||||
|
||||
if (lksettings.base.speed == SPEED_UNKNOWN)
|
||||
tracker->bond_speed_mbps = 0;
|
||||
else
|
||||
tracker->bond_speed_mbps = lksettings.base.speed;
|
||||
}
|
||||
|
||||
/* this handler is always registered to netdev events */
|
||||
static int mlx5_lag_netdev_event(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
|
|
@ -1317,6 +1472,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
|
|||
break;
|
||||
}
|
||||
|
||||
if (changed)
|
||||
mlx5_lag_update_tracker_speed(&tracker, ndev);
|
||||
|
||||
ldev->tracker = tracker;
|
||||
|
||||
if (changed)
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ struct lag_tracker {
|
|||
unsigned int is_bonded:1;
|
||||
unsigned int has_inactive:1;
|
||||
enum netdev_lag_hash hash_type;
|
||||
u32 bond_speed_mbps;
|
||||
};
|
||||
|
||||
/* LAG data of a ConnectX card.
|
||||
|
|
@ -116,6 +117,14 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev);
|
|||
void mlx5_lag_add_devices(struct mlx5_lag *ldev);
|
||||
struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev);
|
||||
|
||||
#ifdef CONFIG_MLX5_ESWITCH
/* Vport speed propagation for LAG; implemented only with eswitch support. */
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev);
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev);
#else
/* Without CONFIG_MLX5_ESWITCH both calls compile to no-ops. */
static inline void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
{
}

static inline void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
{
}
#endif
|
||||
|
||||
static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev)
|
||||
{
|
||||
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
|
||||
|
|
|
|||
|
|
@ -381,6 +381,7 @@ const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev,
|
|||
u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev,
|
||||
struct mlx5_link_info *info,
|
||||
bool force_legacy);
|
||||
/* Store in @speed the highest speed among the port's operational link modes
 * (0 if none is set). Returns 0 on success or the error from querying the
 * port's protocol mask.
 */
int mlx5_port_oper_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
|
||||
int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
|
||||
|
||||
#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \
|
||||
|
|
|
|||
|
|
@ -1200,6 +1200,30 @@ u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev,
|
|||
return link_modes;
|
||||
}
|
||||
|
||||
/*
 * Report the operational link speed of port 1 of @mdev.
 *
 * The operational protocol mask is matched against the link-mode table and
 * the highest speed among the set bits is stored in @speed; @speed is 0
 * when no table entry matches.
 *
 * Return: 0 on success, or the error from mlx5_port_query_eth_proto(), in
 * which case @speed is left untouched.
 */
int mlx5_port_oper_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
	bool ext_proto = mlx5_ptys_ext_supported(mdev);
	const struct mlx5_link_info *table;
	struct mlx5_port_eth_proto eproto;
	u32 fastest = 0;
	u32 max_size;
	int err;
	int i;

	err = mlx5_port_query_eth_proto(mdev, 1, ext_proto, &eproto);
	if (err)
		return err;

	mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, false);

	for (i = 0; i < max_size; ++i) {
		if (!(eproto.oper & MLX5E_PROT_MASK(i)))
			continue;

		if (table[i].speed > fastest)
			fastest = table[i].speed;
	}

	*speed = fastest;
	return 0;
}
|
||||
|
||||
int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
|
||||
{
|
||||
const struct mlx5_link_info *table;
|
||||
|
|
|
|||
|
|
@ -62,6 +62,28 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
|
|||
return MLX5_GET(query_vport_state_out, out, state);
|
||||
}
|
||||
|
||||
static int mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
|
||||
u16 vport, u8 other_vport,
|
||||
u8 *admin_state)
|
||||
{
|
||||
u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
|
||||
u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
|
||||
int err;
|
||||
|
||||
MLX5_SET(query_vport_state_in, in, opcode,
|
||||
MLX5_CMD_OP_QUERY_VPORT_STATE);
|
||||
MLX5_SET(query_vport_state_in, in, op_mod, opmod);
|
||||
MLX5_SET(query_vport_state_in, in, vport_number, vport);
|
||||
MLX5_SET(query_vport_state_in, in, other_vport, other_vport);
|
||||
|
||||
err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
*admin_state = MLX5_GET(query_vport_state_out, out, admin_state);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
|
||||
u16 vport, u8 other_vport, u8 state)
|
||||
{
|
||||
|
|
@ -77,6 +99,29 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
|
|||
return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
|
||||
}
|
||||
|
||||
/*
 * Set the max_tx_speed field of @vport through MODIFY_VPORT_STATE.
 *
 * The command also carries an admin_state field, so the current admin
 * state is queried first and written back unchanged alongside the new
 * @max_tx_speed.
 *
 * Return: 0 on success, or the error from the query or the modify command.
 */
int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
				   u16 vport, u8 other_vport, u16 max_tx_speed)
{
	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {};
	u8 cur_admin_state;
	int ret;

	ret = mlx5_query_vport_admin_state(mdev, opmod, vport, other_vport,
					   &cur_admin_state);
	if (ret)
		return ret;

	MLX5_SET(modify_vport_state_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_VPORT_STATE);
	MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
	MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
	MLX5_SET(modify_vport_state_in, in, vport_number, vport);
	MLX5_SET(modify_vport_state_in, in, max_tx_speed, max_tx_speed);
	MLX5_SET(modify_vport_state_in, in, admin_state, cur_admin_state);

	return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
}
|
||||
|
||||
static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
|
||||
bool other_vport, u32 *out)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@
|
|||
(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
|
||||
mlx5_core_is_pf(mdev))
|
||||
|
||||
/* Divisor applied to a link speed before it is programmed as a vport
 * max_tx_speed. NOTE(review): presumably the field is in units of 100 Mbps;
 * confirm against the device interface definition.
 */
#define MLX5_MAX_TX_SPEED_UNIT 100
|
||||
|
||||
enum {
|
||||
MLX5_CAP_INLINE_MODE_L2,
|
||||
MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
|
||||
|
|
@ -58,6 +60,8 @@ enum {
|
|||
u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
|
||||
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
|
||||
u16 vport, u8 other_vport, u8 state);
|
||||
int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
|
||||
u16 vport, u8 other_vport, u16 max_tx_speed);
|
||||
int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
|
||||
u16 vport, bool other, u8 *addr);
|
||||
int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user