Merge branch 'net-mlx5-refactor-devcom-and-add-net-namespace-support'

Tariq Toukan says:

====================
net/mlx5: Refactor devcom and add net namespace support

This series by Shay improves the mlx5 devcom infrastructure by
introducing a structured matching attribute interface, relocating
certain devcom registration flows to more appropriate locations, and
adding net namespace awareness to the devcom framework and its users.

Patch 1: Refactors the devcom interface to accept a match attribute
structure instead of raw keys, enabling future extensibility such as
namespace-based matching.

Patch 2: Moves the devcom registration for HCA components from the core
code to the LAG layer to better reflect their logical ownership and
lifecycle.

Patch 3: Adds net namespace support to the devcom framework, enabling
components to operate in isolated namespaces.

Patch 4: Updates the LAG layer to make use of the new namespace-aware
devcom interface and improves reload behavior in LAG mode.
====================

Link: https://patch.msgid.link/1757940070-618661-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-09-16 17:25:56 -07:00
commit 739d911ce5
12 changed files with 126 additions and 57 deletions

View File

@ -204,11 +204,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
return 0;
}
if (mlx5_lag_is_active(dev)) {
NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
return -EOPNOTSUPP;
}
if (mlx5_core_is_mp_slave(dev)) {
NL_SET_ERR_MSG_MOD(extack, "reload is unsupported for multi port slave");
return -EOPNOTSUPP;

View File

@ -235,9 +235,13 @@ static int mlx5e_devcom_event_mpv(int event, void *my_data, void *event_data)
static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data)
{
struct mlx5_devcom_match_attr attr = {
.key.val = *data,
};
priv->devcom = mlx5_devcom_register_component(priv->mdev->priv.devc,
MLX5_DEVCOM_MPV,
*data,
&attr,
mlx5e_devcom_event_mpv,
priv);
if (IS_ERR(priv->devcom))

View File

@ -66,6 +66,7 @@
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "lib/mlx5.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>
#include "lag/lag.h"
@ -5387,12 +5388,13 @@ void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
struct mlx5_devcom_match_attr attr = {};
struct netdev_phys_item_id ppid;
struct mlx5e_rep_priv *rpriv;
struct mapping_ctx *mapping;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
u64 mapping_id, key;
u64 mapping_id;
int err = 0;
rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
@ -5448,8 +5450,10 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
err = netif_get_port_parent_id(priv->netdev, &ppid, false);
if (!err) {
memcpy(&key, &ppid.id, sizeof(key));
mlx5_esw_offloads_devcom_init(esw, key);
memcpy(&attr.key.val, &ppid.id, sizeof(attr.key.val));
attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS;
attr.net = mlx5_core_net(esw->dev);
mlx5_esw_offloads_devcom_init(esw, &attr);
}
return 0;

View File

@ -433,7 +433,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key);
void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw,
const struct mlx5_devcom_match_attr *attr);
void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw);
bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
@ -928,7 +929,9 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) {}
static inline void
mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw,
const struct mlx5_devcom_match_attr *attr) {}
static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {}
static inline bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw) { return false; }
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }

View File

@ -3104,7 +3104,8 @@ static int mlx5_esw_offloads_devcom_event(int event,
return err;
}
void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key)
void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw,
const struct mlx5_devcom_match_attr *attr)
{
int i;
@ -3123,7 +3124,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key)
esw->num_peers = 0;
esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc,
MLX5_DEVCOM_ESW_OFFLOADS,
key,
attr,
mlx5_esw_offloads_devcom_event,
esw);
if (IS_ERR(esw->devcom))

View File

@ -35,6 +35,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/mlx5.h"
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
@ -231,9 +232,13 @@ static void mlx5_do_bond_work(struct work_struct *work);
static void mlx5_ldev_free(struct kref *ref)
{
struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
struct net *net;
if (ldev->nb.notifier_call) {
net = read_pnet(&ldev->net);
unregister_netdevice_notifier_net(net, &ldev->nb);
}
if (ldev->nb.notifier_call)
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
destroy_workqueue(ldev->wq);
@ -271,7 +276,8 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
ldev->nb.notifier_call = mlx5_lag_netdev_event;
if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
write_pnet(&ldev->net, mlx5_core_net(dev));
if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
ldev->nb.notifier_call = NULL;
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
}
@ -1404,6 +1410,36 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
return 0;
}
static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
{
mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
}
/*
 * Register the MLX5_DEVCOM_HCA_PORTS devcom component for @dev.
 *
 * The component is matched on the value returned by
 * mlx5_query_nic_system_image_guid() together with the net namespace
 * returned by mlx5_core_net().
 *
 * Return: 0 on success, otherwise the error encoded in the pointer returned
 * by mlx5_devcom_register_component(). On failure that error pointer is left
 * in dev->priv.hca_devcom_comp.
 */
static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
{
	struct mlx5_devcom_match_attr attr = {};
	long err;

	attr.key.val = mlx5_query_nic_system_image_guid(dev);
	attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS;
	attr.net = mlx5_core_net(dev);

	/* This component is used to sync adding core_dev to lag_dev and to
	 * sync changes of mlx5_adev_devices between LAG layer and other
	 * layers.
	 */
	dev->priv.hca_devcom_comp =
		mlx5_devcom_register_component(dev->priv.devc,
					       MLX5_DEVCOM_HCA_PORTS,
					       &attr, NULL, dev);
	if (!IS_ERR(dev->priv.hca_devcom_comp))
		return 0;

	err = PTR_ERR(dev->priv.hca_devcom_comp);
	mlx5_core_err(dev,
		      "Failed to register devcom HCA component, err: %ld\n",
		      err);
	return err;
}
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
@ -1425,6 +1461,7 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
}
mlx5_ldev_remove_mdev(ldev, dev);
mutex_unlock(&ldev->lock);
mlx5_lag_unregister_hca_devcom_comp(dev);
mlx5_ldev_put(ldev);
}
@ -1435,7 +1472,7 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
if (!mlx5_lag_is_supported(dev))
return;
if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
if (mlx5_lag_register_hca_devcom_comp(dev))
return;
recheck:

View File

@ -67,6 +67,7 @@ struct mlx5_lag {
struct workqueue_struct *wq;
struct delayed_work bond_work;
struct notifier_block nb;
possible_net_t net;
struct lag_mp lag_mp;
struct mlx5_lag_port_sel port_sel;
/* Protect lag fields/state changes */

View File

@ -1435,14 +1435,20 @@ static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared)
static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key)
{
struct mlx5_core_dev *peer_dev, *next = NULL;
struct mlx5_devcom_match_attr attr = {
.key.val = key,
};
struct mlx5_devcom_comp_dev *compd;
struct mlx5_devcom_comp_dev *pos;
mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc,
MLX5_DEVCOM_SHARED_CLOCK,
key, NULL, mdev);
if (IS_ERR(mdev->clock_state->compdev))
compd = mlx5_devcom_register_component(mdev->priv.devc,
MLX5_DEVCOM_SHARED_CLOCK,
&attr, NULL, mdev);
if (IS_ERR(compd))
return;
mdev->clock_state->compdev = compd;
mlx5_devcom_comp_lock(mdev->clock_state->compdev);
mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
if (peer_dev->clock) {

View File

@ -4,6 +4,7 @@
#include <linux/mlx5/vport.h>
#include <linux/list.h>
#include "lib/devcom.h"
#include "lib/mlx5.h"
#include "mlx5_core.h"
static LIST_HEAD(devcom_dev_list);
@ -22,11 +23,17 @@ struct mlx5_devcom_dev {
struct kref ref;
};
/*
 * Match identity stored in a devcom component. It is filled from the
 * caller's struct mlx5_devcom_match_attr in mlx5_devcom_comp_alloc() and
 * compared against later registrations in devcom_component_equal().
 */
struct mlx5_devcom_key {
/* Match flags copied from the attribute (e.g. MLX5_DEVCOM_MATCH_FLAGS_NS). */
u32 flags;
/* Match key copied from the attribute; compared with memcmp(). */
union mlx5_devcom_match_key key;
/* Net namespace; written only when MLX5_DEVCOM_MATCH_FLAGS_NS is set. */
possible_net_t net;
};
struct mlx5_devcom_comp {
struct list_head comp_list;
enum mlx5_devcom_component id;
u64 key;
struct list_head comp_dev_list_head;
struct mlx5_devcom_key key;
mlx5_devcom_event_handler_t handler;
struct kref ref;
bool ready;
@ -108,7 +115,8 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc)
}
static struct mlx5_devcom_comp *
mlx5_devcom_comp_alloc(u64 id, u64 key, mlx5_devcom_event_handler_t handler)
mlx5_devcom_comp_alloc(u64 id, const struct mlx5_devcom_match_attr *attr,
mlx5_devcom_event_handler_t handler)
{
struct mlx5_devcom_comp *comp;
@ -117,7 +125,10 @@ mlx5_devcom_comp_alloc(u64 id, u64 key, mlx5_devcom_event_handler_t handler)
return ERR_PTR(-ENOMEM);
comp->id = id;
comp->key = key;
comp->key.key = attr->key;
comp->key.flags = attr->flags;
if (attr->flags & MLX5_DEVCOM_MATCH_FLAGS_NS)
write_pnet(&comp->key.net, attr->net);
comp->handler = handler;
init_rwsem(&comp->sem);
lockdep_register_key(&comp->lock_key);
@ -180,21 +191,34 @@ devcom_free_comp_dev(struct mlx5_devcom_comp_dev *devcom)
static bool
devcom_component_equal(struct mlx5_devcom_comp *devcom,
enum mlx5_devcom_component id,
u64 key)
const struct mlx5_devcom_match_attr *attr)
{
return devcom->id == id && devcom->key == key;
if (devcom->id != id)
return false;
if (devcom->key.flags != attr->flags)
return false;
if (memcmp(&devcom->key.key, &attr->key, sizeof(devcom->key.key)))
return false;
if (devcom->key.flags & MLX5_DEVCOM_MATCH_FLAGS_NS &&
!net_eq(read_pnet(&devcom->key.net), attr->net))
return false;
return true;
}
static struct mlx5_devcom_comp *
devcom_component_get(struct mlx5_devcom_dev *devc,
enum mlx5_devcom_component id,
u64 key,
const struct mlx5_devcom_match_attr *attr,
mlx5_devcom_event_handler_t handler)
{
struct mlx5_devcom_comp *comp;
devcom_for_each_component(comp) {
if (devcom_component_equal(comp, id, key)) {
if (devcom_component_equal(comp, id, attr)) {
if (handler == comp->handler) {
kref_get(&comp->ref);
return comp;
@ -212,7 +236,7 @@ devcom_component_get(struct mlx5_devcom_dev *devc,
struct mlx5_devcom_comp_dev *
mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
enum mlx5_devcom_component id,
u64 key,
const struct mlx5_devcom_match_attr *attr,
mlx5_devcom_event_handler_t handler,
void *data)
{
@ -223,14 +247,14 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
return ERR_PTR(-EINVAL);
mutex_lock(&comp_list_lock);
comp = devcom_component_get(devc, id, key, handler);
comp = devcom_component_get(devc, id, attr, handler);
if (IS_ERR(comp)) {
devcom = ERR_PTR(-EINVAL);
goto out_unlock;
}
if (!comp) {
comp = mlx5_devcom_comp_alloc(id, key, handler);
comp = mlx5_devcom_comp_alloc(id, attr, handler);
if (IS_ERR(comp)) {
devcom = ERR_CAST(comp);
goto out_unlock;

View File

@ -6,6 +6,20 @@
#include <linux/mlx5/driver.h>
/*
 * Flags for struct mlx5_devcom_match_attr::flags.
 *
 * NOTE(review): the enum tag is spelled "devom" rather than "devcom"; it is
 * left as is because renaming could affect users outside this view.
 */
enum mlx5_devom_match_flags {
/* Also match on the net namespace supplied in the attribute. */
MLX5_DEVCOM_MATCH_FLAGS_NS = BIT(0),
};
/* Key used to match devcom components; currently a single 64-bit value. */
union mlx5_devcom_match_key {
u64 val;
};
/*
 * Matching attributes passed to mlx5_devcom_register_component() to select
 * which devcom component a device joins.
 */
struct mlx5_devcom_match_attr {
/* Bitmask of enum mlx5_devom_match_flags values. */
u32 flags;
/* Key that must be equal for two registrations to match. */
union mlx5_devcom_match_key key;
/* Net namespace; used for matching when MLX5_DEVCOM_MATCH_FLAGS_NS is set. */
struct net *net;
};
enum mlx5_devcom_component {
MLX5_DEVCOM_ESW_OFFLOADS,
MLX5_DEVCOM_MPV,
@ -25,7 +39,7 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc);
struct mlx5_devcom_comp_dev *
mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
enum mlx5_devcom_component id,
u64 key,
const struct mlx5_devcom_match_attr *attr,
mlx5_devcom_event_handler_t handler,
void *data);
void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom);

View File

@ -210,13 +210,17 @@ static void sd_cleanup(struct mlx5_core_dev *dev)
static int sd_register(struct mlx5_core_dev *dev)
{
struct mlx5_devcom_comp_dev *devcom, *pos;
struct mlx5_devcom_match_attr attr = {};
struct mlx5_core_dev *peer, *primary;
struct mlx5_sd *sd, *primary_sd;
int err, i;
sd = mlx5_get_sd(dev);
attr.key.val = sd->group_id;
attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS;
attr.net = mlx5_core_net(dev);
devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
sd->group_id, NULL, dev);
&attr, NULL, dev);
if (IS_ERR(devcom))
return PTR_ERR(devcom);

View File

@ -973,27 +973,6 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev)
mlx5_pci_disable_device(dev);
}
/*
 * Register the MLX5_DEVCOM_HCA_PORTS devcom component for @dev, keyed by the
 * value returned from mlx5_query_nic_system_image_guid(). Does nothing when
 * mlx5_lag_is_supported() is false. A registration failure is only logged;
 * the error pointer stays in dev->priv.hca_devcom_comp.
 */
static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev)
{
/* This component is used to sync adding core_dev to lag_dev and to sync
* changes of mlx5_adev_devices between LAG layer and other layers.
*/
if (!mlx5_lag_is_supported(dev))
return;
dev->priv.hca_devcom_comp =
mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS,
mlx5_query_nic_system_image_guid(dev),
NULL, dev);
if (IS_ERR(dev->priv.hca_devcom_comp))
mlx5_core_err(dev, "Failed to register devcom HCA component\n");
}
/* Unregister the devcom component stored in dev->priv.hca_devcom_comp. */
static void mlx5_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
{
mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
}
static int mlx5_init_once(struct mlx5_core_dev *dev)
{
int err;
@ -1002,7 +981,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
if (IS_ERR(dev->priv.devc))
mlx5_core_warn(dev, "failed to register devcom device %ld\n",
PTR_ERR(dev->priv.devc));
mlx5_register_hca_devcom_comp(dev);
err = mlx5_query_board_id(dev);
if (err) {
@ -1140,7 +1118,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
err_irq_cleanup:
mlx5_irq_table_cleanup(dev);
err_devcom:
mlx5_unregister_hca_devcom_comp(dev);
mlx5_devcom_unregister_device(dev->priv.devc);
return err;
@ -1171,7 +1148,6 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_events_cleanup(dev);
mlx5_eq_table_cleanup(dev);
mlx5_irq_table_cleanup(dev);
mlx5_unregister_hca_devcom_comp(dev);
mlx5_devcom_unregister_device(dev->priv.devc);
}