Merge branch 'net-mlx5-fixes-for-socket-direct'

Tariq Toukan says:

====================
net/mlx5: Fixes for Socket-Direct

This series fixes several race conditions and bugs in the mlx5
Socket-Direct (SD) single netdev flow.

Patch 1 serializes mlx5_sd_init()/mlx5_sd_cleanup() with
mlx5_devcom_comp_lock() and tracks the SD group state on the primary
device, preventing concurrent or duplicate bring-up/tear-down.

Patch 2 fixes the debugfs "multi-pf" directory being stored on the
calling device's sd struct instead of the primary's, which caused
memory leaks and recreation errors when cleanup ran from a different PF.

Patch 3 fixes a race where a secondary PF could access the primary's
auxiliary device after it had been unbound, by holding the primary's
device lock while operating on its auxiliary device.

Patch 4 fixes missing cleanup on ETH probe errors. The analogous gap on
the resume path requires introducing sd_suspend/resume APIs that only
destroy FW resources and is left for a follow-up series.
====================

Link: https://patch.msgid.link/20260504180206.268568-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-05-05 19:13:12 -07:00
commit 22675f0726
3 changed files with 122 additions and 20 deletions

View File

@ -6774,9 +6774,11 @@ static int mlx5e_resume(struct auxiliary_device *adev)
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
if (actual_adev)
return _mlx5e_resume(actual_adev);
return 0;
if (actual_adev) {
err = _mlx5e_resume(actual_adev);
mlx5_sd_put_adev(actual_adev, adev);
}
return err;
}
static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg)
@ -6815,6 +6817,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
err = _mlx5e_suspend(actual_adev, false);
mlx5_sd_cleanup(mdev);
if (actual_adev)
mlx5_sd_put_adev(actual_adev, adev);
return err;
}
@ -6912,9 +6916,19 @@ static int mlx5e_probe(struct auxiliary_device *adev,
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
if (actual_adev)
return _mlx5e_probe(actual_adev);
if (actual_adev) {
err = _mlx5e_probe(actual_adev);
if (err)
goto sd_cleanup;
mlx5_sd_put_adev(actual_adev, adev);
}
return 0;
sd_cleanup:
mlx5_sd_cleanup(mdev);
if (actual_adev)
mlx5_sd_put_adev(actual_adev, adev);
return err;
}
static void _mlx5e_remove(struct auxiliary_device *adev)
@ -6966,6 +6980,8 @@ static void mlx5e_remove(struct auxiliary_device *adev)
_mlx5e_remove(actual_adev);
mlx5_sd_cleanup(mdev);
if (actual_adev)
mlx5_sd_put_adev(actual_adev, adev);
}
static const struct auxiliary_device_id mlx5e_id_table[] = {

View File

@ -18,6 +18,7 @@ struct mlx5_sd {
u8 host_buses;
struct mlx5_devcom_comp_dev *devcom;
struct dentry *dfs;
u8 state;
bool primary;
union {
struct { /* primary */
@ -31,6 +32,11 @@ struct mlx5_sd {
};
};
enum mlx5_sd_state {
MLX5_SD_STATE_DOWN = 0,
MLX5_SD_STATE_UP,
};
static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
mlx5_devcom_comp_lock(sd->devcom);
mlx5_devcom_comp_set_ready(sd->devcom, false);
mlx5_devcom_comp_unlock(sd->devcom);
mlx5_devcom_unregister_component(sd->devcom);
}
@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
struct mlx5_core_dev *primary, *pos, *to;
struct mlx5_sd *sd = mlx5_get_sd(dev);
u8 alias_key[ACCESS_KEY_LEN];
struct mlx5_sd *primary_sd;
int err, i;
err = sd_init(dev);
@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_cleanup;
mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
return 0;
goto out;
primary = mlx5_sd_get_primary(dev);
if (!primary)
goto out;
primary_sd = mlx5_get_sd(primary);
if (primary_sd->state != MLX5_SD_STATE_DOWN)
goto out;
for (i = 0; i < ACCESS_KEY_LEN; i++)
alias_key[i] = get_random_u8();
@ -452,9 +463,13 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_unregister;
sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary));
debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id);
debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops);
primary_sd->dfs =
debugfs_create_dir("multi-pf",
mlx5_debugfs_get_dev_root(primary));
debugfs_create_x32("group_id", 0400, primary_sd->dfs,
&primary_sd->group_id);
debugfs_create_file("primary", 0400, primary_sd->dfs, primary,
&dev_fops);
mlx5_sd_for_each_secondary(i, primary, pos) {
char name[32];
@ -464,7 +479,8 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
goto err_unset_secondaries;
snprintf(name, sizeof(name), "secondary_%d", i - 1);
debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops);
debugfs_create_file(name, 0400, primary_sd->dfs, pos,
&dev_fops);
}
@ -472,6 +488,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
sd_print_group(primary);
primary_sd->state = MLX5_SD_STATE_UP;
out:
mlx5_devcom_comp_unlock(sd->devcom);
return 0;
err_unset_secondaries:
@ -479,8 +498,18 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
mlx5_sd_for_each_secondary_to(i, primary, to, pos)
sd_cmd_unset_secondary(pos);
sd_cmd_unset_primary(primary);
debugfs_remove_recursive(sd->dfs);
debugfs_remove_recursive(primary_sd->dfs);
primary_sd->dfs = NULL;
err_sd_unregister:
mlx5_sd_for_each_secondary(i, primary, pos) {
struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
primary_sd->secondaries[i - 1] = NULL;
peer_sd->primary_dev = NULL;
}
primary_sd->primary = false;
mlx5_devcom_comp_set_ready(sd->devcom, false);
mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
err_sd_cleanup:
sd_cleanup(dev);
@ -491,42 +520,97 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
struct mlx5_core_dev *primary, *pos;
struct mlx5_sd *primary_sd;
int i;
if (!sd)
return;
mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
goto out;
goto out_unlock;
primary = mlx5_sd_get_primary(dev);
if (!primary)
goto out_ready_false;
primary_sd = mlx5_get_sd(primary);
if (primary_sd->state != MLX5_SD_STATE_UP)
goto out_clear_peers;
mlx5_sd_for_each_secondary(i, primary, pos)
sd_cmd_unset_secondary(pos);
sd_cmd_unset_primary(primary);
debugfs_remove_recursive(sd->dfs);
debugfs_remove_recursive(primary_sd->dfs);
primary_sd->dfs = NULL;
sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
out:
primary_sd->state = MLX5_SD_STATE_DOWN;
out_clear_peers:
mlx5_sd_for_each_secondary(i, primary, pos) {
struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
primary_sd->secondaries[i - 1] = NULL;
peer_sd->primary_dev = NULL;
}
primary_sd->primary = false;
out_ready_false:
mlx5_devcom_comp_set_ready(sd->devcom, false);
out_unlock:
mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
sd_cleanup(dev);
}
/* Lock order:
* primary: actual_adev_lock -> SD devcom comp lock
* secondary: SD devcom comp lock -> (drop) -> actual_adev_lock
* The two locks are never held together, so no ABBA.
*/
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
struct auxiliary_device *adev,
int idx)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
struct mlx5_core_dev *primary;
struct mlx5_adev *primary_adev;
if (!sd)
return adev;
if (!mlx5_devcom_comp_is_ready(sd->devcom))
mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
mlx5_devcom_comp_unlock(sd->devcom);
return NULL;
}
primary = mlx5_sd_get_primary(dev);
if (dev == primary)
if (!primary || dev == primary) {
mlx5_devcom_comp_unlock(sd->devcom);
return adev;
}
return &primary->priv.adev[idx]->adev;
primary_adev = primary->priv.adev[idx];
get_device(&primary_adev->adev.dev);
mlx5_devcom_comp_unlock(sd->devcom);
device_lock(&primary_adev->adev.dev);
/* Primary may have completed remove between dropping devcom and
* acquiring device_lock; recheck.
*/
if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
device_unlock(&primary_adev->adev.dev);
put_device(&primary_adev->adev.dev);
return NULL;
}
return &primary_adev->adev;
}
void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
struct auxiliary_device *adev)
{
if (actual_adev != adev) {
device_unlock(&actual_adev->dev);
put_device(&actual_adev->dev);
}
}

View File

@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
struct auxiliary_device *adev,
int idx);
void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
struct auxiliary_device *adev);
int mlx5_sd_init(struct mlx5_core_dev *dev);
void mlx5_sd_cleanup(struct mlx5_core_dev *dev);