From 904e112ad431492b34f235f59738e8312802bbf9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 01:11:12 +0200 Subject: [PATCH 1/6] net: dsa: reorder PHY initialization with MTU setup in slave.c In dsa_slave_create() there are 2 sections that take rtnl_lock(): MTU change and netdev registration. They are separated by PHY initialization. There isn't any strict ordering requirement except for the fact that netdev registration should be last. Therefore, we can perform the MTU change a bit later, after the PHY setup. A future change will then be able to merge the two rtnl_lock sections into one. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 88f7b8686dac..88bcdba92fa7 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2011,13 +2011,6 @@ int dsa_slave_create(struct dsa_port *port) port->slave = slave_dev; dsa_slave_setup_tagger(slave_dev); - rtnl_lock(); - ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); - rtnl_unlock(); - if (ret && ret != -EOPNOTSUPP) - dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", - ret, ETH_DATA_LEN, port->index); - netif_carrier_off(slave_dev); ret = dsa_slave_phy_setup(slave_dev); @@ -2028,6 +2021,13 @@ int dsa_slave_create(struct dsa_port *port) goto out_gcells; } + rtnl_lock(); + ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); + rtnl_unlock(); + if (ret && ret != -EOPNOTSUPP) + dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", + ret, ETH_DATA_LEN, port->index); + rtnl_lock(); ret = register_netdevice(slave_dev); From e31dbd3b6aba585231cd84a87adeb22e7c6a8c19 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 01:11:13 +0200 Subject: [PATCH 2/6] net: dsa: merge rtnl_lock sections in dsa_slave_create Currently dsa_slave_create() has two sequences of rtnl_lock/rtnl_unlock in a row. Remove the rtnl_unlock() and rtnl_lock() in between, such that the operation can execute slighly faster. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 88bcdba92fa7..22241afcac81 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2022,14 +2022,12 @@ int dsa_slave_create(struct dsa_port *port) } rtnl_lock(); + ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); - rtnl_unlock(); if (ret && ret != -EOPNOTSUPP) dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", ret, ETH_DATA_LEN, port->index); - rtnl_lock(); - ret = register_netdevice(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", From a1ff94c2973c43bc1e2677ac63ebb15b1d1ff846 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 01:11:14 +0200 Subject: [PATCH 3/6] net: dsa: stop updating master MTU from master.c At present there are two paths for changing the MTU of the DSA master. The first is: dsa_tree_setup -> dsa_tree_setup_ports -> dsa_port_setup -> dsa_slave_create -> dsa_slave_change_mtu -> dev_set_mtu(master) The second is: dsa_tree_setup -> dsa_tree_setup_master -> dsa_master_setup -> dev_set_mtu(dev) So the dev_set_mtu() call from dsa_master_setup() has been effectively superseded by the dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN) that is done from dsa_slave_create() for each user port. The later function also updates the master MTU according to the largest user port MTU from the tree. Therefore, updating the master MTU through a separate code path isn't needed. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/master.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/net/dsa/master.c b/net/dsa/master.c index e8e19857621b..f4efb244f91d 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -330,28 +330,13 @@ static const struct attribute_group dsa_group = { .attrs = dsa_slave_attrs, }; -static void dsa_master_reset_mtu(struct net_device *dev) -{ - int err; - - rtnl_lock(); - err = dev_set_mtu(dev, ETH_DATA_LEN); - if (err) - netdev_dbg(dev, - "Unable to reset MTU to exclude DSA overheads\n"); - rtnl_unlock(); -} - static struct lock_class_key dsa_master_addr_list_lock_key; int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { - const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; struct dsa_switch *ds = cpu_dp->ds; struct device_link *consumer_link; - int mtu, ret; - - mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); + int ret; /* The DSA master must use SET_NETDEV_DEV for this to work. */ consumer_link = device_link_add(ds->dev, dev->dev.parent, @@ -361,13 +346,6 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) "Failed to create a device link to DSA switch %s\n", dev_name(ds->dev)); - rtnl_lock(); - ret = dev_set_mtu(dev, mtu); - rtnl_unlock(); - if (ret) - netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n", - ret, mtu); - /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get * sent to the tag format's receive function. @@ -405,7 +383,6 @@ void dsa_master_teardown(struct net_device *dev) sysfs_remove_group(&dev->dev.kobj, &dsa_group); dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); - dsa_master_reset_mtu(dev); dsa_master_set_promiscuity(dev, -1); dev->dsa_ptr = NULL; From c146f9bc195a9dc3ad7fd000a14540e7c9df952d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 01:11:15 +0200 Subject: [PATCH 4/6] net: dsa: hold rtnl_mutex when calling dsa_master_{setup,teardown} DSA needs to simulate master tracking events when a binding is first with a DSA master established and torn down, in order to give drivers the simplifying guarantee that ->master_state_change calls are made only when the master's readiness state to pass traffic changes. master_state_change() provide a operational bool that DSA driver can use to understand if DSA master is operational or not. To avoid races, we need to block the reception of NETDEV_UP/NETDEV_CHANGE/NETDEV_GOING_DOWN events in the netdev notifier chain while we are changing the master's dev->dsa_ptr (this changes what netdev_uses_dsa(dev) reports). The dsa_master_setup() and dsa_master_teardown() functions optionally require the rtnl_mutex to be held, if the tagger needs the master to be promiscuous, these functions call dev_set_promiscuity(). Move the rtnl_lock() from that function and make it top-level. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 8 ++++++++ net/dsa/master.c | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a0d84f9f864f..52fb1958b535 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1038,6 +1038,8 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) struct dsa_port *dp; int err; + rtnl_lock(); + list_for_each_entry(dp, &dst->ports, list) { if (dsa_port_is_cpu(dp)) { err = dsa_master_setup(dp->master, dp); @@ -1046,6 +1048,8 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) } } + rtnl_unlock(); + return 0; } @@ -1053,9 +1057,13 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) { struct dsa_port *dp; + rtnl_lock(); + list_for_each_entry(dp, &dst->ports, list) if (dsa_port_is_cpu(dp)) dsa_master_teardown(dp->master); + + rtnl_unlock(); } static int dsa_tree_setup_lags(struct dsa_switch_tree *dst) diff --git a/net/dsa/master.c b/net/dsa/master.c index f4efb244f91d..2199104ca7df 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -267,9 +267,9 @@ static void dsa_master_set_promiscuity(struct net_device *dev, int inc) if (!ops->promisc_on_master) return; - rtnl_lock(); + ASSERT_RTNL(); + dev_set_promiscuity(dev, inc); - rtnl_unlock(); } static ssize_t tagging_show(struct device *d, struct device_attribute *attr, From 1e3f407f3cacc5dcfe27166c412ed9bc263d82bf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 01:11:16 +0200 Subject: [PATCH 5/6] net: dsa: first set up shared ports, then non-shared ports After commit a57d8c217aad ("net: dsa: flush switchdev workqueue before tearing down CPU/DSA ports"), the port setup and teardown procedure became asymmetric. The fact of the matter is that user ports need the shared ports to be up before they can be used for CPU-initiated termination. And since we register net devices for the user ports, those won't be functional until we also call the setup for the shared (CPU, DSA) ports. But we may do that later, depending on the port numbering scheme of the hardware we are dealing with. It just makes sense that all shared ports are brought up before any user port is. I can't pinpoint any issue due to the current behavior, but let's change it nonetheless, for consistency's sake. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 50 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 52fb1958b535..ea0f02a24b8b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1003,23 +1003,28 @@ static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) dsa_switch_teardown(dp->ds); } -static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) +/* Bring shared ports up first, then non-shared ports */ +static int dsa_tree_setup_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; - int err; + int err = 0; list_for_each_entry(dp, &dst->ports, list) { - err = dsa_switch_setup(dp->ds); - if (err) - goto teardown; + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) { + err = dsa_port_setup(dp); + if (err) + goto teardown; + } } list_for_each_entry(dp, &dst->ports, list) { - err = dsa_port_setup(dp); - if (err) { - err = dsa_port_reinit_as_unused(dp); - if (err) - goto teardown; + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) { + err = dsa_port_setup(dp); + if (err) { + err = dsa_port_reinit_as_unused(dp); + if (err) + goto teardown; + } } } @@ -1028,7 +1033,21 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) teardown: dsa_tree_teardown_ports(dst); - dsa_tree_teardown_switches(dst); + return err; +} + +static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + int err = 0; + + list_for_each_entry(dp, &dst->ports, list) { + err = dsa_switch_setup(dp->ds); + if (err) { + dsa_tree_teardown_switches(dst); + break; + } + } return err; } @@ -1115,10 +1134,14 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_cpu_ports; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_ports(dst); if (err) goto teardown_switches; + err = dsa_tree_setup_master(dst); + if (err) + goto teardown_ports; + err = dsa_tree_setup_lags(dst); if (err) goto teardown_master; @@ -1131,8 +1154,9 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) teardown_master: dsa_tree_teardown_master(dst); -teardown_switches: +teardown_ports: dsa_tree_teardown_ports(dst); +teardown_switches: dsa_tree_teardown_switches(dst); teardown_cpu_ports: dsa_tree_teardown_cpu_ports(dst); From 11fd667dac315ea3f2469961f6d2869271a46cae Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 01:11:17 +0200 Subject: [PATCH 6/6] net: dsa: setup master before ports It is said that as soon as a network interface is registered, all its resources should have already been prepared, so that it is available for sending and receiving traffic. One of the resources needed by a DSA slave interface is the master. dsa_tree_setup -> dsa_tree_setup_ports -> dsa_port_setup -> dsa_slave_create -> register_netdevice -> dsa_tree_setup_master -> dsa_master_setup -> sets up master->dsa_ptr, which enables reception Therefore, there is a short period of time after register_netdevice() during which the master isn't prepared to pass traffic to the DSA layer (master->dsa_ptr is checked by eth_type_trans). Same thing during unregistration, there is a time frame in which packets might be missed. Note that this change opens us to another race: dsa_master_find_slave() will get invoked potentially earlier than the slave creation, and later than the slave deletion. Since dp->slave starts off as a NULL pointer, the earlier calls aren't a problem, but the later calls are. To avoid use-after-free, we should zeroize dp->slave before calling dsa_slave_destroy(). In practice I cannot really test real life improvements brought by this change, since in my systems, netdevice creation races with PHY autoneg which takes a few seconds to complete, and that masks quite a few races. Effects might be noticeable in a setup with fixed links all the way to an external system. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index ea0f02a24b8b..3d21521453fe 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -561,6 +561,7 @@ static void dsa_port_teardown(struct dsa_port *dp) struct devlink_port *dlp = &dp->devlink_port; struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a, *tmp; + struct net_device *slave; if (!dp->setup) return; @@ -582,9 +583,11 @@ static void dsa_port_teardown(struct dsa_port *dp) dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: - if (dp->slave) { - dsa_slave_destroy(dp->slave); + slave = dp->slave; + + if (slave) { dp->slave = NULL; + dsa_slave_destroy(slave); } break; } @@ -1134,17 +1137,17 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_cpu_ports; - err = dsa_tree_setup_ports(dst); + err = dsa_tree_setup_master(dst); if (err) goto teardown_switches; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_ports(dst); if (err) - goto teardown_ports; + goto teardown_master; err = dsa_tree_setup_lags(dst); if (err) - goto teardown_master; + goto teardown_ports; dst->setup = true; @@ -1152,10 +1155,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) return 0; -teardown_master: - dsa_tree_teardown_master(dst); teardown_ports: dsa_tree_teardown_ports(dst); +teardown_master: + dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); teardown_cpu_ports: @@ -1173,10 +1176,10 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_lags(dst); - dsa_tree_teardown_master(dst); - dsa_tree_teardown_ports(dst); + dsa_tree_teardown_master(dst); + dsa_tree_teardown_switches(dst); dsa_tree_teardown_cpu_ports(dst);