mirror of
https://github.com/torvalds/linux.git
synced 2026-06-03 03:53:37 +02:00
Merge branch 'net-sysfs-remove-the-rtnl_trylock-restart_syscall-construction'
Antoine Tenart says: ==================== net-sysfs: remove the rtnl_trylock/restart_syscall construction The series initially aimed at improving spins (and thus delays) while accessing net sysfs under rtnl lock contention[1]. The culprit was the trylock/restart_syscall constructions. There wasn't much interest at the time but it got traction recently for other reasons (lowering the rtnl lock pressure). Since v1[2]: - Do not export rtnl_lock_interruptible [Stephen]. - Add netdev_warn_once messages in rx_queue_add_kobject [Jakub]. Since the RFC[1]: - Limit the breaking of the sysfs protection to sysfs_rtnl_lock() only as this is not needed in the whole rtnl locking section thanks to the additional check on dev_isalive(). This simplifies error handling as well as the unlocking path. - Used an interruptible version of rtnl_lock, as done by Jakub in his experiments. - Removed a WARN_ONCE_ONCE [Greg]. - Removed explicit inline markers [Stephen]. Most of the reasoning is explained in comments added in patch 1. This was tested by stress-testing net sysfs attributes (read/write ops) while adding/removing queues and adding/removing veths, all in parallel. I also used an OCP single node cluster, spawning lots of pods. [1] https://lore.kernel.org/all/20231018154804.420823-1-atenart@kernel.org/T/ [2] https://lore.kernel.org/all/20250117102612.132644-1-atenart@kernel.org/T/ ==================== Link: https://patch.msgid.link/20250204170314.146022-1-atenart@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
fadbe52b3b
|
|
@ -658,6 +658,7 @@ struct netdev_queue {
|
|||
struct Qdisc __rcu *qdisc_sleeping;
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct kobject kobj;
|
||||
const struct attribute_group **groups;
|
||||
#endif
|
||||
unsigned long tx_maxrate;
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ extern void rtnl_lock(void);
|
|||
extern void rtnl_unlock(void);
|
||||
extern int rtnl_trylock(void);
|
||||
extern int rtnl_is_locked(void);
|
||||
extern int rtnl_lock_interruptible(void);
|
||||
extern int rtnl_lock_killable(void);
|
||||
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ struct netdev_rx_queue {
|
|||
struct rps_dev_flow_table __rcu *rps_flow_table;
|
||||
#endif
|
||||
struct kobject kobj;
|
||||
const struct attribute_group **groups;
|
||||
struct net_device *dev;
|
||||
netdevice_tracker dev_tracker;
|
||||
|
||||
|
|
|
|||
|
|
@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev)
|
|||
return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
|
||||
}
|
||||
|
||||
/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
|
||||
* when unregistering a net device and accessing associated sysfs files. The
|
||||
* potential deadlock is as follows:
|
||||
*
|
||||
* CPU 0 CPU 1
|
||||
*
|
||||
* rtnl_lock vfs_read
|
||||
* unregister_netdevice_many kernfs_seq_start
|
||||
* device_del / kobject_put kernfs_get_active (kn->active++)
|
||||
* kernfs_drain sysfs_kf_seq_show
|
||||
* wait_event( rtnl_lock
|
||||
* kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release
|
||||
* -> waits on CPU 1 to decrease kn->active the rtnl lock.
|
||||
*
|
||||
* The historical fix was to use rtnl_trylock with restart_syscall to bail out
|
||||
* of sysfs operations when the lock couldn't be taken. This fixed the above
|
||||
* issue as it allowed CPU 1 to bail out of the ABBA situation.
|
||||
*
|
||||
* But it came with performance issues, as syscalls were being restarted in
|
||||
* loops when there was contention on the rtnl lock, with huge slow downs in
|
||||
* specific scenarios (e.g. lots of virtual interfaces created and userspace
|
||||
* daemons querying their attributes).
|
||||
*
|
||||
* The idea below is to bail out of the active kernfs_node protection
|
||||
* (kn->active) while trying to take the rtnl lock.
|
||||
*
|
||||
* This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
|
||||
* net device is guaranteed to be alive if this returns successfully.
|
||||
*/
|
||||
static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
|
||||
struct net_device *ndev)
|
||||
{
|
||||
struct kernfs_node *kn;
|
||||
int ret = 0;
|
||||
|
||||
/* First, we hold a reference to the net device as the unregistration
|
||||
* path might run in parallel. This will ensure the net device and the
|
||||
* associated sysfs objects won't be freed while we try to take the rtnl
|
||||
* lock.
|
||||
*/
|
||||
dev_hold(ndev);
|
||||
/* sysfs_break_active_protection was introduced to allow self-removal of
|
||||
* devices and their associated sysfs files by bailing out of the
|
||||
* sysfs/kernfs protection. We do this here to allow the unregistration
|
||||
* path to complete in parallel. The following takes a reference on the
|
||||
* kobject and the kernfs_node being accessed.
|
||||
*
|
||||
* This works because we hold a reference onto the net device and the
|
||||
* unregistration path will wait for us eventually in netdev_run_todo
|
||||
* (outside an rtnl lock section).
|
||||
*/
|
||||
kn = sysfs_break_active_protection(kobj, attr);
|
||||
/* We can now try to take the rtnl lock. This can't deadlock us as the
|
||||
* unregistration path is able to drain sysfs files (kernfs_node) thanks
|
||||
* to the above dance.
|
||||
*/
|
||||
if (rtnl_lock_interruptible()) {
|
||||
ret = -ERESTARTSYS;
|
||||
goto unbreak;
|
||||
}
|
||||
/* Check dismantle on the device hasn't started, otherwise deny the
|
||||
* operation.
|
||||
*/
|
||||
if (!dev_isalive(ndev)) {
|
||||
rtnl_unlock();
|
||||
ret = -ENODEV;
|
||||
goto unbreak;
|
||||
}
|
||||
/* We are now sure the device dismantle hasn't started and that it can't
|
||||
* start before we exit the locking section as we hold the rtnl lock.
|
||||
* There's no need to keep the sysfs protection broken nor to hold
|
||||
* a net device reference from that point; that was only needed to take
|
||||
* the rtnl lock.
|
||||
*/
|
||||
unbreak:
|
||||
sysfs_unbreak_active_protection(kn);
|
||||
dev_put(ndev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* use same locking rules as GIF* ioctl's */
|
||||
static ssize_t netdev_show(const struct device *dev,
|
||||
struct device_attribute *attr, char *buf,
|
||||
|
|
@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
|
|||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = (*set)(netdev, new);
|
||||
if (ret == 0)
|
||||
ret = len;
|
||||
|
||||
if (dev_isalive(netdev)) {
|
||||
ret = (*set)(netdev, new);
|
||||
if (ret == 0)
|
||||
ret = len;
|
||||
}
|
||||
rtnl_unlock();
|
||||
err:
|
||||
return ret;
|
||||
|
|
@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
|
|||
struct net_device *netdev = to_net_dev(dev);
|
||||
|
||||
/* The check is also done in change_carrier; this helps returning early
|
||||
* without hitting the trylock/restart in netdev_store.
|
||||
* without hitting the locking section in netdev_store.
|
||||
*/
|
||||
if (!netdev->netdev_ops->ndo_change_carrier)
|
||||
return -EOPNOTSUPP;
|
||||
|
|
@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev,
|
|||
struct net_device *netdev = to_net_dev(dev);
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (netif_running(netdev)) {
|
||||
/* Synchronize carrier state with link watch,
|
||||
|
|
@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev,
|
|||
|
||||
ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
|
||||
}
|
||||
rtnl_unlock();
|
||||
|
||||
rtnl_unlock();
|
||||
return ret;
|
||||
}
|
||||
static DEVICE_ATTR_RW(carrier);
|
||||
|
|
@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev,
|
|||
int ret = -EINVAL;
|
||||
|
||||
/* The check is also done in __ethtool_get_link_ksettings; this helps
|
||||
* returning early without hitting the trylock/restart below.
|
||||
* returning early without hitting the locking section below.
|
||||
*/
|
||||
if (!netdev->ethtool_ops->get_link_ksettings)
|
||||
return ret;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (netif_running(netdev)) {
|
||||
struct ethtool_link_ksettings cmd;
|
||||
|
|
@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev,
|
|||
int ret = -EINVAL;
|
||||
|
||||
/* The check is also done in __ethtool_get_link_ksettings; this helps
|
||||
* returning early without hitting the trylock/restart below.
|
||||
* returning early without hitting the locking section below.
|
||||
*/
|
||||
if (!netdev->ethtool_ops->get_link_ksettings)
|
||||
return ret;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (netif_running(netdev)) {
|
||||
struct ethtool_link_ksettings cmd;
|
||||
|
|
@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
|
|||
if (len > 0 && buf[len - 1] == '\n')
|
||||
--count;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (dev_isalive(netdev)) {
|
||||
ret = dev_set_alias(netdev, buf, count);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
ret = len;
|
||||
netdev_state_change(netdev);
|
||||
}
|
||||
ret = dev_set_alias(netdev, buf, count);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
ret = len;
|
||||
netdev_state_change(netdev);
|
||||
err:
|
||||
rtnl_unlock();
|
||||
|
||||
|
|
@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev,
|
|||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct net_device *netdev = to_net_dev(dev);
|
||||
struct netdev_phys_item_id ppid;
|
||||
ssize_t ret = -EINVAL;
|
||||
|
||||
/* The check is also done in dev_get_phys_port_id; this helps returning
|
||||
* early without hitting the trylock/restart below.
|
||||
* early without hitting the locking section below.
|
||||
*/
|
||||
if (!netdev->netdev_ops->ndo_get_phys_port_id)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (dev_isalive(netdev)) {
|
||||
struct netdev_phys_item_id ppid;
|
||||
ret = dev_get_phys_port_id(netdev, &ppid);
|
||||
if (!ret)
|
||||
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
|
||||
|
||||
ret = dev_get_phys_port_id(netdev, &ppid);
|
||||
if (!ret)
|
||||
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
|
||||
}
|
||||
rtnl_unlock();
|
||||
|
||||
return ret;
|
||||
|
|
@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev,
|
|||
{
|
||||
struct net_device *netdev = to_net_dev(dev);
|
||||
ssize_t ret = -EINVAL;
|
||||
char name[IFNAMSIZ];
|
||||
|
||||
/* The checks are also done in dev_get_phys_port_name; this helps
|
||||
* returning early without hitting the trylock/restart below.
|
||||
* returning early without hitting the locking section below.
|
||||
*/
|
||||
if (!netdev->netdev_ops->ndo_get_phys_port_name &&
|
||||
!netdev->devlink_port)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (dev_isalive(netdev)) {
|
||||
char name[IFNAMSIZ];
|
||||
ret = dev_get_phys_port_name(netdev, name, sizeof(name));
|
||||
if (!ret)
|
||||
ret = sysfs_emit(buf, "%s\n", name);
|
||||
|
||||
ret = dev_get_phys_port_name(netdev, name, sizeof(name));
|
||||
if (!ret)
|
||||
ret = sysfs_emit(buf, "%s\n", name);
|
||||
}
|
||||
rtnl_unlock();
|
||||
|
||||
return ret;
|
||||
|
|
@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev,
|
|||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct net_device *netdev = to_net_dev(dev);
|
||||
struct netdev_phys_item_id ppid = { };
|
||||
ssize_t ret = -EINVAL;
|
||||
|
||||
/* The checks are also done in dev_get_port_parent_id; this helps
|
||||
* returning early without hitting the trylock/restart below. This works
|
||||
* returning early without hitting the locking section below. This works
|
||||
* because recurse is false when calling dev_get_port_parent_id.
|
||||
*/
|
||||
if (!netdev->netdev_ops->ndo_get_port_parent_id &&
|
||||
!netdev->devlink_port)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (dev_isalive(netdev)) {
|
||||
struct netdev_phys_item_id ppid = { };
|
||||
ret = dev_get_port_parent_id(netdev, &ppid, false);
|
||||
if (!ret)
|
||||
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
|
||||
|
||||
ret = dev_get_port_parent_id(netdev, &ppid, false);
|
||||
if (!ret)
|
||||
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
|
||||
}
|
||||
rtnl_unlock();
|
||||
|
||||
return ret;
|
||||
|
|
@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj,
|
|||
static const struct kobj_type rx_queue_ktype = {
|
||||
.sysfs_ops = &rx_queue_sysfs_ops,
|
||||
.release = rx_queue_release,
|
||||
.default_groups = rx_queue_default_groups,
|
||||
.namespace = rx_queue_namespace,
|
||||
.get_ownership = rx_queue_get_ownership,
|
||||
};
|
||||
|
|
@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
|
|||
struct kobject *kobj = &queue->kobj;
|
||||
int error = 0;
|
||||
|
||||
/* Rx queues are cleared in rx_queue_release to allow later
|
||||
* re-registration. This is triggered when their kobj refcount is
|
||||
* dropped.
|
||||
*
|
||||
* If a queue is removed while both a read (or write) operation and
|
||||
* the re-addition of the same queue are pending (waiting on rtnl_lock)
|
||||
* it might happen that the re-addition will execute before the read,
|
||||
* preventing the initial removal from ever happening (queue's kobj refcount
|
||||
* won't drop enough because of the pending read). In such a rare case,
|
||||
* return to allow the removal operation to complete.
|
||||
*/
|
||||
if (unlikely(kobj->state_initialized)) {
|
||||
netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/* Kobject_put later will trigger rx_queue_release call which
|
||||
* decreases dev refcount: Take that reference here
|
||||
*/
|
||||
|
|
@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
|
|||
if (error)
|
||||
goto err;
|
||||
|
||||
queue->groups = rx_queue_default_groups;
|
||||
error = sysfs_create_groups(kobj, queue->groups);
|
||||
if (error)
|
||||
goto err;
|
||||
|
||||
if (dev->sysfs_rx_queue_group) {
|
||||
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
|
||||
if (error)
|
||||
goto err;
|
||||
goto err_default_groups;
|
||||
}
|
||||
|
||||
error = rx_queue_default_mask(dev, queue);
|
||||
if (error)
|
||||
goto err;
|
||||
goto err_default_groups;
|
||||
|
||||
kobject_uevent(kobj, KOBJ_ADD);
|
||||
|
||||
return error;
|
||||
|
||||
err_default_groups:
|
||||
sysfs_remove_groups(kobj, queue->groups);
|
||||
err:
|
||||
kobject_put(kobj);
|
||||
return error;
|
||||
|
|
@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
|
|||
}
|
||||
|
||||
while (--i >= new_num) {
|
||||
struct kobject *kobj = &dev->_rx[i].kobj;
|
||||
struct netdev_rx_queue *queue = &dev->_rx[i];
|
||||
struct kobject *kobj = &queue->kobj;
|
||||
|
||||
if (!refcount_read(&dev_net(dev)->ns.count))
|
||||
kobj->uevent_suppress = 1;
|
||||
if (dev->sysfs_rx_queue_group)
|
||||
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
|
||||
sysfs_remove_groups(kobj, queue->groups);
|
||||
kobject_put(kobj);
|
||||
}
|
||||
|
||||
|
|
@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num,
|
|||
*/
|
||||
struct netdev_queue_attribute {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct netdev_queue *queue, char *buf);
|
||||
ssize_t (*store)(struct netdev_queue *queue,
|
||||
const char *buf, size_t len);
|
||||
ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf);
|
||||
ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, const char *buf,
|
||||
size_t len);
|
||||
};
|
||||
#define to_netdev_queue_attr(_attr) \
|
||||
container_of(_attr, struct netdev_queue_attribute, attr)
|
||||
|
|
@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj,
|
|||
if (!attribute->show)
|
||||
return -EIO;
|
||||
|
||||
return attribute->show(queue, buf);
|
||||
return attribute->show(kobj, attr, queue, buf);
|
||||
}
|
||||
|
||||
static ssize_t netdev_queue_attr_store(struct kobject *kobj,
|
||||
|
|
@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj,
|
|||
if (!attribute->store)
|
||||
return -EIO;
|
||||
|
||||
return attribute->store(queue, buf, count);
|
||||
return attribute->store(kobj, attr, queue, buf, count);
|
||||
}
|
||||
|
||||
static const struct sysfs_ops netdev_queue_sysfs_ops = {
|
||||
|
|
@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
|
|||
.store = netdev_queue_attr_store,
|
||||
};
|
||||
|
||||
static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
|
||||
static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);
|
||||
|
||||
|
|
@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
|
|||
return i;
|
||||
}
|
||||
|
||||
static ssize_t traffic_class_show(struct netdev_queue *queue,
|
||||
char *buf)
|
||||
static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
int num_tc, tc;
|
||||
int index;
|
||||
int num_tc, tc, index, ret;
|
||||
|
||||
if (!netif_is_multiqueue(dev))
|
||||
return -ENOENT;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
index = get_netdev_queue_index(queue);
|
||||
|
||||
|
|
@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
|
|||
}
|
||||
|
||||
#ifdef CONFIG_XPS
|
||||
static ssize_t tx_maxrate_show(struct netdev_queue *queue,
|
||||
char *buf)
|
||||
static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
|
||||
}
|
||||
|
||||
static ssize_t tx_maxrate_store(struct netdev_queue *queue,
|
||||
const char *buf, size_t len)
|
||||
static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, const char *buf,
|
||||
size_t len)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
int err, index = get_netdev_queue_index(queue);
|
||||
struct net_device *dev = queue->dev;
|
||||
u32 rate = 0;
|
||||
|
||||
if (!capable(CAP_NET_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
/* The check is also done later; this helps returning early without
|
||||
* hitting the trylock/restart below.
|
||||
* hitting the locking section below.
|
||||
*/
|
||||
if (!dev->netdev_ops->ndo_set_tx_maxrate)
|
||||
return -EOPNOTSUPP;
|
||||
|
|
@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
|
|||
if (err < 0)
|
||||
return err;
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
err = sysfs_rtnl_lock(kobj, attr, dev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = -EOPNOTSUPP;
|
||||
if (dev->netdev_ops->ndo_set_tx_maxrate)
|
||||
err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
|
||||
|
||||
rtnl_unlock();
|
||||
if (!err) {
|
||||
queue->tx_maxrate = rate;
|
||||
rtnl_unlock();
|
||||
return len;
|
||||
}
|
||||
|
||||
rtnl_unlock();
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count,
|
|||
return count;
|
||||
}
|
||||
|
||||
static ssize_t bql_show_hold_time(struct netdev_queue *queue,
|
||||
char *buf)
|
||||
static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct dql *dql = &queue->dql;
|
||||
|
||||
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
|
||||
}
|
||||
|
||||
static ssize_t bql_set_hold_time(struct netdev_queue *queue,
|
||||
const char *buf, size_t len)
|
||||
static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, const char *buf,
|
||||
size_t len)
|
||||
{
|
||||
struct dql *dql = &queue->dql;
|
||||
unsigned int value;
|
||||
|
|
@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
|
|||
= __ATTR(hold_time, 0644,
|
||||
bql_show_hold_time, bql_set_hold_time);
|
||||
|
||||
static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
|
||||
static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct dql *dql = &queue->dql;
|
||||
|
||||
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
|
||||
}
|
||||
|
||||
static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
|
||||
const char *buf, size_t len)
|
||||
static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, const char *buf,
|
||||
size_t len)
|
||||
{
|
||||
struct dql *dql = &queue->dql;
|
||||
unsigned int value;
|
||||
|
|
@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
|
|||
static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
|
||||
__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
|
||||
|
||||
static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
|
||||
static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
|
||||
}
|
||||
|
||||
static ssize_t bql_set_stall_max(struct netdev_queue *queue,
|
||||
const char *buf, size_t len)
|
||||
static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, const char *buf,
|
||||
size_t len)
|
||||
{
|
||||
WRITE_ONCE(queue->dql.stall_max, 0);
|
||||
return len;
|
||||
|
|
@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue,
|
|||
static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
|
||||
__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
|
||||
|
||||
static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
|
||||
static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct dql *dql = &queue->dql;
|
||||
|
||||
|
|
@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
|
|||
static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
|
||||
__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
|
||||
|
||||
static ssize_t bql_show_inflight(struct netdev_queue *queue,
|
||||
char *buf)
|
||||
static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct dql *dql = &queue->dql;
|
||||
|
||||
|
|
@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
|
|||
__ATTR(inflight, 0444, bql_show_inflight, NULL);
|
||||
|
||||
#define BQL_ATTR(NAME, FIELD) \
|
||||
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
|
||||
char *buf) \
|
||||
static ssize_t bql_show_ ## NAME(struct kobject *kobj, \
|
||||
struct attribute *attr, \
|
||||
struct netdev_queue *queue, char *buf) \
|
||||
{ \
|
||||
return bql_show(buf, queue->dql.FIELD); \
|
||||
} \
|
||||
\
|
||||
static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
|
||||
static ssize_t bql_set_ ## NAME(struct kobject *kobj, \
|
||||
struct attribute *attr, \
|
||||
struct netdev_queue *queue, \
|
||||
const char *buf, size_t len) \
|
||||
{ \
|
||||
return bql_set(buf, len, &queue->dql.FIELD); \
|
||||
|
|
@ -1613,19 +1733,21 @@ static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
|
|||
return len < PAGE_SIZE ? len : -EINVAL;
|
||||
}
|
||||
|
||||
static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
|
||||
static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
unsigned int index;
|
||||
int len, tc;
|
||||
int len, tc, ret;
|
||||
|
||||
if (!netif_is_multiqueue(dev))
|
||||
return -ENOENT;
|
||||
|
||||
index = get_netdev_queue_index(queue);
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* If queue belongs to subordinate dev use its map */
|
||||
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
|
||||
|
|
@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Make sure the subordinate device can't be freed */
|
||||
get_device(&dev->dev);
|
||||
/* Increase the net device refcnt to make sure it won't be freed while
|
||||
* xps_queue_show is running.
|
||||
*/
|
||||
dev_hold(dev);
|
||||
rtnl_unlock();
|
||||
|
||||
len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);
|
||||
|
||||
put_device(&dev->dev);
|
||||
dev_put(dev);
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t xps_cpus_store(struct netdev_queue *queue,
|
||||
const char *buf, size_t len)
|
||||
static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, const char *buf,
|
||||
size_t len)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
unsigned int index;
|
||||
|
|
@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
|
|||
return err;
|
||||
}
|
||||
|
||||
if (!rtnl_trylock()) {
|
||||
err = sysfs_rtnl_lock(kobj, attr, dev);
|
||||
if (err) {
|
||||
free_cpumask_var(mask);
|
||||
return restart_syscall();
|
||||
return err;
|
||||
}
|
||||
|
||||
err = netif_set_xps_queue(dev, mask, index);
|
||||
|
|
@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
|
|||
static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
|
||||
= __ATTR_RW(xps_cpus);
|
||||
|
||||
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
|
||||
static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
unsigned int index;
|
||||
int tc;
|
||||
int tc, ret;
|
||||
|
||||
index = get_netdev_queue_index(queue);
|
||||
|
||||
if (!rtnl_trylock())
|
||||
return restart_syscall();
|
||||
ret = sysfs_rtnl_lock(kobj, attr, dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
tc = netdev_txq_to_tc(dev, index);
|
||||
rtnl_unlock();
|
||||
if (tc < 0)
|
||||
return -EINVAL;
|
||||
|
||||
return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
|
||||
/* Increase the net device refcnt to make sure it won't be freed while
|
||||
* xps_queue_show is running.
|
||||
*/
|
||||
dev_hold(dev);
|
||||
rtnl_unlock();
|
||||
|
||||
ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
|
||||
dev_put(dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
|
||||
static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
|
||||
struct netdev_queue *queue, const char *buf,
|
||||
size_t len)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
|
|
@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
|
|||
return err;
|
||||
}
|
||||
|
||||
if (!rtnl_trylock()) {
|
||||
err = sysfs_rtnl_lock(kobj, attr, dev);
|
||||
if (err) {
|
||||
bitmap_free(mask);
|
||||
return restart_syscall();
|
||||
return err;
|
||||
}
|
||||
|
||||
cpus_read_lock();
|
||||
|
|
@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj,
|
|||
static const struct kobj_type netdev_queue_ktype = {
|
||||
.sysfs_ops = &netdev_queue_sysfs_ops,
|
||||
.release = netdev_queue_release,
|
||||
.default_groups = netdev_queue_default_groups,
|
||||
.namespace = netdev_queue_namespace,
|
||||
.get_ownership = netdev_queue_get_ownership,
|
||||
};
|
||||
|
|
@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
|
|||
struct kobject *kobj = &queue->kobj;
|
||||
int error = 0;
|
||||
|
||||
/* Tx queues are cleared in netdev_queue_release to allow later
|
||||
* re-registration. This is triggered when their kobj refcount is
|
||||
* dropped.
|
||||
*
|
||||
* If a queue is removed while both a read (or write) operation and
|
||||
* the re-addition of the same queue are pending (waiting on rtnl_lock)
|
||||
* it might happen that the re-addition will execute before the read,
|
||||
* preventing the initial removal from ever happening (queue's kobj refcount
|
||||
* won't drop enough because of the pending read). In such a rare case,
|
||||
* return to allow the removal operation to complete.
|
||||
*/
|
||||
if (unlikely(kobj->state_initialized)) {
|
||||
netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/* Kobject_put later will trigger netdev_queue_release call
|
||||
* which decreases dev refcount: Take that reference here
|
||||
*/
|
||||
|
|
@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
|
|||
if (error)
|
||||
goto err;
|
||||
|
||||
queue->groups = netdev_queue_default_groups;
|
||||
error = sysfs_create_groups(kobj, queue->groups);
|
||||
if (error)
|
||||
goto err;
|
||||
|
||||
if (netdev_uses_bql(dev)) {
|
||||
error = sysfs_create_group(kobj, &dql_group);
|
||||
if (error)
|
||||
goto err;
|
||||
goto err_default_groups;
|
||||
}
|
||||
|
||||
kobject_uevent(kobj, KOBJ_ADD);
|
||||
return 0;
|
||||
|
||||
err_default_groups:
|
||||
sysfs_remove_groups(kobj, queue->groups);
|
||||
err:
|
||||
kobject_put(kobj);
|
||||
return error;
|
||||
|
|
@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
|
|||
if (netdev_uses_bql(dev))
|
||||
sysfs_remove_group(&queue->kobj, &dql_group);
|
||||
|
||||
sysfs_remove_groups(&queue->kobj, queue->groups);
|
||||
kobject_put(&queue->kobj);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -80,6 +80,11 @@ void rtnl_lock(void)
|
|||
}
|
||||
EXPORT_SYMBOL(rtnl_lock);
|
||||
|
||||
int rtnl_lock_interruptible(void)
|
||||
{
|
||||
return mutex_lock_interruptible(&rtnl_mutex);
|
||||
}
|
||||
|
||||
int rtnl_lock_killable(void)
|
||||
{
|
||||
return mutex_lock_killable(&rtnl_mutex);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user