Merge branch 'net-skip-taking-rtnl_lock-for-queue-get'

Jakub Kicinski says:

====================
net: skip taking rtnl_lock for queue GET (prep)

Skip taking rtnl_lock for queue GET ops on devices which opt
into running all ops under the instance lock. In preparation
for performing queue ops without rtnl lock, clarify the protection
of queue-related fields.

v1: https://lore.kernel.org/20250312223507.805719-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250324224537.248800-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-03-25 10:04:55 -07:00
commit 7bd2e6b74a
9 changed files with 65 additions and 35 deletions

View File

@ -710,7 +710,7 @@ struct netdev_queue {
* slow- / control-path part
*/
/* NAPI instance for the queue
* Readers and writers must hold RTNL
* "ops protected", see comment about net_device::lock
*/
struct napi_struct *napi;
@ -2496,18 +2496,38 @@ struct net_device {
* Should always be taken using netdev_lock() / netdev_unlock() helpers.
* Drivers are free to use it for other protection.
*
* Protects:
* For the drivers that implement shaper or queue API, the scope
* of this lock is expanded to cover most ndo/queue/ethtool/sysfs
* operations. Drivers may opt-in to this behavior by setting
* @request_ops_lock.
*
* @lock protection mixes with rtnl_lock in multiple ways, fields are
* either:
*
* - simply protected by the instance @lock;
*
* - double protected - writers hold both locks, readers hold either;
*
* - ops protected - protected by the lock held around the NDOs
* and other callbacks, that is the instance lock on devices for
* which netdev_need_ops_lock() returns true, otherwise by rtnl_lock;
*
* - double ops protected - always protected by rtnl_lock but for
* devices for which netdev_need_ops_lock() returns true - also
* the instance lock.
*
* Simply protects:
* @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list,
* @net_shaper_hierarchy, @reg_state, @threaded
*
* Partially protects (writers must hold both @lock and rtnl_lock):
* Double protects:
* @up
*
* Also protects some fields in struct napi_struct.
* Double ops protects:
* @real_num_rx_queues, @real_num_tx_queues
*
* For the drivers that implement shaper or queue API, the scope
* of this lock is expanded to cover most ndo/queue/ethtool/sysfs
* operations.
* Also protects some fields in:
* struct napi_struct, struct netdev_queue, struct netdev_rx_queue
*
* Ordering: take after rtnl_lock.
*/
@ -4062,17 +4082,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
}
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
#ifdef CONFIG_SYSFS
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
#else
static inline int netif_set_real_num_rx_queues(struct net_device *dev,
unsigned int rxqs)
{
dev->real_num_rx_queues = rxqs;
return 0;
}
#endif
int netif_set_real_num_queues(struct net_device *dev,
unsigned int txq, unsigned int rxq);

View File

@ -5,25 +5,27 @@
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
static inline bool netdev_trylock(struct net_device *dev)
{
return mutex_trylock(&dev->lock);
}
static inline void netdev_assert_locked(struct net_device *dev)
static inline void netdev_assert_locked(const struct net_device *dev)
{
lockdep_assert_held(&dev->lock);
}
static inline void netdev_assert_locked_or_invisible(struct net_device *dev)
static inline void
netdev_assert_locked_or_invisible(const struct net_device *dev)
{
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING)
netdev_assert_locked(dev);
}
static inline bool netdev_need_ops_lock(struct net_device *dev)
static inline bool netdev_need_ops_lock(const struct net_device *dev)
{
bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops;
@ -46,10 +48,20 @@ static inline void netdev_unlock_ops(struct net_device *dev)
netdev_unlock(dev);
}
static inline void netdev_ops_assert_locked(struct net_device *dev)
static inline void netdev_ops_assert_locked(const struct net_device *dev)
{
if (netdev_need_ops_lock(dev))
lockdep_assert_held(&dev->lock);
else
ASSERT_RTNL();
}
static inline void
netdev_ops_assert_locked_or_invisible(const struct net_device *dev)
{
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING)
netdev_ops_assert_locked(dev);
}
static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,

View File

@ -24,7 +24,7 @@ struct netdev_rx_queue {
struct xsk_buff_pool *pool;
#endif
/* NAPI instance for the queue
* Readers and writers must hold RTNL
* "ops protected", see comment about net_device::lock
*/
struct napi_struct *napi;
struct pp_memory_provider_params mp_params;

View File

@ -3130,6 +3130,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING) {
ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
txq);
@ -3160,7 +3161,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);
#ifdef CONFIG_SYSFS
/**
* netif_set_real_num_rx_queues - set actual number of RX queues used
* @dev: Network device
@ -3180,6 +3180,7 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
if (dev->reg_state == NETREG_REGISTERED) {
ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
rxq);
@ -3191,7 +3192,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
return 0;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
/**
* netif_set_real_num_queues - set actual number of RX and TX queues used
@ -6901,8 +6901,7 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
if (WARN_ON_ONCE(napi && !napi->dev))
return;
if (dev->reg_state >= NETREG_REGISTERED)
ASSERT_RTNL();
netdev_ops_assert_locked_or_invisible(dev);
switch (type) {
case NETDEV_QUEUE_TYPE_RX:
@ -10359,7 +10358,7 @@ u32 dev_get_min_mp_channel_count(const struct net_device *dev)
{
int i;
ASSERT_RTNL();
netdev_ops_assert_locked(dev);
for (i = dev->real_num_rx_queues - 1; i >= 0; i--)
if (dev->_rx[i].mp_params.mp_priv)
@ -11963,9 +11962,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
netdev_lock_ops(dev);
dev_xdp_uninstall(dev);
dev_memory_provider_uninstall(dev);
netdev_unlock_ops(dev);
bpf_dev_bound_netdev_unregister(dev);
dev_memory_provider_uninstall(dev);
netdev_offload_xstats_disable_all(dev);

View File

@ -128,12 +128,10 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
rxq->mp_params.mp_priv = NULL;
rxq->mp_params.mp_ops = NULL;
netdev_lock(binding->dev);
rxq_idx = get_netdev_rx_queue_index(rxq);
err = netdev_rx_queue_restart(binding->dev, rxq_idx);
WARN_ON(err && err != -ENETDOWN);
netdev_unlock(binding->dev);
}
xa_erase(&net_devmem_dmabuf_bindings, binding->id);

View File

@ -2148,8 +2148,10 @@ static void remove_queue_kobjects(struct net_device *dev)
net_rx_queue_update_kobjects(dev, real_rx, 0);
netdev_queue_update_kobjects(dev, real_tx, 0);
netdev_lock_ops(dev);
dev->real_num_rx_queues = 0;
dev->real_num_tx_queues = 0;
netdev_unlock_ops(dev);
#ifdef CONFIG_SYSFS
kset_unregister(dev->queues_kset);
#endif

View File

@ -867,6 +867,13 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_sock;
}
if (!netdev_need_ops_lock(netdev)) {
err = -EOPNOTSUPP;
NL_SET_BAD_ATTR(info->extack,
info->attrs[NETDEV_A_DEV_IFINDEX]);
goto err_unlock;
}
if (dev_xdp_prog_count(netdev)) {
NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached");
err = -EEXIST;
@ -947,7 +954,9 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
mutex_lock(&priv->lock);
list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
netdev_lock(binding->dev);
net_devmem_unbind_dmabuf(binding);
netdev_unlock(binding->dev);
}
mutex_unlock(&priv->lock);
}

View File

@ -92,6 +92,9 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
struct netdev_rx_queue *rxq;
int ret;
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
if (ifq_idx >= dev->real_num_rx_queues)
return -EINVAL;
ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);

View File

@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/device.h>
#include <net/netdev_lock.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
@ -279,11 +280,7 @@ static int page_pool_init(struct page_pool *pool,
get_device(pool->p.dev);
if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
/* We rely on rtnl_lock()ing to make sure netdev_rx_queue
* configuration doesn't change while we're initializing
* the page_pool.
*/
ASSERT_RTNL();
netdev_assert_locked(pool->slow.netdev);
rxq = __netif_get_rx_queue(pool->slow.netdev,
pool->slow.queue_idx);
pool->mp_priv = rxq->mp_params.mp_priv;