From ba6f418fbf64bb8c0e98dc1b548c151beeedd16c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 24 Mar 2025 15:45:27 -0700 Subject: [PATCH 1/7] net: bubble up taking netdev instance lock to callers of net_devmem_unbind_dmabuf() A recent commit added taking the netdev instance lock in netdev_nl_bind_rx_doit(), but didn't remove it in net_devmem_unbind_dmabuf() which it calls from an error path. Always expect the callers of net_devmem_unbind_dmabuf() to hold the lock. This is consistent with net_devmem_bind_dmabuf(). (Not so) coincidentally this also protects mp_param with the instance lock, which the rest of this series needs. Fixes: 1d22d3060b9b ("net: drop rtnl_lock for queue_mgmt operations") Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250324224537.248800-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 2 -- net/core/netdev-genl.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index 6802e82a4d03..ee145a2aa41c 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -128,12 +128,10 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq->mp_params.mp_priv = NULL; rxq->mp_params.mp_ops = NULL; - netdev_lock(binding->dev); rxq_idx = get_netdev_rx_queue_index(rxq); err = netdev_rx_queue_restart(binding->dev, rxq_idx); WARN_ON(err && err != -ENETDOWN); - netdev_unlock(binding->dev); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index a186fea63c09..9e4882a22407 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -947,7 +947,9 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) mutex_lock(&priv->lock); list_for_each_entry_safe(binding, temp, &priv->bindings, list) { + netdev_lock(binding->dev); net_devmem_unbind_dmabuf(binding); + netdev_unlock(binding->dev); } mutex_unlock(&priv->lock); } From bae2da826196ff4ab439b57683dce883e274faef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 24 Mar 2025 15:45:28 -0700 Subject: [PATCH 2/7] net: remove netif_set_real_num_rx_queues() helper for when SYSFS=n Since commit a953be53ce40 ("net-sysfs: add support for device-specific rx queue sysfs attributes"), so for at least a decade now it is safe to call net_rx_queue_update_kobjects() when SYSFS=n. That function does its own ifdef-inery and will return 0. Remove the unnecessary stub for netif_set_real_num_rx_queues(). Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250324224537.248800-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 ---------- net/core/dev.c | 2 -- 2 files changed, 12 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f22cca7c03ad..55859c565f84 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4062,17 +4062,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev) } int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); - -#ifdef CONFIG_SYSFS int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); -#else -static inline int netif_set_real_num_rx_queues(struct net_device *dev, - unsigned int rxqs) -{ - dev->real_num_rx_queues = rxqs; - return 0; -} -#endif int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); diff --git a/net/core/dev.c b/net/core/dev.c index b6b1c7898281..6295f00e97a7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3160,7 +3160,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -#ifdef CONFIG_SYSFS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device @@ -3191,7 +3190,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) return 0; } EXPORT_SYMBOL(netif_set_real_num_rx_queues); -#endif /** * netif_set_real_num_queues - set actual number of RX and TX queues used From e2f81e8f4d0c3109e1a18620c931fe16bfb235ef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 24 Mar 2025 15:45:29 -0700 Subject: [PATCH 3/7] net: constify dev pointer in misc instance lock helpers lockdep asserts and predicates can operate on const pointers. In the future this will let us add asserts in functions which operate on const pointers like dev_get_min_mp_channel_count(). Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250324224537.248800-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_lock.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 99631fbd7f54..689ffdfae50d 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -11,19 +11,20 @@ static inline bool netdev_trylock(struct net_device *dev) return mutex_trylock(&dev->lock); } -static inline void netdev_assert_locked(struct net_device *dev) +static inline void netdev_assert_locked(const struct net_device *dev) { lockdep_assert_held(&dev->lock); } -static inline void netdev_assert_locked_or_invisible(struct net_device *dev) +static inline void +netdev_assert_locked_or_invisible(const struct net_device *dev) { if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) netdev_assert_locked(dev); } -static inline bool netdev_need_ops_lock(struct net_device *dev) +static inline bool netdev_need_ops_lock(const struct net_device *dev) { bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops; @@ -46,7 +47,7 @@ static inline void netdev_unlock_ops(struct net_device *dev) netdev_unlock(dev); } -static inline void netdev_ops_assert_locked(struct net_device *dev) +static inline void netdev_ops_assert_locked(const struct net_device *dev) { if (netdev_need_ops_lock(dev)) lockdep_assert_held(&dev->lock); From 4b702f8b72c7b05daa1b763fdc0840aa78178c3a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 24 Mar 2025 15:45:30 -0700 Subject: [PATCH 4/7] net: explain "protection types" for the instance lock Try to define some terminology for which fields are protected by which lock and how. Some fields are protected by both rtnl_lock and instance lock which is hard to talk about without having a "key phrase" to refer to a particular protection scheme. "ops protected" fields are defined later in the series, one by one. Add ASSERT_RTNL() to netdev_ops_assert_locked() for drivers not other instance protection of ops. Hopefully it's not too confusion that netdev_lock_ops() does not match the lock which netdev_ops_assert_locked() will assert, exactly. The noun "ops" is in a different place in the name, so I think it's acceptable... Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250324224537.248800-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 28 ++++++++++++++++++++++------ include/net/netdev_lock.h | 3 +++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 55859c565f84..2b91fb96a411 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2496,19 +2496,35 @@ struct net_device { * Should always be taken using netdev_lock() / netdev_unlock() helpers. * Drivers are free to use it for other protection. * - * Protects: + * For the drivers that implement shaper or queue API, the scope + * of this lock is expanded to cover most ndo/queue/ethtool/sysfs + * operations. Drivers may opt-in to this behavior by setting + * @request_ops_lock. + * + * @lock protection mixes with rtnl_lock in multiple ways, fields are + * either: + * + * - simply protected by the instance @lock; + * + * - double protected - writers hold both locks, readers hold either; + * + * - ops protected - protected by the lock held around the NDOs + * and other callbacks, that is the instance lock on devices for + * which netdev_need_ops_lock() returns true, otherwise by rtnl_lock; + * + * - double ops protected - always protected by rtnl_lock but for + * devices for which netdev_need_ops_lock() returns true - also + * the instance lock. + * + * Simply protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, * @net_shaper_hierarchy, @reg_state, @threaded * - * Partially protects (writers must hold both @lock and rtnl_lock): + * Double protects: * @up * * Also protects some fields in struct napi_struct. * - * For the drivers that implement shaper or queue API, the scope - * of this lock is expanded to cover most ndo/queue/ethtool/sysfs - * operations. - * * Ordering: take after rtnl_lock. */ struct mutex lock; diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 689ffdfae50d..efd302375ef2 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -5,6 +5,7 @@ #include #include +#include static inline bool netdev_trylock(struct net_device *dev) { @@ -51,6 +52,8 @@ static inline void netdev_ops_assert_locked(const struct net_device *dev) { if (netdev_need_ops_lock(dev)) lockdep_assert_held(&dev->lock); + else + ASSERT_RTNL(); } static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, From 0a65dcf6249b75c841b4218426b0d246a805c7e0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 24 Mar 2025 15:45:31 -0700 Subject: [PATCH 5/7] net: designate queue counts as "double ops protected" by instance lock Drivers which opt into instance lock protection of ops should only call set_real_num_*_queues() under the instance lock. This means that queue counts are double protected (writes are under both rtnl_lock and instance lock, readers under either). Some readers may still be under the rtnl_lock, however, so for now we need double protection of writers. OTOH queue API paths are only under the protection of the instance lock, so we need to validate that the instance is actually locking ops, otherwise the input checks we do against queue count are racy. Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250324224537.248800-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 2 ++ net/core/net-sysfs.c | 2 ++ net/core/netdev-genl.c | 7 +++++++ net/core/netdev_rx_queue.c | 3 +++ 5 files changed, 17 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2b91fb96a411..60ef367d8575 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2523,6 +2523,9 @@ struct net_device { * Double protects: * @up * + * Double ops protects: + * @real_num_rx_queues, @real_num_tx_queues + * * Also protects some fields in struct napi_struct. * * Ordering: take after rtnl_lock. diff --git a/net/core/dev.c b/net/core/dev.c index 6295f00e97a7..2d9be3ecd5e6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,6 +3130,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); + netdev_ops_assert_locked(dev); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, txq); @@ -3179,6 +3180,7 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + netdev_ops_assert_locked(dev); rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, rxq); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b6fbe629ccee..1ace0cd01adc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -2148,8 +2148,10 @@ static void remove_queue_kobjects(struct net_device *dev) net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); + netdev_lock_ops(dev); dev->real_num_rx_queues = 0; dev->real_num_tx_queues = 0; + netdev_unlock_ops(dev); #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 9e4882a22407..fd1cfa9707dc 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -867,6 +867,13 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock_sock; } + if (!netdev_need_ops_lock(netdev)) { + err = -EOPNOTSUPP; + NL_SET_BAD_ATTR(info->extack, + info->attrs[NETDEV_A_DEV_IFINDEX]); + goto err_unlock; + } + if (dev_xdp_prog_count(netdev)) { NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached"); err = -EEXIST; diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index a5b234b33cd5..3af716f77a13 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -92,6 +92,9 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, struct netdev_rx_queue *rxq; int ret; + if (!netdev_need_ops_lock(dev)) + return -EOPNOTSUPP; + if (ifq_idx >= dev->real_num_rx_queues) return -EINVAL; ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); From 310ae9eb2617c62deedef8f121d7ca1ae774fa76 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 24 Mar 2025 15:45:32 -0700 Subject: [PATCH 6/7] net: designate queue -> napi linking as "ops protected" netdev netlink is the only reader of netdev_{,rx_}queue->napi, and it already holds netdev->lock. Switch protection of the writes to netdev->lock to "ops protected". The expectation will be now that accessing queue->napi will require netdev->lock for "ops locked" drivers, and rtnl_lock for all other drivers. Current "ops locked" drivers don't require any changes. gve and netdevsim use _locked() helpers right next to netif_queue_set_napi() so they must be holding the instance lock. iavf doesn't call it. bnxt is a bit messy but all paths seem locked. Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250324224537.248800-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 +++-- include/net/netdev_lock.h | 8 ++++++++ include/net/netdev_rx_queue.h | 2 +- net/core/dev.c | 3 +-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 60ef367d8575..fa79145518d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -710,7 +710,7 @@ struct netdev_queue { * slow- / control-path part */ /* NAPI instance for the queue - * Readers and writers must hold RTNL + * "ops protected", see comment about net_device::lock */ struct napi_struct *napi; @@ -2526,7 +2526,8 @@ struct net_device { * Double ops protects: * @real_num_rx_queues, @real_num_tx_queues * - * Also protects some fields in struct napi_struct. + * Also protects some fields in: + * struct napi_struct, struct netdev_queue, struct netdev_rx_queue * * Ordering: take after rtnl_lock. */ diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index efd302375ef2..1c0c9a94cc22 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -56,6 +56,14 @@ static inline void netdev_ops_assert_locked(const struct net_device *dev) ASSERT_RTNL(); } +static inline void +netdev_ops_assert_locked_or_invisible(const struct net_device *dev) +{ + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) + netdev_ops_assert_locked(dev); +} + static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) { diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index af40842f229d..b2238b551dce 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -24,7 +24,7 @@ struct netdev_rx_queue { struct xsk_buff_pool *pool; #endif /* NAPI instance for the queue - * Readers and writers must hold RTNL + * "ops protected", see comment about net_device::lock */ struct napi_struct *napi; struct pp_memory_provider_params mp_params; diff --git a/net/core/dev.c b/net/core/dev.c index 2d9be3ecd5e6..ab74e1f005d2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6901,8 +6901,7 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, if (WARN_ON_ONCE(napi && !napi->dev)) return; - if (dev->reg_state >= NETREG_REGISTERED) - ASSERT_RTNL(); + netdev_ops_assert_locked_or_invisible(dev); switch (type) { case NETDEV_QUEUE_TYPE_RX: From b52458652eca5a551ddb55605201b136f091b04d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 24 Mar 2025 15:45:33 -0700 Subject: [PATCH 7/7] net: protect rxq->mp_params with the instance lock Ensure that all accesses to mp_params are under the netdev instance lock. The only change we need is to move dev_memory_provider_uninstall() under the lock. Appropriately swap the asserts. Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250324224537.248800-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 ++-- net/core/page_pool.c | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index ab74e1f005d2..b597cc27a115 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10358,7 +10358,7 @@ u32 dev_get_min_mp_channel_count(const struct net_device *dev) { int i; - ASSERT_RTNL(); + netdev_ops_assert_locked(dev); for (i = dev->real_num_rx_queues - 1; i >= 0; i--) if (dev->_rx[i].mp_params.mp_priv) @@ -11962,9 +11962,9 @@ void unregister_netdevice_many_notify(struct list_head *head, dev_tcx_uninstall(dev); netdev_lock_ops(dev); dev_xdp_uninstall(dev); + dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); bpf_dev_bound_netdev_unregister(dev); - dev_memory_provider_uninstall(dev); netdev_offload_xstats_disable_all(dev); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index acef1fcd8ddc..7745ad924ae2 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -279,11 +280,7 @@ static int page_pool_init(struct page_pool *pool, get_device(pool->p.dev); if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { - /* We rely on rtnl_lock()ing to make sure netdev_rx_queue - * configuration doesn't change while we're initializing - * the page_pool. - */ - ASSERT_RTNL(); + netdev_assert_locked(pool->slow.netdev); rxq = __netif_get_rx_queue(pool->slow.netdev, pool->slow.queue_idx); pool->mp_priv = rxq->mp_params.mp_priv;