From 434efd3d0cdd935d46c7448061537a2adcf8aeab Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Apr 2025 17:32:32 -0700 Subject: [PATCH 1/3] net: Drop hold_rtnl arg from ops_undo_list(). ops_undo_list() first iterates over ops_list for ->pre_exit(). Let's check if any of the ops has ->exit_rtnl() there and drop the hold_rtnl argument. Note that nexthop uses ->exit_rtnl() and is built-in, so hold_rtnl is always true for setup_net() and cleanup_net() for now. Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20250414170148.21f3523c@kernel.org/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250418003259.48017-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/net_namespace.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0a2b24af4028..48dd6dc603c9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -220,17 +220,20 @@ static void ops_free_list(const struct pernet_operations *ops, static void ops_undo_list(const struct list_head *ops_list, const struct pernet_operations *ops, struct list_head *net_exit_list, - bool expedite_rcu, bool hold_rtnl) + bool expedite_rcu) { const struct pernet_operations *saved_ops; + bool hold_rtnl = false; if (!ops) ops = list_entry(ops_list, typeof(*ops), list); saved_ops = ops; - list_for_each_entry_continue_reverse(ops, ops_list, list) + list_for_each_entry_continue_reverse(ops, ops_list, list) { + hold_rtnl |= !!ops->exit_rtnl; ops_pre_exit_list(ops, net_exit_list); + } /* Another CPU might be rcu-iterating the list, wait for it. * This needs to be before calling the exit() notifiers, so the @@ -257,11 +260,10 @@ static void ops_undo_list(const struct list_head *ops_list, static void ops_undo_single(struct pernet_operations *ops, struct list_head *net_exit_list) { - bool hold_rtnl = !!ops->exit_rtnl; LIST_HEAD(ops_list); list_add(&ops->list, &ops_list); - ops_undo_list(&ops_list, NULL, net_exit_list, false, hold_rtnl); + ops_undo_list(&ops_list, NULL, net_exit_list, false); list_del(&ops->list); } @@ -452,7 +454,7 @@ static __net_init int setup_net(struct net *net) * for the pernet modules whose init functions did not fail. */ list_add(&net->exit_list, &net_exit_list); - ops_undo_list(&pernet_list, ops, &net_exit_list, false, true); + ops_undo_list(&pernet_list, ops, &net_exit_list, false); rcu_barrier(); goto out; } @@ -681,7 +683,7 @@ static void cleanup_net(struct work_struct *work) list_add_tail(&net->exit_list, &net_exit_list); } - ops_undo_list(&pernet_list, NULL, &net_exit_list, true, true); + ops_undo_list(&pernet_list, NULL, &net_exit_list, true); up_read(&pernet_ops_rwsem); From 81eccc131bc1d53c9f7fa0d8c241589c514adb4e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Apr 2025 17:32:33 -0700 Subject: [PATCH 2/3] pfcp: Convert pfcp_net_exit() to ->exit_rtnl(). pfcp_net_exit() holds RTNL and cleans up all devices in the netns and other devices tied to sockets in the netns. We can use ->exit_rtnl() to save RTNL dance for all dying netns. Note that we delegate the for_each_netdev() part to default_device_exit_batch() to avoid a list corruption splat like the one reported in commit 4ccacf86491d ("gtp: Suppress list corruption splat in gtp_net_exit_batch_rtnl().") Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250418003259.48017-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/pfcp.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/net/pfcp.c b/drivers/net/pfcp.c index f873a92d2445..28e6bc4a1f14 100644 --- a/drivers/net/pfcp.c +++ b/drivers/net/pfcp.c @@ -245,30 +245,21 @@ static int __net_init pfcp_net_init(struct net *net) return 0; } -static void __net_exit pfcp_net_exit(struct net *net) +static void __net_exit pfcp_net_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { struct pfcp_net *pn = net_generic(net, pfcp_net_id); struct pfcp_dev *pfcp, *pfcp_next; - struct net_device *dev; - LIST_HEAD(list); - - rtnl_lock(); - for_each_netdev(net, dev) - if (dev->rtnl_link_ops == &pfcp_link_ops) - pfcp_dellink(dev, &list); list_for_each_entry_safe(pfcp, pfcp_next, &pn->pfcp_dev_list, list) - pfcp_dellink(pfcp->dev, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); + pfcp_dellink(pfcp->dev, dev_to_kill); } static struct pernet_operations pfcp_net_ops = { - .init = pfcp_net_init, - .exit = pfcp_net_exit, - .id = &pfcp_net_id, - .size = sizeof(struct pfcp_net), + .init = pfcp_net_init, + .exit_rtnl = pfcp_net_exit_rtnl, + .id = &pfcp_net_id, + .size = sizeof(struct pfcp_net), }; static int __init pfcp_init(void) From 7ee32072c732799bcb7221cb97adc0e9e52e3792 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Apr 2025 17:32:34 -0700 Subject: [PATCH 3/3] ppp: Split ppp_exit_net() to ->exit_rtnl(). ppp_exit_net() unregisters devices related to the netns under RTNL and destroys lists and IDR. Let's use ->exit_rtnl() for the device unregistration part to save RTNL dances for each netns. Note that we delegate the for_each_netdev_safe() part to default_device_exit_batch() and replace unregister_netdevice_queue() with ppp_nl_dellink() to align with bond, geneve, gtp, and pfcp. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250418003259.48017-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 53463767cc43..def84e87e05b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1131,6 +1131,8 @@ static const struct file_operations ppp_device_fops = { .llseek = noop_llseek, }; +static void ppp_nl_dellink(struct net_device *dev, struct list_head *head); + static __net_init int ppp_init_net(struct net *net) { struct ppp_net *pn = net_generic(net, ppp_net_id); @@ -1146,28 +1148,20 @@ static __net_init int ppp_init_net(struct net *net) return 0; } +static __net_exit void ppp_exit_rtnl_net(struct net *net, + struct list_head *dev_to_kill) +{ + struct ppp_net *pn = net_generic(net, ppp_net_id); + struct ppp *ppp; + int id; + + idr_for_each_entry(&pn->units_idr, ppp, id) + ppp_nl_dellink(ppp->dev, dev_to_kill); +} + static __net_exit void ppp_exit_net(struct net *net) { struct ppp_net *pn = net_generic(net, ppp_net_id); - struct net_device *dev; - struct net_device *aux; - struct ppp *ppp; - LIST_HEAD(list); - int id; - - rtnl_lock(); - for_each_netdev_safe(net, dev, aux) { - if (dev->netdev_ops == &ppp_netdev_ops) - unregister_netdevice_queue(dev, &list); - } - - idr_for_each_entry(&pn->units_idr, ppp, id) - /* Skip devices already unregistered by previous loop */ - if (!net_eq(dev_net(ppp->dev), net)) - unregister_netdevice_queue(ppp->dev, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); @@ -1177,6 +1171,7 @@ static __net_exit void ppp_exit_net(struct net *net) static struct pernet_operations ppp_net_ops = { .init = ppp_init_net, + .exit_rtnl = ppp_exit_rtnl_net, .exit = ppp_exit_net, .id = &ppp_net_id, .size = sizeof(struct ppp_net),