From ae3cdfd4e0b86e5d23b46b80e8b010f5392c9635 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Mar 2026 22:32:31 +0200 Subject: [PATCH 1/3] vrf: Remove unnecessary NULL check The VRF driver always allocates an IPv4 dst entry for a VRF device and prevents the device from being registered if the allocation fails. Therefore, there is no need to check if the entry exists when tearing down a VRF device. Remove the check. Note that the same is not true for the IPv6 dst entry. Its creation can be skipped if IPv6 is administratively disabled (i.e., 'ipv6.disable=1'). Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260326203233.1128554-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 8c009bcaa8e7..0952ab6a2571 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1007,13 +1007,11 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ - if (rth) { - dst = &rth->dst; - netdev_ref_replace(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); - dst->dev = net->loopback_dev; - dst_release(dst); - } + dst = &rth->dst; + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); + dst->dev = net->loopback_dev; + dst_release(dst); } static int vrf_rtable_create(struct net_device *dev) From 50504e2579c1e0379bc0ffeaec67884e1d6c9212 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Mar 2026 22:32:32 +0200 Subject: [PATCH 2/3] vrf: Use dst_dev_put() instead of using loopback device Use dst_dev_put() to clean up the device referenced by the dst entry instead of partially open coding it. Internally, the helper uses the blackhole device instead of the loopback device. 
Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260326203233.1128554-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0952ab6a2571..bfc9ea91ac20 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -751,21 +751,13 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); - struct net *net = dev_net(dev); - struct dst_entry *dst; RCU_INIT_POINTER(vrf->rt6, NULL); synchronize_rcu(); - /* move dev in dst's to loopback so this VRF device can be deleted - * - based on dst_ifdown - */ if (rt6) { - dst = &rt6->dst; - netdev_ref_replace(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); - dst->dev = net->loopback_dev; - dst_release(dst); + dst_dev_put(&rt6->dst); + dst_release(&rt6->dst); } } @@ -998,20 +990,12 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { struct rtable *rth = rtnl_dereference(vrf->rth); - struct net *net = dev_net(dev); - struct dst_entry *dst; RCU_INIT_POINTER(vrf->rth, NULL); synchronize_rcu(); - /* move dev in dst's to loopback so this VRF device can be deleted - * - based on dst_ifdown - */ - dst = &rth->dst; - netdev_ref_replace(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); - dst->dev = net->loopback_dev; - dst_release(dst); + dst_dev_put(&rth->dst); + dst_release(&rth->dst); } static int vrf_rtable_create(struct net_device *dev) From 075196489a3797c2a931de68c438e06f8303dd5c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Mar 2026 22:32:33 +0200 Subject: [PATCH 3/3] vrf: Remove unnecessary RCU protection around dst entries During initialization of a VRF device, 
the VRF driver creates two dst entries (for IPv4 and IPv6). They are
attached to locally generated packets that are transmitted out of the
VRF ports (via the l3mdev_l3_out() hook). Their purpose is to redirect
packets towards the VRF device instead of having the packets egress
directly out of the VRF ports. This is useful, for example, when a
queuing discipline is configured on the VRF device.

In order to avoid a NULL pointer dereference, commit b0e95ccdd775
("net: vrf: protect changes to private data with rcu") made the pointers
to the dst entries RCU protected. As far as I can tell, this was needed
because back then the dst entries were released (and the pointers reset
to NULL) before removing the VRF ports.

Later on, commit f630c38ef0d7 ("vrf: fix bug_on triggered by rx when
destroying a vrf") moved the removal of the VRF ports to the VRF
device's dellink() callback. As such, the teardown sequence of a VRF
device looks as follows:

1. VRF ports are removed.
2. VRF device is unregistered.
   a. Device is closed.
   b. An RCU grace period passes.
   c. ndo_uninit() is called.
      i. dst entries are released.

Given the above, the Tx path will always see the same fully initialized
dst entries and will never race with the ndo_uninit() callback.
Therefore, there is no need to make the pointers to the dst entries RCU
protected. Remove the RCU protection as well as the unnecessary NULL
checks in the Tx path.

Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/20260326203233.1128554-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 56 ++++++++++------------------------------------- 1 file changed, 12 insertions(+), 44 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index bfc9ea91ac20..2cf2dbd1c12f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -112,8 +112,8 @@ struct netns_vrf { }; struct net_vrf { - struct rtable __rcu *rth; - struct rt6_info __rcu *rt6; + struct rtable *rth; + struct rt6_info *rt6; #if IS_ENABLED(CONFIG_IPV6) struct fib6_table *fib6_table; #endif @@ -648,26 +648,13 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); - struct dst_entry *dst = NULL; struct rt6_info *rt6; - rcu_read_lock(); - - rt6 = rcu_dereference(vrf->rt6); - if (likely(rt6)) { - dst = &rt6->dst; - dst_hold(dst); - } - - rcu_read_unlock(); - - if (unlikely(!dst)) { - vrf_tx_error(vrf_dev, skb); - return NULL; - } + rt6 = vrf->rt6; + dst_hold(&rt6->dst); skb_dst_drop(skb); - skb_dst_set(skb, dst); + skb_dst_set(skb, &rt6->dst); return skb; } @@ -750,10 +737,7 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { - struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); - - RCU_INIT_POINTER(vrf->rt6, NULL); - synchronize_rcu(); + struct rt6_info *rt6 = vrf->rt6; if (rt6) { dst_dev_put(&rt6->dst); @@ -784,7 +768,7 @@ static int vrf_rt6_create(struct net_device *dev) rt6->dst.output = vrf_output6; - rcu_assign_pointer(vrf->rt6, rt6); + vrf->rt6 = rt6; rc = 0; out: @@ -870,26 +854,13 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); - struct dst_entry *dst = NULL; struct rtable *rth; - rcu_read_lock(); - - rth = 
rcu_dereference(vrf->rth); - if (likely(rth)) { - dst = &rth->dst; - dst_hold(dst); - } - - rcu_read_unlock(); - - if (unlikely(!dst)) { - vrf_tx_error(vrf_dev, skb); - return NULL; - } + rth = vrf->rth; + dst_hold(&rth->dst); skb_dst_drop(skb); - skb_dst_set(skb, dst); + skb_dst_set(skb, &rth->dst); return skb; } @@ -989,10 +960,7 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { - struct rtable *rth = rtnl_dereference(vrf->rth); - - RCU_INIT_POINTER(vrf->rth, NULL); - synchronize_rcu(); + struct rtable *rth = vrf->rth; dst_dev_put(&rth->dst); dst_release(&rth->dst); @@ -1013,7 +981,7 @@ static int vrf_rtable_create(struct net_device *dev) rth->dst.output = vrf_output; - rcu_assign_pointer(vrf->rth, rth); + vrf->rth = rth; return 0; }