ipv6: Get rid of RTNL for SIOCDELRT and RTM_DELROUTE.

Basically, removing an IPv6 route does not require RTNL because
the IPv6 routing tables are protected by per table lock.

inet6_rtm_delroute() calls nexthop_find_by_id() to check if the
nexthop specified by RTA_NH_ID exists.  nexthop uses rbtree and
the top-down walk can be safely performed under RCU.

ip6_route_del() already relies on RCU and the table lock, but we
need to extend the RCU critical section a bit more to cover
__ip6_del_rt().  For example, nexthop_for_each_fib6_nh() and
inet6_rt_notify() needs RCU.

Let's call nexthop_find_by_id() and __ip6_del_rt() under RCU and
get rid of RTNL from inet6_rtm_delroute() and SIOCDELRT.

Even if the nexthop is removed after rcu_read_unlock() in
inet6_rtm_delroute(), __remove_nexthop_fib() cleans up the routes
tied to the nexthop, and ip6_route_del() returns -ESRCH.  So the
request was at least valid as of nexthop_find_by_id(), and it's just
a matter of timing.

Note that we need to pass false to lwtunnel_valid_encap_type_attr().
The following patches also use the newroute bool.

Note also that fib6_get_table() does not require RCU because once
allocated fib6_table is not freed until netns dismantle.  I will
post a follow-up series to convert such callers to RCU-lockless
version.  [0]

Link: https://lore.kernel.org/netdev/20250417174557.65721-1-kuniyu@amazon.com/ #[0]
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250418000443.43734-3-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Kuniyuki Iwashima 2025-04-17 17:03:43 -07:00 committed by Paolo Abeni
parent 4cb4861d8c
commit bd11ff421d

View File

@ -4125,9 +4125,9 @@ static int ip6_route_del(struct fib6_config *cfg,
if (rt->nh) {
if (!fib6_info_hold_safe(rt))
continue;
rcu_read_unlock();
return __ip6_del_rt(rt, &cfg->fc_nlinfo);
err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
break;
}
if (cfg->fc_nh_id)
continue;
@ -4142,13 +4142,13 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (!fib6_info_hold_safe(rt))
continue;
rcu_read_unlock();
/* if gateway was specified only delete the one hop */
if (cfg->fc_flags & RTF_GATEWAY)
return __ip6_del_rt(rt, &cfg->fc_nlinfo);
return __ip6_del_rt_siblings(rt, cfg);
err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
else
err = __ip6_del_rt_siblings(rt, cfg);
break;
}
}
rcu_read_unlock();
@ -4517,19 +4517,20 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
rtmsg_to_fib6_config(net, rtmsg, &cfg);
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
rtnl_lock();
/* Only do the default setting of fc_metric in route adding */
if (cfg.fc_metric == 0)
cfg.fc_metric = IP6_RT_PRIO_USER;
err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
rtnl_unlock();
break;
case SIOCDELRT:
err = ip6_route_del(&cfg, NULL);
break;
}
rtnl_unlock();
return err;
}
@ -5052,7 +5053,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
};
static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
struct netlink_ext_ack *extack,
bool newroute)
{
struct rtnexthop *rtnh;
int remaining;
@ -5086,15 +5088,16 @@ static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
} while (rtnh_ok(rtnh, remaining));
return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len,
extack, true);
extack, newroute);
}
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct rtmsg *rtm;
bool newroute = nlh->nlmsg_type == RTM_NEWROUTE;
struct nlattr *tb[RTA_MAX+1];
struct rtmsg *rtm;
unsigned int pref;
int err;
@ -5203,7 +5206,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
err = rtm_to_fib6_multipath_config(cfg, extack);
err = rtm_to_fib6_multipath_config(cfg, extack, newroute);
if (err < 0)
goto errout;
}
@ -5223,7 +5226,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
err = lwtunnel_valid_encap_type(cfg->fc_encap_type,
extack, true);
extack, newroute);
if (err < 0)
goto errout;
}
@ -5546,15 +5549,20 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
if (cfg.fc_nh_id &&
!nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
return -EINVAL;
if (cfg.fc_nh_id) {
rcu_read_lock();
err = !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id);
rcu_read_unlock();
if (err) {
NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
return -EINVAL;
}
}
if (cfg.fc_mp)
if (cfg.fc_mp) {
return ip6_route_multipath_del(&cfg, extack);
else {
} else {
cfg.fc_delete_all_nh = 1;
return ip6_route_del(&cfg, extack);
}
@ -6766,7 +6774,7 @@ static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_o
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
.doit = inet6_rtm_newroute},
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
.doit = inet6_rtm_delroute},
.doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
.doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
};