From 1dbdce30f040a87f5aa6a9dbe43be398737f090f Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:44 +0100 Subject: [PATCH 1/5] ipv4: Define inet_sk_init_flowi4() and use it in inet_sk_rebuild_header(). IPv4 code commonly has to initialise a flowi4 structure from an IPv4 socket. This requires looking at potential IPv4 options to set the proper destination address, call flowi4_init_output() with the correct set of parameters and run the sk_classify_flow security hook. Instead of reimplementing these operations in different parts of the stack, let's define inet_sk_init_flowi4() which does all these operations. The first user is inet_sk_rebuild_header(), where inet_sk_init_flowi4() replaces ip_route_output_ports(). Unlike ip_route_output_ports(), which sets the flowi4 structure and performs the route lookup in one go, inet_sk_init_flowi4() only initialises the flow. The route lookup is then done by ip_route_output_flow(). Decoupling flow initialisation from route lookup makes this new interface applicable more broadly as it will allow some users to overwrite specific struct flowi4 members before the route lookup. Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/fd416275262b1f518d5abfcef740ce4f4a1a6522.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 28 ++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 14 ++------------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 6947a155d501..f86775be3e29 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,33 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); + +static inline void inet_sk_init_flowi4(const struct inet_sock *inet, + struct flowi4 *fl4) +{ + const struct ip_options_rcu *ip4_opt; + const struct sock *sk; + __be32 daddr; + + rcu_read_lock(); + ip4_opt = rcu_dereference(inet->inet_opt); + + /* Source routing option overrides the socket destination address */ + if (ip4_opt && ip4_opt->opt.srr) + daddr = ip4_opt->opt.faddr; + else + daddr = inet->inet_daddr; + rcu_read_unlock(); + + sk = &inet->sk; + flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), + ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), + sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_uid); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); +} + struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, const struct sk_buff *skb); struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8095e82de808..21f46ee7b6e9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1309,8 +1309,6 @@ int inet_sk_rebuild_header(struct sock *sk) { struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0)); struct inet_sock *inet = inet_sk(sk); - __be32 daddr; - struct ip_options_rcu *inet_opt; struct flowi4 *fl4; int err; @@ -1319,17 +1317,9 @@ int inet_sk_rebuild_header(struct sock *sk) return 0; /* Reroute. */ - rcu_read_lock(); - inet_opt = rcu_dereference(inet->inet_opt); - daddr = inet->inet_daddr; - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; - rcu_read_unlock(); fl4 = &inet->cork.fl.u.ip4; - rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, - inet->inet_dport, inet->inet_sport, - sk->sk_protocol, ip_sock_rt_tos(sk), - sk->sk_bound_dev_if); + inet_sk_init_flowi4(inet, fl4); + rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (!IS_ERR(rt)) { err = 0; sk_setup_caps(sk, &rt->dst); From 5be1323b5041d806716c80be4f8b11cfb64fa24c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:46 +0100 Subject: [PATCH 2/5] ipv4: Use inet_sk_init_flowi4() in ip4_datagram_release_cb(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in ip4_datagram_release_cb() instead of passing parameters manually to ip_route_output_ports(). Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/9c326b8d9e919478f7952b21473d31da07eba2dd.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/datagram.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 4aca1f05edd3..4b5bc6eb52e7 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -102,8 +102,6 @@ EXPORT_SYMBOL(ip4_datagram_connect); void ip4_datagram_release_cb(struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ip_options_rcu *inet_opt; - __be32 daddr = inet->inet_daddr; struct dst_entry *dst; struct flowi4 fl4; struct rtable *rt; @@ -115,14 +113,9 @@ void ip4_datagram_release_cb(struct sock *sk) rcu_read_unlock(); return; } - inet_opt = rcu_dereference(inet->inet_opt); - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; - rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, - inet->inet_saddr, inet->inet_dport, - inet->inet_sport, sk->sk_protocol, - ip_sock_rt_tos(sk), sk->sk_bound_dev_if); + inet_sk_init_flowi4(inet, &fl4); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); dst = !IS_ERR(rt) ? &rt->dst : NULL; sk_dst_set(sk, dst); From 42e5ffc385f3b0790c6cd5b54d3396a6c772d3b6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:48 +0100 Subject: [PATCH 3/5] ipv4: Use inet_sk_init_flowi4() in inet_csk_rebuild_route(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in inet_csk_rebuild_route() instead of passing parameters manually to ip_route_output_ports(). Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/b270931636effa1095508e0f0a3e8c3a0e6d357f.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6872b5aff73e..e4decfb270fa 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1561,20 +1561,13 @@ EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) { const struct inet_sock *inet = inet_sk(sk); - const struct ip_options_rcu *inet_opt; - __be32 daddr = inet->inet_daddr; struct flowi4 *fl4; struct rtable *rt; rcu_read_lock(); - inet_opt = rcu_dereference(inet->inet_opt); - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; fl4 = &fl->u.ip4; - rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, - inet->inet_saddr, inet->inet_dport, - inet->inet_sport, sk->sk_protocol, - ip_sock_rt_tos(sk), sk->sk_bound_dev_if); + inet_sk_init_flowi4(inet, fl4); + rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (IS_ERR(rt)) rt = NULL; if (rt) From 148721f8e04a10a3b9c51f030c9be0d15b0a4d17 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:51 +0100 Subject: [PATCH 4/5] ipv4: Use inet_sk_init_flowi4() in __ip_queue_xmit(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in __ip_queue_xmit() instead of passing parameters manually to ip_route_output_ports(). Override ->flowi4_tos with the value passed as parameter since that's required by SCTP. Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/37e64ffbd9adac187b14aa9097b095f5c86e85be.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f45a083f2c13..ea7a260bec8a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -478,24 +478,16 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, /* Make sure we can route this packet. */ rt = dst_rtable(__sk_dst_check(sk, 0)); if (!rt) { - __be32 daddr; + inet_sk_init_flowi4(inet, fl4); - /* Use correct destination address if we have options. */ - daddr = inet->inet_daddr; - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; + /* sctp_v4_xmit() uses its own DSCP value */ + fl4->flowi4_tos = tos & INET_DSCP_MASK; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. */ - rt = ip_route_output_ports(net, fl4, sk, - daddr, inet->inet_saddr, - inet->inet_dport, - inet->inet_sport, - sk->sk_protocol, - tos & INET_DSCP_MASK, - sk->sk_bound_dev_if); + rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; sk_setup_caps(sk, &rt->dst); From c63e9f3b89d3f96220a1c99466fed4563c14a259 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:54 +0100 Subject: [PATCH 5/5] l2tp: Use inet_sk_init_flowi4() in l2tp_ip_sendmsg(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in l2tp_ip_sendmsg() instead of passing parameters manually to ip_route_output_ports(). Override ->daddr with the value passed in the msghdr structure if provided. Signed-off-by: Guillaume Nault Reviewed-by: James Chapman Link: https://patch.msgid.link/2ff22a3560c5050228928456662b80b9c84a8fe4.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_ip.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4bc24fddfd52..29795d2839e8 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -425,7 +425,6 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int rc; struct inet_sock *inet = inet_sk(sk); struct rtable *rt = NULL; - struct flowi4 *fl4; int connected = 0; __be32 daddr; @@ -455,7 +454,6 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (sk->sk_state != TCP_ESTABLISHED) goto out; - daddr = inet->inet_daddr; connected = 1; } @@ -482,29 +480,24 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto error; } - fl4 = &inet->cork.fl.u.ip4; if (connected) rt = dst_rtable(__sk_dst_check(sk, 0)); rcu_read_lock(); if (!rt) { - const struct ip_options_rcu *inet_opt; + struct flowi4 *fl4 = &inet->cork.fl.u.ip4; - inet_opt = rcu_dereference(inet->inet_opt); + inet_sk_init_flowi4(inet, fl4); - /* Use correct destination address if we have options. */ - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; + /* Overwrite ->daddr if msg->msg_name was provided */ + if (!connected) + fl4->daddr = daddr; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. */ - rt = ip_route_output_ports(sock_net(sk), fl4, sk, - daddr, inet->inet_saddr, - inet->inet_dport, inet->inet_sport, - sk->sk_protocol, ip_sock_rt_tos(sk), - sk->sk_bound_dev_if); + rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (IS_ERR(rt)) goto no_route; if (connected) {