From de32bc6aad09131a30b4a9a738e2bf2ba5a9a5aa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Apr 2022 11:58:44 +0100 Subject: [PATCH 1/5] net: inline sock_alloc_send_skb sock_alloc_send_skb() is simple and just proxying to another function, so we can inline it and cut associated overhead. Signed-off-by: Pavel Begunkov Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++-- net/core/sock.c | 7 ------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 663041b92c21..73063c88a249 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1823,11 +1823,17 @@ int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, - int noblock, int *errcode); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, unsigned long data_len, int noblock, int *errcode, int max_page_order); + +static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk, + unsigned long size, + int noblock, int *errcode) +{ + return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); +} + void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); diff --git a/net/core/sock.c b/net/core/sock.c index 770408fd935f..b164618f2cb6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2635,13 +2635,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, } EXPORT_SYMBOL(sock_alloc_send_pskb); -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, - int noblock, int *errcode) -{ - return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); -} -EXPORT_SYMBOL(sock_alloc_send_skb); - int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, struct sockcm_cookie *sockc) { From 657dd5f97b2ed16cdaa6339f42f9130240af1c04 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Apr 2022 11:58:45 +0100 Subject: [PATCH 2/5] net: inline skb_zerocopy_iter_dgram skb_zerocopy_iter_dgram() is a small proxy function, inline it. For that, move __zerocopy_sg_from_iter into linux/skbuff.h Signed-off-by: Pavel Begunkov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 36 ++++++++++++++++++++++-------------- net/core/datagram.c | 2 -- net/core/datagram.h | 15 --------------- net/core/skbuff.c | 7 ------- 4 files changed, 22 insertions(+), 38 deletions(-) delete mode 100644 net/core/datagram.h diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dc4b3e1cf21b..3270cb72e4d8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -684,20 +684,6 @@ struct ubuf_info { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); -struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size); -struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg); - -void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); - -void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, - bool success); - -int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); -int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, - struct msghdr *msg, int len, - struct ubuf_info *uarg); - /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -1679,6 +1665,28 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) } #endif +struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size); +struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); + +void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); + +void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, + bool success); + +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); + +static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, + struct msghdr *msg, int len) +{ + return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); +} + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg); + /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) diff --git a/net/core/datagram.c b/net/core/datagram.c index 70126d15ca6e..50f4faeea76c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -62,8 +62,6 @@ #include #include -#include "datagram.h" - /* * Is a socket 'connection oriented' ? */ diff --git a/net/core/datagram.h b/net/core/datagram.h deleted file mode 100644 index bcfb75bfa3b2..000000000000 --- a/net/core/datagram.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _NET_CORE_DATAGRAM_H_ -#define _NET_CORE_DATAGRAM_H_ - -#include - -struct sock; -struct sk_buff; -struct iov_iter; - -int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, - struct iov_iter *from, size_t length); - -#endif /* _NET_CORE_DATAGRAM_H_ */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 00980e62415b..b92e93dcaa91 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -80,7 +80,6 @@ #include #include -#include "datagram.h" #include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; @@ -1340,12 +1339,6 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) } EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort); -int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) -{ - return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); -} -EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram); - int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) From c526fd8f9f4f21cb83c0b1c9a1ee9c0ac9be9e2e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Apr 2022 11:58:46 +0100 Subject: [PATCH 3/5] net: inline dev_queue_xmit() Inline dev_queue_xmit() and dev_queue_xmit_accel(), they both are small proxy functions doing nothing but redirecting the control flow to __dev_queue_xmit(). Signed-off-by: Pavel Begunkov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 ++++++++++++-- net/core/dev.c | 15 ++------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b75ca2d095ae..4aba92a4042a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2940,10 +2940,20 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -int dev_queue_xmit(struct sk_buff *skb); -int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); +int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +static inline int dev_queue_xmit(struct sk_buff *skb) +{ + return __dev_queue_xmit(skb, NULL); +} + +static inline int dev_queue_xmit_accel(struct sk_buff *skb, + struct net_device *sb_dev) +{ + return __dev_queue_xmit(skb, sb_dev); +} + static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { int ret; diff --git a/net/core/dev.c b/net/core/dev.c index 7a61690d3137..d127164771f2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4111,7 +4111,7 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ -static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) +int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; struct netdev_queue *txq = NULL; @@ -4235,18 +4235,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) rcu_read_unlock_bh(); return rc; } - -int dev_queue_xmit(struct sk_buff *skb) -{ - return __dev_queue_xmit(skb, NULL); -} -EXPORT_SYMBOL(dev_queue_xmit); - -int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) -{ - return __dev_queue_xmit(skb, sb_dev); -} -EXPORT_SYMBOL(dev_queue_xmit_accel); +EXPORT_SYMBOL(__dev_queue_xmit); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { From 4b143ed7dde59d8a4f94c39aa7c4e92842c3ecc1 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Apr 2022 11:58:47 +0100 Subject: [PATCH 4/5] ipv6: help __ip6_finish_output() inlining There are two callers of __ip6_finish_output(), both are in ip6_finish_output(). We can combine the call sites into one and handle return code after, that will inline __ip6_finish_output(). Note, error handling under NET_XMIT_CN will only return 0 if __ip6_finish_output() succeded, and in this case it return 0. Considering that NET_XMIT_SUCCESS is 0, it'll be returning exactly the same result for it as before. Signed-off-by: Pavel Begunkov Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1f3d777e7694..bda1d9f76f7e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -198,7 +198,6 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); switch (ret) { case NET_XMIT_SUCCESS: - return __ip6_finish_output(net, sk, skb); case NET_XMIT_CN: return __ip6_finish_output(net, sk, skb) ? : ret; default: From 58f71be58b8713e41f8568938a0199190f723d1d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Apr 2022 11:58:48 +0100 Subject: [PATCH 5/5] ipv6: refactor ip6_finish_output2() Throw neigh checks in ip6_finish_output2() under a single slow path if, so we don't have the overhead in the hot path. Signed-off-by: Pavel Begunkov Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bda1d9f76f7e..afa5bd4ad167 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -119,19 +119,21 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * rcu_read_lock_bh(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dev, false); - if (!IS_ERR(neigh)) { - sock_confirm_neigh(skb, neigh); - ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); - return ret; - } - rcu_read_unlock_bh(); - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); - kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); - return -EINVAL; + if (unlikely(IS_ERR_OR_NULL(neigh))) { + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dev, false); + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); + return -EINVAL; + } + } + sock_confirm_neigh(skb, neigh); + ret = neigh_output(neigh, skb, false); + rcu_read_unlock_bh(); + return ret; } static int