mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 09:04:39 +02:00
Merge branch 'gro-inline-tcp6_gro_-receive-complete'
Eric Dumazet says:
====================
gro: inline tcp6_gro_{receive,complete}
On some platforms, the GRO stack is too deep and causes CPU stalls.
Decreasing the call depth by one shows a 1.5% gain on Zen2 CPUs.
(32 RX queues, 100Gbit NIC, RFS enabled, tcp_rr with 128 threads and 10,000 flows)
We can go further by inlining ipv6_gro_{receive,complete}
and take care of IPv4 if there is interest.
Note: the two temporary __always_inline annotations will be replaced with
inline_for_performance when/if it becomes available.
Cumulative size increase for this series (of 3):
$ scripts/bloat-o-meter -t vmlinux.0 vmlinux.3
add/remove: 2/2 grow/shrink: 5/1 up/down: 1572/-471 (1101)
Function old new delta
ipv6_gro_receive 1069 1846 +777
ipv6_gro_complete 433 733 +300
tcp6_check_fraglist_gro - 272 +272
tcp6_gro_complete 227 306 +79
tcp4_gro_complete 325 397 +72
ipv6_offload_init 218 274 +56
__pfx_tcp6_check_fraglist_gro - 16 +16
__pfx___skb_incr_checksum_unnecessary 32 - -32
__skb_incr_checksum_unnecessary 186 - -186
tcp6_gro_receive 959 706 -253
Total: Before=22592724, After=22593825, chg +0.00%
====================
Link: https://patch.msgid.link/20260120164903.1912995-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
9de76f55b9
|
|
@ -4763,7 +4763,7 @@ static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
|
|||
}
|
||||
}
|
||||
|
||||
static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
|
||||
static __always_inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
|
||||
if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
|
||||
|
|
|
|||
|
|
@ -405,9 +405,8 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
|
|||
struct sk_buff *));
|
||||
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
|
||||
|
||||
INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
|
||||
struct sk_buff *));
|
||||
INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
|
||||
struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *);
|
||||
int udp6_gro_complete(struct sk_buff *, int);
|
||||
|
||||
#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \
|
||||
({ \
|
||||
|
|
|
|||
|
|
@ -2324,8 +2324,6 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
|
|||
struct tcphdr *th);
|
||||
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
|
||||
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
|
||||
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
|
||||
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
|
||||
#ifdef CONFIG_INET
|
||||
void tcp_gro_complete(struct sk_buff *skb);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ obj-$(CONFIG_IPV6_FOU) += fou6.o
|
|||
|
||||
obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
|
||||
obj-$(CONFIG_INET) += output_core.o protocol.o \
|
||||
ip6_offload.o tcpv6_offload.o exthdrs_offload.o
|
||||
ip6_offload.o exthdrs_offload.o
|
||||
|
||||
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
|
||||
|
||||
|
|
|
|||
|
|
@ -19,23 +19,7 @@
|
|||
#include <net/gso.h>
|
||||
|
||||
#include "ip6_offload.h"
|
||||
|
||||
/* All GRO functions are always builtin, except UDP over ipv6, which lies in the
|
||||
* ipv6 module, as it depends on UDPv6 lookup function, so we need special care
|
||||
* when ipv6 is built as a module
|
||||
*/
|
||||
#if IS_BUILTIN(CONFIG_IPV6)
|
||||
#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
|
||||
#else
|
||||
#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \
|
||||
({ \
|
||||
unlikely(gro_recursion_inc_test(skb)) ? \
|
||||
NAPI_GRO_CB(skb)->flush |= 1, NULL : \
|
||||
INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
|
||||
})
|
||||
#include "tcpv6_offload.c"
|
||||
|
||||
static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
|
||||
{
|
||||
|
|
@ -298,9 +282,19 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
|
|||
|
||||
skb_gro_postpull_rcsum(skb, iph, nlen);
|
||||
|
||||
pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
|
||||
ops->callbacks.gro_receive, head, skb);
|
||||
if (unlikely(gro_recursion_inc_test(skb))) {
|
||||
flush = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (likely(proto == IPPROTO_TCP))
|
||||
pp = tcp6_gro_receive(head, skb);
|
||||
#if IS_BUILTIN(CONFIG_IPV6)
|
||||
else if (likely(proto == IPPROTO_UDP))
|
||||
pp = udp6_gro_receive(head, skb);
|
||||
#endif
|
||||
else
|
||||
pp = ops->callbacks.gro_receive(head, skb);
|
||||
out:
|
||||
skb_gro_flush_final(skb, pp, flush);
|
||||
|
||||
|
|
@ -379,11 +373,18 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
|
|||
}
|
||||
|
||||
nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
|
||||
|
||||
if (likely(ops == &net_hotdata.tcpv6_offload))
|
||||
return tcp6_gro_complete(skb, nhoff);
|
||||
#if IS_BUILTIN(CONFIG_IPV6)
|
||||
if (ops == &net_hotdata.udpv6_offload)
|
||||
return udp6_gro_complete(skb, nhoff);
|
||||
#endif
|
||||
|
||||
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
|
||||
goto out;
|
||||
|
||||
err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
|
||||
udp6_gro_complete, skb, nhoff);
|
||||
err = ops->callbacks.gro_complete(skb, nhoff);
|
||||
|
||||
out:
|
||||
return err;
|
||||
|
|
|
|||
|
|
@ -24,9 +24,6 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
|
|||
struct net *net;
|
||||
int iif, sdif;
|
||||
|
||||
if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
|
||||
return;
|
||||
|
||||
p = tcp_gro_lookup(head, th);
|
||||
if (p) {
|
||||
NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
|
||||
|
|
@ -45,8 +42,8 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
|
|||
#endif /* IS_ENABLED(CONFIG_IPV6) */
|
||||
}
|
||||
|
||||
INDIRECT_CALLABLE_SCOPE
|
||||
struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
|
||||
static __always_inline struct sk_buff *tcp6_gro_receive(struct list_head *head,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
struct tcphdr *th;
|
||||
|
||||
|
|
@ -60,7 +57,8 @@ struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
|
|||
if (!th)
|
||||
goto flush;
|
||||
|
||||
tcp6_check_fraglist_gro(head, skb, th);
|
||||
if (unlikely(skb->dev->features & NETIF_F_GRO_FRAGLIST))
|
||||
tcp6_check_fraglist_gro(head, skb, th);
|
||||
|
||||
return tcp_gro_receive(head, skb, th);
|
||||
|
||||
|
|
@ -69,7 +67,7 @@ struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
|
||||
static __always_inline int tcp6_gro_complete(struct sk_buff *skb, int thoff)
|
||||
{
|
||||
const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
|
||||
const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
|
||||
|
|
|
|||
|
|
@ -132,7 +132,6 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
|
|||
sdif, net->ipv4.udp_table, NULL);
|
||||
}
|
||||
|
||||
INDIRECT_CALLABLE_SCOPE
|
||||
struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
|
||||
{
|
||||
struct udphdr *uh = udp_gro_udphdr(skb);
|
||||
|
|
@ -165,7 +164,7 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
|
||||
int udp6_gro_complete(struct sk_buff *skb, int nhoff)
|
||||
{
|
||||
const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
|
||||
const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user