From 40fc3423b983b864bf70b03199191260ae9b2ea6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:50 -0800 Subject: [PATCH 1/8] tcp: tsq: add tsq_flags / tsq_enum This is a cleanup, to ease code review of following patches. Old 'enum tsq_flags' is renamed, and a new enumeration is added with the flags used in cmpxchg() operations as opposed to single bit operations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 11 ++++++++++- net/ipv4/tcp_output.c | 16 ++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 734bab4c3bef..d8be083ab0b0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -364,7 +364,7 @@ struct tcp_sock { u32 *saved_syn; }; -enum tsq_flags { +enum tsq_enum { TSQ_THROTTLED, TSQ_QUEUED, TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ @@ -375,6 +375,15 @@ enum tsq_flags { */ }; +enum tsq_flags { + TSQF_THROTTLED = (1UL << TSQ_THROTTLED), + TSQF_QUEUED = (1UL << TSQ_QUEUED), + TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), +}; + static inline struct tcp_sock *tcp_sk(const struct sock *sk) { return (struct tcp_sock *)sk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c7adcb57654e..8f0289b0fb24 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -784,10 +784,10 @@ static void tcp_tasklet_func(unsigned long data) } } -#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ - (1UL << TCP_WRITE_TIMER_DEFERRED) | \ - (1UL << TCP_DELACK_TIMER_DEFERRED) | \ - (1UL << TCP_MTU_REDUCED_DEFERRED)) +#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ + TCPF_WRITE_TIMER_DEFERRED | \ + TCPF_DELACK_TIMER_DEFERRED | \ + TCPF_MTU_REDUCED_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -808,7 +808,7 @@ void tcp_release_cb(struct sock *sk) nflags = flags & ~TCP_DEFERRED_ALL; } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); - if (flags & (1UL << TCP_TSQ_DEFERRED)) + if (flags & TCPF_TSQ_DEFERRED) tcp_tsq_handler(sk); /* Here begins the tricky part : @@ -822,15 +822,15 @@ void tcp_release_cb(struct sock *sk) */ sock_release_ownership(sk); - if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { + if (flags & TCPF_WRITE_TIMER_DEFERRED) { tcp_write_timer_handler(sk); __sock_put(sk); } - if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { + if (flags & TCPF_DELACK_TIMER_DEFERRED) { tcp_delack_timer_handler(sk); __sock_put(sk); } - if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { + if (flags & TCPF_MTU_REDUCED_DEFERRED) { inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } From 408f0a6c21e124cc4f6c7aa370b38aa47e55428d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:51 -0800 Subject: [PATCH 2/8] tcp: tsq: remove one locked operation in tcp_wfree() Instead of atomically clear TSQ_THROTTLED and atomically set TSQ_QUEUED bits, use one cmpxchg() to perform a single locked operation. Since the following patch will also set TCP_TSQ_DEFERRED here, this cmpxchg() will make this addition free. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8f0289b0fb24..4adaf8e1bb63 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -860,6 +860,7 @@ void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); + unsigned long flags, nval, oval; int wmem; /* Keep one reference on sk_wmem_alloc. @@ -877,11 +878,17 @@ void tcp_wfree(struct sk_buff *skb) if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) goto out; - if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && - !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { - unsigned long flags; + for (oval = READ_ONCE(tp->tsq_flags);; oval = nval) { struct tsq_tasklet *tsq; + if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED)) + goto out; + + nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED; + nval = cmpxchg(&tp->tsq_flags, oval, nval); + if (nval != oval) + continue; + /* queue this socket to tasklet queue */ local_irq_save(flags); tsq = this_cpu_ptr(&tsq_tasklet); From b223feb9de2a65c533ff95c08e834fa732906ea5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:52 -0800 Subject: [PATCH 3/8] tcp: tsq: add shortcut in tcp_tasklet_func() Under high stress, I've seen tcp_tasklet_func() consuming ~700 usec, handling ~150 tcp sockets. By setting TCP_TSQ_DEFERRED in tcp_wfree(), we give a chance for other cpus/threads entering tcp_write_xmit() to grab it, allowing tcp_tasklet_func() to skip sockets that already did an xmit cycle. In the future, we might give to ACK processing an increased budget to reduce even more tcp_tasklet_func() amount of work. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4adaf8e1bb63..fa23b688a6f3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -767,19 +767,19 @@ static void tcp_tasklet_func(unsigned long data) list_for_each_safe(q, n, &list) { tp = list_entry(q, struct tcp_sock, tsq_node); list_del(&tp->tsq_node); + clear_bit(TSQ_QUEUED, &tp->tsq_flags); sk = (struct sock *)tp; - bh_lock_sock(sk); - - if (!sock_owned_by_user(sk)) { - tcp_tsq_handler(sk); - } else { - /* defer the work to tcp_release_cb() */ - set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); + if (!sk->sk_lock.owned && + test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags)) { + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); + tcp_tsq_handler(sk); + } + bh_unlock_sock(sk); } - bh_unlock_sock(sk); - clear_bit(TSQ_QUEUED, &tp->tsq_flags); sk_free(sk); } } @@ -884,7 +884,7 @@ void tcp_wfree(struct sk_buff *skb) if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED)) goto out; - nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED; + nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED; nval = cmpxchg(&tp->tsq_flags, oval, nval); if (nval != oval) continue; @@ -2229,6 +2229,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; + if (test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags)) + clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); if (tcp_small_queue_check(sk, skb, 0)) break; From a9b204d1564702b704ad6fe74f10a102c7b87ba3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:53 -0800 Subject: [PATCH 4/8] tcp: tsq: avoid one atomic in tcp_wfree() Under high load, tcp_wfree() has an atomic operation trying to schedule a tasklet over and over. We can schedule it only if our per cpu list was empty. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fa23b688a6f3..0db63efe5b8b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -880,6 +880,7 @@ void tcp_wfree(struct sk_buff *skb) for (oval = READ_ONCE(tp->tsq_flags);; oval = nval) { struct tsq_tasklet *tsq; + bool empty; if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED)) goto out; @@ -892,8 +893,10 @@ void tcp_wfree(struct sk_buff *skb) /* queue this socket to tasklet queue */ local_irq_save(flags); tsq = this_cpu_ptr(&tsq_tasklet); + empty = list_empty(&tsq->head); list_add(&tp->tsq_node, &tsq->head); - tasklet_schedule(&tsq->tasklet); + if (empty) + tasklet_schedule(&tsq->tasklet); local_irq_restore(flags); return; } From 75eefc6c59fd2c5f1ab95a3a113c217237d12a31 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:54 -0800 Subject: [PATCH 5/8] tcp: tsq: add a shortcut in tcp_small_queue_check() Always allow the two first skbs in write queue to be sent, regardless of sk_wmem_alloc/sk_pacing_rate values. This helps a lot in situations where TX completions are delayed either because of driver latencies or softirq latencies. Test is done with no cache line misses. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0db63efe5b8b..d5c46749adab 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2091,6 +2091,15 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit <<= factor; if (atomic_read(&sk->sk_wmem_alloc) > limit) { + /* Always send the 1st or 2nd skb in write queue. + * No need to wait for TX completion to call us back, + * after softirq/tasklet schedule. + * This helps when TX completions are delayed too much. + */ + if (skb == sk->sk_write_queue.next || + skb->prev == sk->sk_write_queue.next) + return false; + set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); /* It is possible TX completion already happened * before we set TSQ_THROTTLED, so we must From 12a59abc22d6664f7d3944f625ceefee92de8820 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:55 -0800 Subject: [PATCH 6/8] tcp: tcp_mtu_probe() is likely to exit early Adding a likely() in tcp_mtu_probe() moves its code which used to be inlined in front of tcp_write_xmit() We still have a cache line miss to access icsk->icsk_mtup.enabled, we will probably have to reorganize fields to help data locality. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d5c46749adab..5f04bee4c86a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1932,26 +1932,26 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) */ static int tcp_mtu_probe(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb, *nskb, *next; struct net *net = sock_net(sk); - int len; int probe_size; int size_needed; - int copy; + int copy, len; int mss_now; int interval; /* Not currently probing/verifying, * not in recovery, * have enough cwnd, and - * not SACKing (the variable headers throw things off) */ - if (!icsk->icsk_mtup.enabled || - icsk->icsk_mtup.probe_size || - inet_csk(sk)->icsk_ca_state != TCP_CA_Open || - tp->snd_cwnd < 11 || - tp->rx_opt.num_sacks || tp->rx_opt.dsack) + * not SACKing (the variable headers throw things off) + */ + if (likely(!icsk->icsk_mtup.enabled || + icsk->icsk_mtup.probe_size || + inet_csk(sk)->icsk_ca_state != TCP_CA_Open || + tp->snd_cwnd < 11 || + tp->rx_opt.num_sacks || tp->rx_opt.dsack)) return -1; /* Use binary search for probe_size between tcp_mss_base, From 9115e8cd2a0c6eaaa900c462721f12e1d45f326c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:56 -0800 Subject: [PATCH 7/8] net: reorganize struct sock for better data locality Group fields used in TX path, and keep some cache lines mostly read to permit sharing among cpus. Gained two 4 bytes holes on 64bit arches. Added a place holder for tcp tsq_flags, next to sk_wmem_alloc to speed up tcp_wfree() in the following patch. I have not added ____cacheline_aligned_in_smp, this might be done later. I prefer doing this once inet and tcp/udp sockets reorg is also done. Tested with both TCP and UDP. UDP receiver performance under flood increased by ~20 % : Accessing sk_filter/sk_wq/sk_napi_id no longer stalls because sk_drops was moved away from a critical cache line, now mostly read and shared. /* --- cacheline 4 boundary (256 bytes) --- */ unsigned int sk_napi_id; /* 0x100 0x4 */ int sk_rcvbuf; /* 0x104 0x4 */ struct sk_filter * sk_filter; /* 0x108 0x8 */ union { struct socket_wq * sk_wq; /* 0x8 */ struct socket_wq * sk_wq_raw; /* 0x8 */ }; /* 0x110 0x8 */ struct xfrm_policy * sk_policy[2]; /* 0x118 0x10 */ struct dst_entry * sk_rx_dst; /* 0x128 0x8 */ struct dst_entry * sk_dst_cache; /* 0x130 0x8 */ atomic_t sk_omem_alloc; /* 0x138 0x4 */ int sk_sndbuf; /* 0x13c 0x4 */ /* --- cacheline 5 boundary (320 bytes) --- */ int sk_wmem_queued; /* 0x140 0x4 */ atomic_t sk_wmem_alloc; /* 0x144 0x4 */ long unsigned int sk_tsq_flags; /* 0x148 0x8 */ struct sk_buff * sk_send_head; /* 0x150 0x8 */ struct sk_buff_head sk_write_queue; /* 0x158 0x18 */ __s32 sk_peek_off; /* 0x170 0x4 */ int sk_write_pending; /* 0x174 0x4 */ long int sk_sndtimeo; /* 0x178 0x8 */ Signed-off-by: Eric Dumazet Tested-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 51 ++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 69afda6bea15..6dfe3aa22b97 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -343,6 +343,9 @@ struct sock { #define sk_rxhash __sk_common.skc_rxhash socket_lock_t sk_lock; + atomic_t sk_drops; + int sk_rcvlowat; + struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -359,14 +362,13 @@ struct sock { struct sk_buff *tail; } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - __u32 sk_txhash; + int sk_forward_alloc; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int sk_napi_id; unsigned int sk_ll_usec; + /* ===== mostly read cache line ===== */ + unsigned int sk_napi_id; #endif - atomic_t sk_drops; int sk_rcvbuf; struct sk_filter __rcu *sk_filter; @@ -379,11 +381,30 @@ struct sock { #endif struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - /* Note: 32bit hole on 64bit arches */ - atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; + + /* ===== cache line for TX ===== */ + int sk_wmem_queued; + atomic_t sk_wmem_alloc; + unsigned long sk_tsq_flags; + struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; + __s32 sk_peek_off; + int sk_write_pending; + long sk_sndtimeo; + struct timer_list sk_timer; + __u32 sk_priority; + __u32 sk_mark; + u32 sk_pacing_rate; /* bytes per second */ + u32 sk_max_pacing_rate; + struct page_frag sk_frag; + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; + int sk_gso_type; + unsigned int sk_gso_max_size; + gfp_t sk_allocation; + __u32 sk_txhash; /* * Because of non atomicity rules, all @@ -414,42 +435,24 @@ struct sock { #define SK_PROTOCOL_MAX U8_MAX kmemcheck_bitfield_end(flags); - int sk_wmem_queued; - gfp_t sk_allocation; - u32 sk_pacing_rate; /* bytes per second */ - u32 sk_max_pacing_rate; - netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - int sk_gso_type; - unsigned int sk_gso_max_size; u16 sk_gso_max_segs; - int sk_rcvlowat; unsigned long sk_lingertime; - struct sk_buff_head sk_error_queue; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; - __u32 sk_priority; - __u32 sk_mark; kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; - long sk_sndtimeo; - struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; - struct page_frag sk_frag; - struct sk_buff *sk_send_head; - __s32 sk_peek_off; - int sk_write_pending; #ifdef CONFIG_SECURITY void *sk_security; #endif From 7aa5470c2c09265902b5e4289afa82e4e7c2987e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:57 -0800 Subject: [PATCH 8/8] tcp: tsq: move tsq_flags close to sk_wmem_alloc tsq_flags being in the same cache line than sk_wmem_alloc makes a lot of sense. Both fields are changed from tcp_wfree() and more generally by various TSQ related functions. Prior patch made room in struct sock and added sk_tsq_flags, this patch deletes tsq_flags from struct tcp_sock. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 24 +++++++++++------------- net/ipv4/tcp_timer.c | 4 ++-- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 17 insertions(+), 20 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d8be083ab0b0..fc5848dad7a4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -186,7 +186,6 @@ struct tcp_sock { u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - unsigned long tsq_flags; /* Data for direct copy to user */ struct { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1149b48700a1..1ef3165114ba 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -663,9 +663,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, if (tcp_should_autocork(sk, skb, size_goal)) { /* avoid atomic op if TSQ_THROTTLED bit is already set */ - if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { + if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); - set_bit(TSQ_THROTTLED, &tp->tsq_flags); + set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b50f05905ced..30d81f533ada 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -443,7 +443,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); } else { - if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } goto out; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5f04bee4c86a..b45101f3d2bd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -767,14 +767,15 @@ static void tcp_tasklet_func(unsigned long data) list_for_each_safe(q, n, &list) { tp = list_entry(q, struct tcp_sock, tsq_node); list_del(&tp->tsq_node); - clear_bit(TSQ_QUEUED, &tp->tsq_flags); sk = (struct sock *)tp; + clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags); + if (!sk->sk_lock.owned && - test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags)) { + test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) { bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); + clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags); tcp_tsq_handler(sk); } bh_unlock_sock(sk); @@ -797,16 +798,15 @@ static void tcp_tasklet_func(unsigned long data) */ void tcp_release_cb(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); unsigned long flags, nflags; /* perform an atomic operation only if at least one flag is set */ do { - flags = tp->tsq_flags; + flags = sk->sk_tsq_flags; if (!(flags & TCP_DEFERRED_ALL)) return; nflags = flags & ~TCP_DEFERRED_ALL; - } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); + } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); if (flags & TCPF_TSQ_DEFERRED) tcp_tsq_handler(sk); @@ -878,7 +878,7 @@ void tcp_wfree(struct sk_buff *skb) if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) goto out; - for (oval = READ_ONCE(tp->tsq_flags);; oval = nval) { + for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { struct tsq_tasklet *tsq; bool empty; @@ -886,7 +886,7 @@ void tcp_wfree(struct sk_buff *skb) goto out; nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED; - nval = cmpxchg(&tp->tsq_flags, oval, nval); + nval = cmpxchg(&sk->sk_tsq_flags, oval, nval); if (nval != oval) continue; @@ -2100,7 +2100,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, skb->prev == sk->sk_write_queue.next) return false; - set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); + set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); /* It is possible TX completion already happened * before we set TSQ_THROTTLED, so we must * test again the condition. @@ -2241,8 +2241,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; - if (test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags)) - clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); + if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) + clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags); if (tcp_small_queue_check(sk, skb, 0)) break; @@ -3545,8 +3545,6 @@ void tcp_send_ack(struct sock *sk) /* We do not want pure acks influencing TCP Small Queues or fq/pacing * too much. * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 - * We also avoid tcp_wfree() overhead (cache line miss accessing - * tp->tsq_flags) by using regular sock_wfree() */ skb_set_tcp_pure_ack(buff); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3ea1cf804748..3705075f42c3 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -310,7 +310,7 @@ static void tcp_delack_timer(unsigned long data) inet_csk(sk)->icsk_ack.blocked = 1; __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); @@ -592,7 +592,7 @@ static void tcp_write_timer(unsigned long data) tcp_write_timer_handler(sk); } else { /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a2185a214abc..73bc8fc68acd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -399,7 +399,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, - &tp->tsq_flags)) + &sk->sk_tsq_flags)) sock_hold(sk); goto out; }