mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 17:13:52 +02:00
Merge branch 'reorganize-remaining-patch-of-networking-struct-cachelines'
Coco Li says:

====================
Reorganize remaining patch of networking struct cachelines

Rebase patches to top-of-head in https://lwn.net/Articles/951321/ to
ensure the results of the cacheline savings are still accurate.
====================

Link: https://lore.kernel.org/r/20231204201232.520025-1-lixiaoyan@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
faf4cf7495
|
|
@ -2097,6 +2097,70 @@ enum netdev_stat_type {
|
|||
*/
|
||||
|
||||
struct net_device {
|
||||
/* Cacheline organization can be found documented in
|
||||
* Documentation/networking/net_cachelines/net_device.rst.
|
||||
* Please update the document when adding new fields.
|
||||
*/
|
||||
|
||||
/* TX read-mostly hotpath */
|
||||
__cacheline_group_begin(net_device_read_tx);
|
||||
unsigned long long priv_flags;
|
||||
const struct net_device_ops *netdev_ops;
|
||||
const struct header_ops *header_ops;
|
||||
struct netdev_queue *_tx;
|
||||
unsigned int real_num_tx_queues;
|
||||
unsigned int gso_max_size;
|
||||
unsigned int gso_ipv4_max_size;
|
||||
u16 gso_max_segs;
|
||||
s16 num_tc;
|
||||
/* Note : dev->mtu is often read without holding a lock.
|
||||
* Writers usually hold RTNL.
|
||||
* It is recommended to use READ_ONCE() to annotate the reads,
|
||||
* and to use WRITE_ONCE() to annotate the writes.
|
||||
*/
|
||||
unsigned int mtu;
|
||||
unsigned short needed_headroom;
|
||||
struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
|
||||
#ifdef CONFIG_XPS
|
||||
struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER_EGRESS
|
||||
struct nf_hook_entries __rcu *nf_hooks_egress;
|
||||
#endif
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
struct bpf_mprog_entry __rcu *tcx_egress;
|
||||
#endif
|
||||
__cacheline_group_end(net_device_read_tx);
|
||||
|
||||
/* TXRX read-mostly hotpath */
|
||||
__cacheline_group_begin(net_device_read_txrx);
|
||||
unsigned int flags;
|
||||
unsigned short hard_header_len;
|
||||
netdev_features_t features;
|
||||
struct inet6_dev __rcu *ip6_ptr;
|
||||
__cacheline_group_end(net_device_read_txrx);
|
||||
|
||||
/* RX read-mostly hotpath */
|
||||
__cacheline_group_begin(net_device_read_rx);
|
||||
struct list_head ptype_specific;
|
||||
int ifindex;
|
||||
unsigned int real_num_rx_queues;
|
||||
struct netdev_rx_queue *_rx;
|
||||
unsigned long gro_flush_timeout;
|
||||
int napi_defer_hard_irqs;
|
||||
unsigned int gro_max_size;
|
||||
unsigned int gro_ipv4_max_size;
|
||||
rx_handler_func_t __rcu *rx_handler;
|
||||
void __rcu *rx_handler_data;
|
||||
possible_net_t nd_net;
|
||||
#ifdef CONFIG_NETPOLL
|
||||
struct netpoll_info __rcu *npinfo;
|
||||
#endif
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
struct bpf_mprog_entry __rcu *tcx_ingress;
|
||||
#endif
|
||||
__cacheline_group_end(net_device_read_rx);
|
||||
|
||||
char name[IFNAMSIZ];
|
||||
struct netdev_name_node *name_node;
|
||||
struct dev_ifalias __rcu *ifalias;
|
||||
|
|
@ -2121,7 +2185,6 @@ struct net_device {
|
|||
struct list_head unreg_list;
|
||||
struct list_head close_list;
|
||||
struct list_head ptype_all;
|
||||
struct list_head ptype_specific;
|
||||
|
||||
struct {
|
||||
struct list_head upper;
|
||||
|
|
@ -2129,26 +2192,13 @@ struct net_device {
|
|||
} adj_list;
|
||||
|
||||
/* Read-mostly cache-line for fast-path access */
|
||||
unsigned int flags;
|
||||
xdp_features_t xdp_features;
|
||||
unsigned long long priv_flags;
|
||||
const struct net_device_ops *netdev_ops;
|
||||
const struct xdp_metadata_ops *xdp_metadata_ops;
|
||||
const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops;
|
||||
int ifindex;
|
||||
unsigned short gflags;
|
||||
unsigned short hard_header_len;
|
||||
|
||||
/* Note : dev->mtu is often read without holding a lock.
|
||||
* Writers usually hold RTNL.
|
||||
* It is recommended to use READ_ONCE() to annotate the reads,
|
||||
* and to use WRITE_ONCE() to annotate the writes.
|
||||
*/
|
||||
unsigned int mtu;
|
||||
unsigned short needed_headroom;
|
||||
unsigned short needed_tailroom;
|
||||
|
||||
netdev_features_t features;
|
||||
netdev_features_t hw_features;
|
||||
netdev_features_t wanted_features;
|
||||
netdev_features_t vlan_features;
|
||||
|
|
@ -2192,8 +2242,6 @@ struct net_device {
|
|||
const struct tlsdev_ops *tlsdev_ops;
|
||||
#endif
|
||||
|
||||
const struct header_ops *header_ops;
|
||||
|
||||
unsigned char operstate;
|
||||
unsigned char link_mode;
|
||||
|
||||
|
|
@ -2234,9 +2282,7 @@ struct net_device {
|
|||
|
||||
|
||||
/* Protocol-specific pointers */
|
||||
|
||||
struct in_device __rcu *ip_ptr;
|
||||
struct inet6_dev __rcu *ip6_ptr;
|
||||
#if IS_ENABLED(CONFIG_VLAN_8021Q)
|
||||
struct vlan_info __rcu *vlan_info;
|
||||
#endif
|
||||
|
|
@ -2271,26 +2317,14 @@ struct net_device {
|
|||
/* Interface address info used in eth_type_trans() */
|
||||
const unsigned char *dev_addr;
|
||||
|
||||
struct netdev_rx_queue *_rx;
|
||||
unsigned int num_rx_queues;
|
||||
unsigned int real_num_rx_queues;
|
||||
|
||||
struct bpf_prog __rcu *xdp_prog;
|
||||
unsigned long gro_flush_timeout;
|
||||
int napi_defer_hard_irqs;
|
||||
#define GRO_LEGACY_MAX_SIZE 65536u
|
||||
/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE),
|
||||
* and shinfo->gso_segs is a 16bit field.
|
||||
*/
|
||||
#define GRO_MAX_SIZE (8 * 65535u)
|
||||
unsigned int gro_max_size;
|
||||
unsigned int gro_ipv4_max_size;
|
||||
unsigned int xdp_zc_max_segs;
|
||||
rx_handler_func_t __rcu *rx_handler;
|
||||
void __rcu *rx_handler_data;
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
struct bpf_mprog_entry __rcu *tcx_ingress;
|
||||
#endif
|
||||
struct netdev_queue __rcu *ingress_queue;
|
||||
#ifdef CONFIG_NETFILTER_INGRESS
|
||||
struct nf_hook_entries __rcu *nf_hooks_ingress;
|
||||
|
|
@ -2305,25 +2339,13 @@ struct net_device {
|
|||
/*
|
||||
* Cache lines mostly used on transmit path
|
||||
*/
|
||||
struct netdev_queue *_tx ____cacheline_aligned_in_smp;
|
||||
unsigned int num_tx_queues;
|
||||
unsigned int real_num_tx_queues;
|
||||
struct Qdisc __rcu *qdisc;
|
||||
unsigned int tx_queue_len;
|
||||
spinlock_t tx_global_lock;
|
||||
|
||||
struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
|
||||
|
||||
#ifdef CONFIG_XPS
|
||||
struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
|
||||
#endif
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
struct bpf_mprog_entry __rcu *tcx_egress;
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER_EGRESS
|
||||
struct nf_hook_entries __rcu *nf_hooks_egress;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NET_SCHED
|
||||
DECLARE_HASHTABLE (qdisc_hash, 4);
|
||||
#endif
|
||||
|
|
@ -2362,12 +2384,6 @@ struct net_device {
|
|||
bool needs_free_netdev;
|
||||
void (*priv_destructor)(struct net_device *dev);
|
||||
|
||||
#ifdef CONFIG_NETPOLL
|
||||
struct netpoll_info __rcu *npinfo;
|
||||
#endif
|
||||
|
||||
possible_net_t nd_net;
|
||||
|
||||
/* mid-layer private */
|
||||
void *ml_priv;
|
||||
enum netdev_ml_priv_type ml_priv_type;
|
||||
|
|
@ -2402,20 +2418,15 @@ struct net_device {
|
|||
*/
|
||||
#define GSO_MAX_SIZE (8 * GSO_MAX_SEGS)
|
||||
|
||||
unsigned int gso_max_size;
|
||||
#define TSO_LEGACY_MAX_SIZE 65536
|
||||
#define TSO_MAX_SIZE UINT_MAX
|
||||
unsigned int tso_max_size;
|
||||
u16 gso_max_segs;
|
||||
#define TSO_MAX_SEGS U16_MAX
|
||||
u16 tso_max_segs;
|
||||
unsigned int gso_ipv4_max_size;
|
||||
|
||||
#ifdef CONFIG_DCB
|
||||
const struct dcbnl_rtnl_ops *dcbnl_ops;
|
||||
#endif
|
||||
s16 num_tc;
|
||||
struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
|
||||
u8 prio_tc_map[TC_BITMASK + 1];
|
||||
|
||||
#if IS_ENABLED(CONFIG_FCOE)
|
||||
|
|
|
|||
|
|
@ -194,36 +194,62 @@ static inline bool tcp_rsk_used_ao(const struct request_sock *req)
|
|||
#define TCP_RMEM_TO_WIN_SCALE 8
|
||||
|
||||
struct tcp_sock {
|
||||
/* Cacheline organization can be found documented in
|
||||
* Documentation/networking/net_cachelines/tcp_sock.rst.
|
||||
* Please update the document when adding new fields.
|
||||
*/
|
||||
|
||||
/* inet_connection_sock has to be the first member of tcp_sock */
|
||||
struct inet_connection_sock inet_conn;
|
||||
u16 tcp_header_len; /* Bytes of tcp header to send */
|
||||
|
||||
/* TX read-mostly hotpath cache lines */
|
||||
__cacheline_group_begin(tcp_sock_read_tx);
|
||||
/* timestamp of last sent data packet (for restart window) */
|
||||
u32 max_window; /* Maximal window ever seen from peer */
|
||||
u32 rcv_ssthresh; /* Current window clamp */
|
||||
u32 reordering; /* Packet reordering metric. */
|
||||
u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */
|
||||
u16 gso_segs; /* Max number of segs per GSO packet */
|
||||
/* from STCP, retrans queue hinting */
|
||||
struct sk_buff *lost_skb_hint;
|
||||
struct sk_buff *retransmit_skb_hint;
|
||||
__cacheline_group_end(tcp_sock_read_tx);
|
||||
|
||||
/*
|
||||
* Header prediction flags
|
||||
* 0x5?10 << 16 + snd_wnd in net byte order
|
||||
*/
|
||||
__be32 pred_flags;
|
||||
/* TXRX read-mostly hotpath cache lines */
|
||||
__cacheline_group_begin(tcp_sock_read_txrx);
|
||||
u32 tsoffset; /* timestamp offset */
|
||||
u32 snd_wnd; /* The window we expect to receive */
|
||||
u32 mss_cache; /* Cached effective mss, not including SACKS */
|
||||
u32 snd_cwnd; /* Sending congestion window */
|
||||
u32 prr_out; /* Total number of pkts sent during Recovery. */
|
||||
u32 lost_out; /* Lost packets */
|
||||
u32 sacked_out; /* SACK'd packets */
|
||||
u16 tcp_header_len; /* Bytes of tcp header to send */
|
||||
u8 chrono_type : 2, /* current chronograph type */
|
||||
repair : 1,
|
||||
is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
|
||||
is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
|
||||
__cacheline_group_end(tcp_sock_read_txrx);
|
||||
|
||||
/*
|
||||
* RFC793 variables by their proper names. This means you can
|
||||
* read the code and the spec side by side (and laugh ...)
|
||||
* See RFC793 and RFC1122. The RFC writes these in capitals.
|
||||
*/
|
||||
u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
|
||||
* sum(delta(rcv_nxt)), or how many bytes
|
||||
* were acked.
|
||||
*/
|
||||
u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
|
||||
* total number of segments in.
|
||||
*/
|
||||
u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
|
||||
* total number of data segments in.
|
||||
*/
|
||||
u32 rcv_nxt; /* What we want to receive next */
|
||||
u32 copied_seq; /* Head of yet unread data */
|
||||
u32 rcv_wup; /* rcv_nxt on last window update sent */
|
||||
u32 snd_nxt; /* Next sequence we send */
|
||||
/* RX read-mostly hotpath cache lines */
|
||||
__cacheline_group_begin(tcp_sock_read_rx);
|
||||
u32 copied_seq; /* Head of yet unread data */
|
||||
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
|
||||
u32 snd_wl1; /* Sequence for window update */
|
||||
u32 tlp_high_seq; /* snd_nxt at the time of TLP */
|
||||
u32 rttvar_us; /* smoothed mdev_max */
|
||||
u32 retrans_out; /* Retransmitted packets out */
|
||||
u16 advmss; /* Advertised MSS */
|
||||
u16 urg_data; /* Saved octet of OOB data and control flags */
|
||||
u32 lost; /* Total data packets lost incl. rexmits */
|
||||
struct minmax rtt_min;
|
||||
/* OOO segments go in this rbtree. Socket lock must be held. */
|
||||
struct rb_root out_of_order_queue;
|
||||
u32 snd_ssthresh; /* Slow start size threshold */
|
||||
__cacheline_group_end(tcp_sock_read_rx);
|
||||
|
||||
/* TX read-write hotpath cache lines */
|
||||
__cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned;
|
||||
u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
|
||||
* The total number of segments sent.
|
||||
*/
|
||||
|
|
@ -233,32 +259,103 @@ struct tcp_sock {
|
|||
u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut
|
||||
* total number of data bytes sent.
|
||||
*/
|
||||
u32 snd_sml; /* Last byte of the most recently transmitted small packet */
|
||||
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
|
||||
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
|
||||
u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
|
||||
u32 pushed_seq; /* Last pushed seq, required to talk to windows */
|
||||
u32 lsndtime;
|
||||
u32 mdev_us; /* medium deviation */
|
||||
u64 tcp_wstamp_ns; /* departure time for next sent data packet */
|
||||
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
|
||||
u64 tcp_mstamp; /* most recent packet received/sent */
|
||||
u32 rtt_seq; /* sequence number to update rttvar */
|
||||
struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */
|
||||
struct sk_buff *highest_sack; /* skb just after the highest
|
||||
* skb with SACKed bit set
|
||||
* (validity guaranteed only if
|
||||
* sacked_out > 0)
|
||||
*/
|
||||
u8 ecn_flags; /* ECN status bits. */
|
||||
__cacheline_group_end(tcp_sock_write_tx);
|
||||
|
||||
/* TXRX read-write hotpath cache lines */
|
||||
__cacheline_group_begin(tcp_sock_write_txrx);
|
||||
/*
|
||||
* Header prediction flags
|
||||
* 0x5?10 << 16 + snd_wnd in net byte order
|
||||
*/
|
||||
__be32 pred_flags;
|
||||
u32 rcv_nxt; /* What we want to receive next */
|
||||
u32 snd_nxt; /* Next sequence we send */
|
||||
u32 snd_una; /* First byte we want an ack for */
|
||||
u32 window_clamp; /* Maximal window to advertise */
|
||||
u32 srtt_us; /* smoothed round trip time << 3 in usecs */
|
||||
u32 packets_out; /* Packets which are "in flight" */
|
||||
u32 snd_up; /* Urgent pointer */
|
||||
u32 delivered; /* Total data packets delivered incl. rexmits */
|
||||
u32 delivered_ce; /* Like the above but only ECE marked packets */
|
||||
u32 app_limited; /* limited until "delivered" reaches this val */
|
||||
u32 rcv_wnd; /* Current receiver window */
|
||||
/*
|
||||
* Options received (usually on last packet, some only on SYN packets).
|
||||
*/
|
||||
struct tcp_options_received rx_opt;
|
||||
u8 nonagle : 4,/* Disable Nagle algorithm? */
|
||||
rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
|
||||
__cacheline_group_end(tcp_sock_write_txrx);
|
||||
|
||||
/* RX read-write hotpath cache lines */
|
||||
__cacheline_group_begin(tcp_sock_write_rx);
|
||||
u64 bytes_received;
|
||||
/* RFC4898 tcpEStatsAppHCThruOctetsReceived
|
||||
* sum(delta(rcv_nxt)), or how many bytes
|
||||
* were acked.
|
||||
*/
|
||||
u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
|
||||
* total number of segments in.
|
||||
*/
|
||||
u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
|
||||
* total number of data segments in.
|
||||
*/
|
||||
u32 rcv_wup; /* rcv_nxt on last window update sent */
|
||||
u32 max_packets_out; /* max packets_out in last window */
|
||||
u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */
|
||||
u32 rate_delivered; /* saved rate sample: packets delivered */
|
||||
u32 rate_interval_us; /* saved rate sample: time elapsed */
|
||||
u32 rcv_rtt_last_tsecr;
|
||||
u64 first_tx_mstamp; /* start of window send phase */
|
||||
u64 delivered_mstamp; /* time we reached "delivered" */
|
||||
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
|
||||
* sum(delta(snd_una)), or how many bytes
|
||||
* were acked.
|
||||
*/
|
||||
struct {
|
||||
u32 rtt_us;
|
||||
u32 seq;
|
||||
u64 time;
|
||||
} rcv_rtt_est;
|
||||
/* Receiver queue space */
|
||||
struct {
|
||||
u32 space;
|
||||
u32 seq;
|
||||
u64 time;
|
||||
} rcvq_space;
|
||||
__cacheline_group_end(tcp_sock_write_rx);
|
||||
/* End of Hot Path */
|
||||
|
||||
/*
|
||||
* RFC793 variables by their proper names. This means you can
|
||||
* read the code and the spec side by side (and laugh ...)
|
||||
* See RFC793 and RFC1122. The RFC writes these in capitals.
|
||||
*/
|
||||
u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
|
||||
* total number of DSACK blocks received
|
||||
*/
|
||||
u32 snd_una; /* First byte we want an ack for */
|
||||
u32 snd_sml; /* Last byte of the most recently transmitted small packet */
|
||||
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
|
||||
u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
|
||||
u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */
|
||||
u32 compressed_ack_rcv_nxt;
|
||||
|
||||
u32 tsoffset; /* timestamp offset */
|
||||
|
||||
struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
|
||||
struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */
|
||||
|
||||
u32 snd_wl1; /* Sequence for window update */
|
||||
u32 snd_wnd; /* The window we expect to receive */
|
||||
u32 max_window; /* Maximal window ever seen from peer */
|
||||
u32 mss_cache; /* Cached effective mss, not including SACKS */
|
||||
|
||||
u32 window_clamp; /* Maximal window to advertise */
|
||||
u32 rcv_ssthresh; /* Current window clamp */
|
||||
u8 scaling_ratio; /* see tcp_win_from_space() */
|
||||
/* Information of the most recently (s)acked skb */
|
||||
struct tcp_rack {
|
||||
|
|
@ -272,24 +369,16 @@ struct tcp_sock {
|
|||
dsack_seen:1, /* Whether DSACK seen after last adj */
|
||||
advanced:1; /* mstamp advanced since last lost marking */
|
||||
} rack;
|
||||
u16 advmss; /* Advertised MSS */
|
||||
u8 compressed_ack;
|
||||
u8 dup_ack_counter:2,
|
||||
tlp_retrans:1, /* TLP is a retransmission */
|
||||
tcp_usec_ts:1, /* TSval values in usec */
|
||||
unused:4;
|
||||
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
|
||||
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
|
||||
u8 chrono_type:2, /* current chronograph type */
|
||||
rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
|
||||
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
|
||||
recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
|
||||
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
|
||||
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
|
||||
is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
|
||||
fastopen_client_fail:2; /* reason why fastopen failed */
|
||||
u8 nonagle : 4,/* Disable Nagle algorithm? */
|
||||
thin_lto : 1,/* Use linear timeouts for thin streams */
|
||||
recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
|
||||
repair : 1,
|
||||
fastopen_client_fail:2, /* reason why fastopen failed */
|
||||
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
|
||||
u8 repair_queue;
|
||||
u8 save_syn:2, /* Save headers of SYN packet */
|
||||
|
|
@ -297,45 +386,19 @@ struct tcp_sock {
|
|||
syn_fastopen:1, /* SYN includes Fast Open option */
|
||||
syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
|
||||
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
|
||||
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
|
||||
is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
|
||||
u32 tlp_high_seq; /* snd_nxt at the time of TLP */
|
||||
syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
|
||||
|
||||
u32 tcp_tx_delay; /* delay (in usec) added to TX packets */
|
||||
u64 tcp_wstamp_ns; /* departure time for next sent data packet */
|
||||
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
|
||||
|
||||
/* RTT measurement */
|
||||
u64 tcp_mstamp; /* most recent packet received/sent */
|
||||
u32 srtt_us; /* smoothed round trip time << 3 in usecs */
|
||||
u32 mdev_us; /* medium deviation */
|
||||
u32 mdev_max_us; /* maximal mdev for the last rtt period */
|
||||
u32 rttvar_us; /* smoothed mdev_max */
|
||||
u32 rtt_seq; /* sequence number to update rttvar */
|
||||
struct minmax rtt_min;
|
||||
|
||||
u32 packets_out; /* Packets which are "in flight" */
|
||||
u32 retrans_out; /* Retransmitted packets out */
|
||||
u32 max_packets_out; /* max packets_out in last window */
|
||||
u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */
|
||||
|
||||
u16 urg_data; /* Saved octet of OOB data and control flags */
|
||||
u8 ecn_flags; /* ECN status bits. */
|
||||
u8 keepalive_probes; /* num of allowed keep alive probes */
|
||||
u32 reordering; /* Packet reordering metric. */
|
||||
u32 reord_seen; /* number of data packet reordering events */
|
||||
u32 snd_up; /* Urgent pointer */
|
||||
|
||||
/*
|
||||
* Options received (usually on last packet, some only on SYN packets).
|
||||
*/
|
||||
struct tcp_options_received rx_opt;
|
||||
|
||||
/*
|
||||
* Slow start and congestion control (see also Nagle, and Karn & Partridge)
|
||||
*/
|
||||
u32 snd_ssthresh; /* Slow start size threshold */
|
||||
u32 snd_cwnd; /* Sending congestion window */
|
||||
u32 snd_cwnd_cnt; /* Linear increase counter */
|
||||
u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
|
||||
u32 snd_cwnd_used;
|
||||
|
|
@ -343,32 +406,10 @@ struct tcp_sock {
|
|||
u32 prior_cwnd; /* cwnd right before starting loss recovery */
|
||||
u32 prr_delivered; /* Number of newly delivered packets to
|
||||
* receiver in Recovery. */
|
||||
u32 prr_out; /* Total number of pkts sent during Recovery. */
|
||||
u32 delivered; /* Total data packets delivered incl. rexmits */
|
||||
u32 delivered_ce; /* Like the above but only ECE marked packets */
|
||||
u32 lost; /* Total data packets lost incl. rexmits */
|
||||
u32 app_limited; /* limited until "delivered" reaches this val */
|
||||
u64 first_tx_mstamp; /* start of window send phase */
|
||||
u64 delivered_mstamp; /* time we reached "delivered" */
|
||||
u32 rate_delivered; /* saved rate sample: packets delivered */
|
||||
u32 rate_interval_us; /* saved rate sample: time elapsed */
|
||||
|
||||
u32 rcv_wnd; /* Current receiver window */
|
||||
u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
|
||||
u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */
|
||||
u32 pushed_seq; /* Last pushed seq, required to talk to windows */
|
||||
u32 lost_out; /* Lost packets */
|
||||
u32 sacked_out; /* SACK'd packets */
|
||||
|
||||
struct hrtimer pacing_timer;
|
||||
struct hrtimer compressed_ack_timer;
|
||||
|
||||
/* from STCP, retrans queue hinting */
|
||||
struct sk_buff* lost_skb_hint;
|
||||
struct sk_buff *retransmit_skb_hint;
|
||||
|
||||
/* OOO segments go in this rbtree. Socket lock must be held. */
|
||||
struct rb_root out_of_order_queue;
|
||||
struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */
|
||||
|
||||
/* SACKs data, these 2 need to be together (see tcp_options_write) */
|
||||
|
|
@ -377,12 +418,6 @@ struct tcp_sock {
|
|||
|
||||
struct tcp_sack_block recv_sack_cache[4];
|
||||
|
||||
struct sk_buff *highest_sack; /* skb just after the highest
|
||||
* skb with SACKed bit set
|
||||
* (validity guaranteed only if
|
||||
* sacked_out > 0)
|
||||
*/
|
||||
|
||||
int lost_cnt_hint;
|
||||
|
||||
u32 prior_ssthresh; /* ssthresh saved at recovery start */
|
||||
|
|
@ -433,21 +468,6 @@ struct tcp_sock {
|
|||
|
||||
u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */
|
||||
|
||||
/* Receiver side RTT estimation */
|
||||
u32 rcv_rtt_last_tsecr;
|
||||
struct {
|
||||
u32 rtt_us;
|
||||
u32 seq;
|
||||
u64 time;
|
||||
} rcv_rtt_est;
|
||||
|
||||
/* Receiver queue space */
|
||||
struct {
|
||||
u32 space;
|
||||
u32 seq;
|
||||
u64 time;
|
||||
} rcvq_space;
|
||||
|
||||
/* TCP-specific MTU probe information. */
|
||||
struct {
|
||||
u32 probe_seq_start;
|
||||
|
|
|
|||
|
|
@ -11609,6 +11609,60 @@ static struct pernet_operations __net_initdata default_device_ops = {
|
|||
.exit_batch = default_device_exit_batch,
|
||||
};
|
||||
|
||||
/*
 * net_dev_struct_check - layout checks for the hot-path groups of
 * struct net_device.
 *
 * Walks the three groups net_device_read_tx, net_device_read_txrx and
 * net_device_read_rx. For each one it lists the members expected in the
 * group, followed by the expected size of the group. Members that are
 * guarded by a CONFIG_* option are checked under the same #ifdef here.
 *
 * Called from net_dev_init().
 *
 * NOTE(review): CACHELINE_ASSERT_GROUP_MEMBER() and
 * CACHELINE_ASSERT_GROUP_SIZE() are defined outside this view; they are
 * presumably build-time assertions on member placement and on the group
 * size in bytes -- confirm against their definitions.
 *
 * When a field is moved into or out of one of these groups in
 * struct net_device, the matching member line and the group size below
 * need to be revisited.
 */
static void __init net_dev_struct_check(void)
|
||||
{
|
||||
/* TX read-mostly hotpath */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq);
|
||||
#ifdef CONFIG_XPS
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps);
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER_EGRESS
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress);
|
||||
#endif
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress);
|
||||
#endif
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152);
|
||||
|
||||
/* TXRX read-mostly hotpath */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
|
||||
|
||||
/* RX read-mostly hotpath */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net);
|
||||
#ifdef CONFIG_NETPOLL
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo);
|
||||
#endif
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
|
||||
#endif
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the DEV module. At boot time this walks the device list and
|
||||
* unhooks any devices that fail to initialise (normally hardware not
|
||||
|
|
@ -11626,6 +11680,8 @@ static int __init net_dev_init(void)
|
|||
|
||||
BUG_ON(!dev_boot_phase);
|
||||
|
||||
net_dev_struct_check();
|
||||
|
||||
if (dev_proc_init())
|
||||
goto out;
|
||||
|
||||
|
|
|
|||
|
|
@ -4564,6 +4564,97 @@ static void __init tcp_init_mem(void)
|
|||
sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
|
||||
}
|
||||
|
||||
/*
 * tcp_struct_check - layout checks for the hot-path groups of
 * struct tcp_sock.
 *
 * Walks the six groups tcp_sock_read_tx, tcp_sock_read_txrx,
 * tcp_sock_read_rx, tcp_sock_write_tx, tcp_sock_write_txrx and
 * tcp_sock_write_rx. For each one it lists the members expected in the
 * group, followed by the expected size of the group.
 *
 * Called from tcp_init().
 *
 * NOTE(review): CACHELINE_ASSERT_GROUP_MEMBER() and
 * CACHELINE_ASSERT_GROUP_SIZE() are defined outside this view; they are
 * presumably build-time assertions on member placement and on the group
 * size in bytes -- confirm against their definitions.
 *
 * NOTE(review): none of the bit-field members placed in these groups
 * (e.g. chrono_type, nonagle) is listed here, and tcp_header_len, which
 * appears inside tcp_sock_read_txrx in the structure, has no member
 * check either -- confirm that this is intentional.
 */
static void __init tcp_struct_check(void)
|
||||
{
|
||||
/* TX read-mostly hotpath cache lines */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, max_window);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, rcv_ssthresh);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40);
|
||||
|
||||
/* TXRX read-mostly hotpath cache lines */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_wnd);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, mss_cache);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_cwnd);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31);
|
||||
|
||||
/* RX read-mostly hotpath cache lines */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, retrans_out);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, advmss);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, urg_data);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, lost);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
|
||||
|
||||
/* TX read-write hotpath cache lines */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, data_segs_out);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, bytes_sent);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, snd_sml);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_start);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_stat);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, write_seq);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, pushed_seq);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113);
|
||||
|
||||
/* TXRX read-write hotpath cache lines */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, window_clamp);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, srtt_us);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, packets_out);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
|
||||
|
||||
/* RX read-write hotpath cache lines */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, data_segs_in);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_wup);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, max_packets_out);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, cwnd_usage_seq);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
|
||||
}
|
||||
|
||||
void __init tcp_init(void)
|
||||
{
|
||||
int max_rshare, max_wshare, cnt;
|
||||
|
|
@ -4574,6 +4665,8 @@ void __init tcp_init(void)
|
|||
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
|
||||
sizeof_field(struct sk_buff, cb));
|
||||
|
||||
tcp_struct_check();
|
||||
|
||||
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
|
||||
|
||||
timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user