mirror of
https://github.com/torvalds/linux.git
synced 2026-06-27 00:24:51 +02:00
Some networks can make sure TCP payload can exactly fit 4KB pages, with well-chosen MSS/MTU and architectures. Implement mmap() system call so that applications can avoid copying data without complex splice() games. Note that a successful mmap(X bytes) on a TCP socket consumes bytes, as if recvmsg() had been done (tp->copied += X). Only PROT_READ mappings are accepted, as skb page frags are fundamentally shared and read-only. If tcp_mmap() finds data that is not a full page, or a patch of urgent data, -EINVAL is returned and no bytes are consumed. The application must fall back to recvmsg() to read the problematic sequence. mmap() won't block, regardless of the socket being in blocking or non-blocking mode. If not enough bytes are in the receive queue, mmap() would return -EAGAIN, or -EIO if the socket is in a state where no other bytes can be added into the receive queue. An application might use SO_RCVLOWAT, poll() and/or ioctl(FIONREAD) to efficiently use mmap(). On the sender side, MSG_EOR might help to clearly separate unaligned headers and 4K-aligned chunks if necessary. Tested: mlx4 (cx-3) 40Gbit NIC, with tcp_mmap program provided in the following patch.
MTU set to 4168 (4096 TCP payload, 40 bytes IPv6 header, 32 bytes TCP header) Without mmap() (tcp_mmap -s) received 32768 MB (0 % mmap'ed) in 8.13342 s, 33.7961 Gbit, cpu usage user:0.034 sys:3.778, 116.333 usec per MB, 63062 c-switches received 32768 MB (0 % mmap'ed) in 8.14501 s, 33.748 Gbit, cpu usage user:0.029 sys:3.997, 122.864 usec per MB, 61903 c-switches received 32768 MB (0 % mmap'ed) in 8.11723 s, 33.8635 Gbit, cpu usage user:0.048 sys:3.964, 122.437 usec per MB, 62983 c-switches received 32768 MB (0 % mmap'ed) in 8.39189 s, 32.7552 Gbit, cpu usage user:0.038 sys:4.181, 128.754 usec per MB, 55834 c-switches With mmap() on receiver (tcp_mmap -s -z) received 32768 MB (100 % mmap'ed) in 8.03083 s, 34.2278 Gbit, cpu usage user:0.024 sys:1.466, 45.4712 usec per MB, 65479 c-switches received 32768 MB (100 % mmap'ed) in 7.98805 s, 34.4111 Gbit, cpu usage user:0.026 sys:1.401, 43.5486 usec per MB, 65447 c-switches received 32768 MB (100 % mmap'ed) in 7.98377 s, 34.4296 Gbit, cpu usage user:0.028 sys:1.452, 45.166 usec per MB, 65496 c-switches received 32768 MB (99.9969 % mmap'ed) in 8.01838 s, 34.281 Gbit, cpu usage user:0.02 sys:1.446, 44.7388 usec per MB, 65505 c-switches Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> |
||
|---|---|---|
| .. | ||
| 9p | ||
| bluetooth | ||
| caif | ||
| iucv | ||
| netfilter | ||
| netns | ||
| nfc | ||
| phonet | ||
| sctp | ||
| tc_act | ||
| 6lowpan.h | ||
| act_api.h | ||
| addrconf.h | ||
| af_ieee802154.h | ||
| af_rxrpc.h | ||
| af_unix.h | ||
| af_vsock.h | ||
| ah.h | ||
| arp.h | ||
| atmclip.h | ||
| ax25.h | ||
| ax88796.h | ||
| bond_3ad.h | ||
| bond_alb.h | ||
| bond_options.h | ||
| bonding.h | ||
| busy_poll.h | ||
| calipso.h | ||
| cfg80211-wext.h | ||
| cfg80211.h | ||
| cfg802154.h | ||
| checksum.h | ||
| cipso_ipv4.h | ||
| cls_cgroup.h | ||
| codel_impl.h | ||
| codel_qdisc.h | ||
| codel.h | ||
| compat.h | ||
| datalink.h | ||
| dcbevent.h | ||
| dcbnl.h | ||
| devlink.h | ||
| dn_dev.h | ||
| dn_fib.h | ||
| dn_neigh.h | ||
| dn_nsp.h | ||
| dn_route.h | ||
| dn.h | ||
| dsa.h | ||
| dsfield.h | ||
| dst_cache.h | ||
| dst_metadata.h | ||
| dst_ops.h | ||
| dst.h | ||
| erspan.h | ||
| esp.h | ||
| ethoc.h | ||
| fib_notifier.h | ||
| fib_rules.h | ||
| firewire.h | ||
| flow_dissector.h | ||
| flow.h | ||
| fou.h | ||
| fq_impl.h | ||
| fq.h | ||
| garp.h | ||
| gen_stats.h | ||
| genetlink.h | ||
| geneve.h | ||
| gre.h | ||
| gro_cells.h | ||
| gtp.h | ||
| gue.h | ||
| hwbm.h | ||
| icmp.h | ||
| ieee80211_radiotap.h | ||
| ieee802154_netdev.h | ||
| if_inet6.h | ||
| ife.h | ||
| ila.h | ||
| inet_common.h | ||
| inet_connection_sock.h | ||
| inet_ecn.h | ||
| inet_frag.h | ||
| inet_hashtables.h | ||
| inet_sock.h | ||
| inet_timewait_sock.h | ||
| inet6_connection_sock.h | ||
| inet6_hashtables.h | ||
| inetpeer.h | ||
| ip_fib.h | ||
| ip_tunnels.h | ||
| ip_vs.h | ||
| ip.h | ||
| ip6_checksum.h | ||
| ip6_fib.h | ||
| ip6_route.h | ||
| ip6_tunnel.h | ||
| ipcomp.h | ||
| ipconfig.h | ||
| ipv6.h | ||
| ipx.h | ||
| iw_handler.h | ||
| kcm.h | ||
| l3mdev.h | ||
| lapb.h | ||
| lib80211.h | ||
| llc_c_ac.h | ||
| llc_c_ev.h | ||
| llc_c_st.h | ||
| llc_conn.h | ||
| llc_if.h | ||
| llc_pdu.h | ||
| llc_s_ac.h | ||
| llc_s_ev.h | ||
| llc_s_st.h | ||
| llc_sap.h | ||
| llc.h | ||
| lwtunnel.h | ||
| mac80211.h | ||
| mac802154.h | ||
| mip6.h | ||
| mld.h | ||
| mpls_iptunnel.h | ||
| mpls.h | ||
| mrp.h | ||
| ncsi.h | ||
| ndisc.h | ||
| neighbour.h | ||
| net_namespace.h | ||
| net_ratelimit.h | ||
| netevent.h | ||
| netlabel.h | ||
| netlink.h | ||
| netprio_cgroup.h | ||
| netrom.h | ||
| nexthop.h | ||
| nl802154.h | ||
| nsh.h | ||
| p8022.h | ||
| ping.h | ||
| pkt_cls.h | ||
| pkt_sched.h | ||
| pptp.h | ||
| protocol.h | ||
| psample.h | ||
| psnap.h | ||
| raw.h | ||
| rawv6.h | ||
| red.h | ||
| regulatory.h | ||
| request_sock.h | ||
| rose.h | ||
| route.h | ||
| rsi_91x.h | ||
| rtnetlink.h | ||
| sch_generic.h | ||
| scm.h | ||
| secure_seq.h | ||
| seg6_hmac.h | ||
| seg6.h | ||
| slhc_vj.h | ||
| smc.h | ||
| snmp.h | ||
| sock_reuseport.h | ||
| sock.h | ||
| Space.h | ||
| stp.h | ||
| strparser.h | ||
| switchdev.h | ||
| tcp_states.h | ||
| tcp.h | ||
| timewait_sock.h | ||
| tipc.h | ||
| tls.h | ||
| transp_v6.h | ||
| tso.h | ||
| tun_proto.h | ||
| udp_tunnel.h | ||
| udp.h | ||
| udplite.h | ||
| vsock_addr.h | ||
| vxlan.h | ||
| wext.h | ||
| wimax.h | ||
| x25.h | ||
| x25device.h | ||
| xdp.h | ||
| xfrm.h | ||