mirror of
https://github.com/torvalds/linux.git
synced 2026-06-08 14:42:37 +02:00
tcp: allow splice() to build full TSO packets
[ This combines upstream commit 2f53384424 and the follow-on bug fix commit 35f9c09fe9 ] vmsplice()/splice(pipe, socket) call do_tcp_sendpages() one page at a time, adding at most 4096 bytes to an skb (assuming PAGE_SIZE=4096). The call to tcp_push() at the end of do_tcp_sendpages() forces an immediate xmit when the pipe is not already filled, and tso_fragment() tries to split these skbs into MSS multiples. 4096 bytes are usually split into an skb with 2 MSS and a remaining sub-MSS skb (assuming MTU=1500). This makes slow start suboptimal because many small frames are sent to the qdisc/driver layers instead of big ones (constrained by cwnd and packets in flight, of course). In fact, applications using sendmsg() (adding an additional memory copy) instead of vmsplice()/splice()/sendfile() are a bit faster because of this anomaly, especially if serving small files in environments with a large initial [c]wnd. Call tcp_push() only if MSG_MORE is not set in the flags parameter. This bit is automatically provided by splice() internals for all but the last page, or on all pages if the user specified the SPLICE_F_MORE splice() flag. In some workloads, this can reduce the number of sent logical packets by an order of magnitude, making zero-copy TCP actually faster than one-copy :) Reported-by: Tom Herbert <therbert@google.com> Cc: Nandita Dukkipati <nanditad@google.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Tom Herbert <therbert@google.com> Cc: Yuchung Cheng <ycheng@google.com> Cc: H.K. Jerry Chu <hkchu@google.com> Cc: Maciej Żenczykowski <maze@google.com> Cc: Mahesh Bandewar <maheshb@google.com> Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
d2491ed1e1
commit
8d2228dd95
|
|
@ -31,6 +31,7 @@
|
|||
#include <linux/uio.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/socket.h>
|
||||
|
||||
/*
|
||||
* Attempt to steal a page from a pipe buffer. This should perhaps go into
|
||||
|
|
@ -691,7 +692,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
|
|||
if (!likely(file->f_op && file->f_op->sendpage))
|
||||
return -EINVAL;
|
||||
|
||||
more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
|
||||
more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
|
||||
if (sd->len < sd->total_len)
|
||||
more |= MSG_SENDPAGE_NOTLAST;
|
||||
return file->f_op->sendpage(file, buf->page, buf->offset,
|
||||
sd->len, &pos, more);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -261,7 +261,7 @@ struct ucred {
|
|||
#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
|
||||
#define MSG_MORE 0x8000 /* Sender will send more */
|
||||
#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */
|
||||
|
||||
#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
|
||||
#define MSG_EOF MSG_FIN
|
||||
|
||||
#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file
|
||||
|
|
|
|||
|
|
@ -860,7 +860,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
|
|||
}
|
||||
|
||||
out:
|
||||
if (copied)
|
||||
if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
|
||||
tcp_push(sk, flags, mss_now, tp->nonagle);
|
||||
return copied;
|
||||
|
||||
|
|
|
|||
|
|
@ -791,9 +791,9 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
|
|||
|
||||
sock = file->private_data;
|
||||
|
||||
flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
|
||||
if (more)
|
||||
flags |= MSG_MORE;
|
||||
flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
|
||||
/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
|
||||
flags |= more;
|
||||
|
||||
return kernel_sendpage(sock, page, offset, size, flags);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user