mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 10:04:04 +02:00
libeth: xdp, xsk: access adjacent u32s as u64 where applicable
On 64-bit systems, writing/reading one u64 is faster than two u32s even when they're adjacent in a struct. The compilers won't guarantee they will combine those; I observed both successful and unsuccessful attempts with both GCC and Clang, and it's not easy to say what it depends on. There are a few places in libeth_xdp winning up to several percent from combined access (both performance and object code size, especially when unrolling). Add __LIBETH_WORD_ACCESS and use it there on LE. Drivers are free to optimize HW-specific callbacks under the same definition. Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
This commit is contained in:
parent
3ced71a8b3
commit
80bae9df21
|
|
@ -475,6 +475,21 @@ struct libeth_xdp_tx_desc {
|
|||
((const void *)(uintptr_t)(priv)); \
|
||||
})
|
||||
|
||||
/*
|
||||
* On 64-bit systems, assigning one u64 is faster than two u32s. When ::len
|
||||
* occupies lowest 32 bits (LE), whole ::opts can be assigned directly instead.
|
||||
*/
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
#define __LIBETH_WORD_ACCESS 1
|
||||
#endif
|
||||
#ifdef __LIBETH_WORD_ACCESS
|
||||
#define __libeth_xdp_tx_len(flen, ...) \
|
||||
.opts = ((flen) | FIELD_PREP(GENMASK_ULL(63, 32), (__VA_ARGS__ + 0)))
|
||||
#else
|
||||
#define __libeth_xdp_tx_len(flen, ...) \
|
||||
.len = (flen), .flags = (__VA_ARGS__ + 0)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* libeth_xdp_tx_xmit_bulk - main XDP Tx function
|
||||
* @bulk: array of frames to send
|
||||
|
|
@ -870,8 +885,7 @@ static inline u32 libeth_xdp_xmit_queue_head(struct libeth_xdp_tx_bulk *bq,
|
|||
|
||||
bq->bulk[bq->count++] = (typeof(*bq->bulk)){
|
||||
.xdpf = xdpf,
|
||||
.len = xdpf->len,
|
||||
.flags = LIBETH_XDP_TX_FIRST,
|
||||
__libeth_xdp_tx_len(xdpf->len, LIBETH_XDP_TX_FIRST),
|
||||
};
|
||||
|
||||
if (!xdp_frame_has_frags(xdpf))
|
||||
|
|
@ -902,7 +916,7 @@ static inline bool libeth_xdp_xmit_queue_frag(struct libeth_xdp_tx_bulk *bq,
|
|||
|
||||
bq->bulk[bq->count++] = (typeof(*bq->bulk)){
|
||||
.dma = dma,
|
||||
.len = skb_frag_size(frag),
|
||||
__libeth_xdp_tx_len(skb_frag_size(frag)),
|
||||
};
|
||||
|
||||
return true;
|
||||
|
|
@ -1260,6 +1274,7 @@ bool libeth_xdp_buff_add_frag(struct libeth_xdp_buff *xdp,
|
|||
* Internal, use libeth_xdp_process_buff() instead. Initializes XDP buffer
|
||||
* head with the Rx buffer data: data pointer, length, headroom, and
|
||||
* truesize/tailroom. Zeroes the flags.
|
||||
* Uses faster single u64 write instead of per-field access when
* __LIBETH_WORD_ACCESS is defined; falls back to xdp_init_buff() otherwise.
|
||||
*/
|
||||
static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
|
||||
const struct libeth_fqe *fqe,
|
||||
|
|
@ -1267,7 +1282,15 @@ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
|
|||
{
|
||||
const struct page *page = __netmem_to_page(fqe->netmem);
|
||||
|
||||
#ifdef __LIBETH_WORD_ACCESS
|
||||
static_assert(offsetofend(typeof(xdp->base), flags) -
|
||||
offsetof(typeof(xdp->base), frame_sz) ==
|
||||
sizeof(u64));
|
||||
|
||||
*(u64 *)&xdp->base.frame_sz = fqe->truesize;
|
||||
#else
|
||||
xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
|
||||
#endif
|
||||
xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset,
|
||||
page->pp->p.offset, len, true);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@ static inline bool libeth_xsk_tx_queue_head(struct libeth_xdp_tx_bulk *bq,
|
|||
{
|
||||
bq->bulk[bq->count++] = (typeof(*bq->bulk)){
|
||||
.xsk = xdp,
|
||||
.len = xdp->base.data_end - xdp->data,
|
||||
.flags = LIBETH_XDP_TX_FIRST,
|
||||
__libeth_xdp_tx_len(xdp->base.data_end - xdp->data,
|
||||
LIBETH_XDP_TX_FIRST),
|
||||
};
|
||||
|
||||
if (likely(!xdp_buff_has_frags(&xdp->base)))
|
||||
|
|
@ -48,7 +48,7 @@ static inline void libeth_xsk_tx_queue_frag(struct libeth_xdp_tx_bulk *bq,
|
|||
{
|
||||
bq->bulk[bq->count++] = (typeof(*bq->bulk)){
|
||||
.xsk = frag,
|
||||
.len = frag->base.data_end - frag->data,
|
||||
__libeth_xdp_tx_len(frag->base.data_end - frag->data),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -199,7 +199,7 @@ __libeth_xsk_xmit_fill_buf_md(const struct xdp_desc *xdesc,
|
|||
ctx = xsk_buff_raw_get_ctx(sq->pool, xdesc->addr);
|
||||
desc = (typeof(desc)){
|
||||
.addr = ctx.dma,
|
||||
.len = xdesc->len,
|
||||
__libeth_xdp_tx_len(xdesc->len),
|
||||
};
|
||||
|
||||
BUILD_BUG_ON(!__builtin_constant_p(tmo == libeth_xsktmo));
|
||||
|
|
@ -226,7 +226,7 @@ __libeth_xsk_xmit_fill_buf(const struct xdp_desc *xdesc,
|
|||
{
|
||||
return (struct libeth_xdp_tx_desc){
|
||||
.addr = xsk_buff_raw_get_dma(sq->pool, xdesc->addr),
|
||||
.len = xdesc->len,
|
||||
__libeth_xdp_tx_len(xdesc->len),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user