Merge branch 'net-xdp-handle-frags-with-unreadable-memory'

Jakub Kicinski says:

====================
net: xdp: handle frags with unreadable memory

Make XDP helpers compatible with unreadable memory. This is very
similar to how we handle pfmemalloc frags today. Record the info
in xdp_buff flags as frags get added and then update the skb once
allocated.

This series adds the unreadable memory metadata tracking to drivers
using xdp_build_skb_from*() with no changes on the driver side - hence
the only driver changes here are refactoring. Obviously, unreadable memory
is incompatible with XDP today, but thanks to xdp_build_skb_from_buff()
an increasing number of drivers have a unified datapath, whether XDP is
enabled or not.

RFC: https://lore.kernel.org/20250812161528.835855-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250905221539.2930285-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2025-09-11 12:00:25 +02:00
commit 7f0b763b81
8 changed files with 69 additions and 64 deletions

View File

@ -468,9 +468,8 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
if (!skb)
return NULL;
xdp_update_skb_shared_info(skb, num_frags,
sinfo->xdp_frags_size,
BNXT_RX_PAGE_SIZE * num_frags,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size,
BNXT_RX_PAGE_SIZE * num_frags,
xdp_buff_get_skb_flags(xdp));
return skb;
}

View File

@ -2151,10 +2151,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
sizeof(skb_frag_t) * nr_frags);
xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_get_skb_flags(xdp));
/* First buffer has already been processed, so bump ntc */
if (++rx_ring->next_to_clean == rx_ring->count)
@ -2206,10 +2206,9 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp))) {
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_get_skb_flags(xdp));
i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp);
} else {

View File

@ -1035,10 +1035,9 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp)))
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_get_skb_flags(xdp));
return skb;
}
@ -1115,10 +1114,10 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
sizeof(skb_frag_t) * nr_frags);
xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_get_skb_flags(xdp));
}
return skb;

View File

@ -2416,10 +2416,9 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
skb->ip_summed = mvneta_rx_csum(pp, desc_status);
if (unlikely(xdp_buff_has_frags(xdp)))
xdp_update_skb_shared_info(skb, num_frags,
sinfo->xdp_frags_size,
num_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size,
num_frags * xdp->frame_sz,
xdp_buff_get_skb_flags(xdp));
return skb;
}

View File

@ -1796,10 +1796,9 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
if (xdp_buff_has_frags(&mxbuf->xdp)) {
/* sinfo->nr_frags is reset by build_skb, calculate again. */
xdp_update_skb_shared_info(skb, wi - head_wi - 1,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(
&mxbuf->xdp));
xdp_update_skb_frags_info(skb, wi - head_wi - 1,
sinfo->xdp_frags_size, truesize,
xdp_buff_get_skb_flags(&mxbuf->xdp));
for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++)
pwi->frag_page->frags++;
@ -2105,10 +2104,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
struct mlx5e_frag_page *pagep;
/* sinfo->nr_frags is reset by build_skb, calculate again. */
xdp_update_skb_shared_info(skb, frag_page - head_page,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(
&mxbuf->xdp));
xdp_update_skb_frags_info(skb, frag_page - head_page,
sinfo->xdp_frags_size,
truesize,
xdp_buff_get_skb_flags(&mxbuf->xdp));
pagep = head_page;
do
@ -2122,10 +2121,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
if (xdp_buff_has_frags(&mxbuf->xdp)) {
struct mlx5e_frag_page *pagep;
xdp_update_skb_shared_info(skb, sinfo->nr_frags,
sinfo->xdp_frags_size, truesize,
xdp_buff_is_frag_pfmemalloc(
&mxbuf->xdp));
xdp_update_skb_frags_info(skb, sinfo->nr_frags,
sinfo->xdp_frags_size,
truesize,
xdp_buff_get_skb_flags(&mxbuf->xdp));
pagep = frag_page - sinfo->nr_frags;
do

View File

@ -2185,10 +2185,9 @@ static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp)))
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
xdp_frags_truesz,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
xdp_frags_truesz,
xdp_buff_get_skb_flags(xdp));
return skb;
}

View File

@ -76,6 +76,11 @@ enum xdp_buff_flags {
XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under
* pressure
*/
/* frags have unreadable mem, this can't be true for real XDP packets,
* but drivers may use XDP helpers to construct Rx pkt state even when
* XDP program is not attached.
*/
XDP_FLAGS_FRAGS_UNREADABLE = BIT(2),
};
struct xdp_buff {
@ -116,17 +121,21 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
}
static __always_inline bool
xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}
/* Record on the xdp_buff that its frags include pfmemalloc memory by
 * setting XDP_FLAGS_FRAGS_PF_MEMALLOC. The bit is only ever OR-ed in
 * here; this helper never clears it.
 */
static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
{
xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}
/* Record on the xdp_buff that its frags include unreadable memory by
 * setting XDP_FLAGS_FRAGS_UNREADABLE. The bit is only ever OR-ed in
 * here; this helper never clears it.
 */
static __always_inline void xdp_buff_set_frag_unreadable(struct xdp_buff *xdp)
{
xdp->flags |= XDP_FLAGS_FRAGS_UNREADABLE;
}
/* Return the xdp_buff flags word for use when filling in skb frag state.
 * The value is the raw, unmasked xdp->flags; the consumer is expected to
 * test the individual XDP_FLAGS_FRAGS_* bits it cares about.
 */
static __always_inline u32 xdp_buff_get_skb_flags(const struct xdp_buff *xdp)
{
return xdp->flags;
}
static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
@ -271,6 +280,8 @@ static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem,
if (unlikely(netmem_is_pfmemalloc(netmem)))
xdp_buff_set_frag_pfmemalloc(xdp);
if (unlikely(netmem_is_net_iov(netmem)))
xdp_buff_set_frag_unreadable(xdp);
return true;
}
@ -294,10 +305,10 @@ static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame)
return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
}
static __always_inline bool
xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame)
static __always_inline u32
xdp_frame_get_skb_flags(const struct xdp_frame *frame)
{
return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
return frame->flags;
}
#define XDP_BULK_QUEUE_SIZE 16
@ -334,9 +345,9 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
}
static inline void
xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
unsigned int size, unsigned int truesize,
bool pfmemalloc)
xdp_update_skb_frags_info(struct sk_buff *skb, u8 nr_frags,
unsigned int size, unsigned int truesize,
u32 xdp_flags)
{
struct skb_shared_info *sinfo = skb_shinfo(skb);
@ -350,7 +361,8 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
skb->len += size;
skb->data_len += size;
skb->truesize += truesize;
skb->pfmemalloc |= pfmemalloc;
skb->pfmemalloc |= !!(xdp_flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
skb->unreadable |= !!(xdp_flags & XDP_FLAGS_FRAGS_UNREADABLE);
}
/* Avoids inlining WARN macro in fast-path */

View File

@ -663,9 +663,8 @@ struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
u32 tsize;
tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size, tsize,
xdp_buff_is_frag_pfmemalloc(xdp));
xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
tsize, xdp_buff_get_skb_flags(xdp));
}
skb->protocol = eth_type_trans(skb, rxq->dev);
@ -692,7 +691,7 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
struct skb_shared_info *sinfo = skb_shinfo(skb);
const struct skb_shared_info *xinfo;
u32 nr_frags, tsize = 0;
bool pfmemalloc = false;
u32 flags = 0;
xinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = xinfo->nr_frags;
@ -714,11 +713,12 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
__skb_fill_page_desc_noacc(sinfo, i, page, offset, len);
tsize += truesize;
pfmemalloc |= page_is_pfmemalloc(page);
if (page_is_pfmemalloc(page))
flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}
xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size,
tsize, pfmemalloc);
xdp_update_skb_frags_info(skb, nr_frags, xinfo->xdp_frags_size, tsize,
flags);
return true;
}
@ -823,10 +823,9 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
skb_metadata_set(skb, xdpf->metasize);
if (unlikely(xdp_frame_has_frags(xdpf)))
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdpf->frame_sz,
xdp_frame_is_frag_pfmemalloc(xdpf));
xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
nr_frags * xdpf->frame_sz,
xdp_frame_get_skb_flags(xdpf));
/* Essential SKB info: protocol and skb->dev */
skb->protocol = eth_type_trans(skb, dev);