Merge branch 'gve-af_xdp-zero-copy-for-dqo-rda'

Joshua Washington says:

====================
gve: AF_XDP zero-copy for DQO RDA

This patch series adds support for AF_XDP zero-copy in the DQO RDA queue
format.

XSK infrastructure is updated to re-post buffers when adding XSK pools
because XSK umem will be posted directly to the NIC, a departure from
the bounce buffer model used in GQI QPL. A registry of XSK pools is
introduced to prevent the usage of XSK pools when in copy mode.

v1: https://lore.kernel.org/netdev/20250714160451.124671-1-jeroendb@google.com/
====================

Link: https://patch.msgid.link/20250717152839.973004-1-jeroendb@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2025-07-22 11:35:51 +02:00
commit cdb794002d
6 changed files with 423 additions and 101 deletions

View File

@ -190,6 +190,9 @@ struct gve_rx_buf_state_dqo {
/* The page posted to HW. */
struct gve_rx_slot_page_info page_info;
/* XSK buffer */
struct xdp_buff *xsk_buff;
/* The DMA address corresponding to `page_info`. */
dma_addr_t addr;
@ -331,7 +334,6 @@ struct gve_rx_ring {
/* XDP stuff */
struct xdp_rxq_info xdp_rxq;
struct xdp_rxq_info xsk_rxq;
struct xsk_buff_pool *xsk_pool;
struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
};
@ -400,11 +402,17 @@ enum gve_packet_state {
GVE_PACKET_STATE_PENDING_REINJECT_COMPL,
/* No valid completion received within the specified timeout. */
GVE_PACKET_STATE_TIMED_OUT_COMPL,
/* XSK pending packet has received a packet/reinjection completion, or
* has timed out. At this point, the pending packet can be counted by
* xsk_tx_completed and freed.
*/
GVE_PACKET_STATE_XSK_COMPLETE,
};
enum gve_tx_pending_packet_dqo_type {
GVE_TX_PENDING_PACKET_DQO_SKB,
GVE_TX_PENDING_PACKET_DQO_XDP_FRAME
GVE_TX_PENDING_PACKET_DQO_XDP_FRAME,
GVE_TX_PENDING_PACKET_DQO_XSK,
};
struct gve_tx_pending_packet_dqo {
@ -441,10 +449,10 @@ struct gve_tx_pending_packet_dqo {
/* Identifies the current state of the packet as defined in
* `enum gve_packet_state`.
*/
u8 state : 2;
u8 state : 3;
/* gve_tx_pending_packet_dqo_type */
u8 type : 1;
u8 type : 2;
/* If packet is an outstanding miss completion, then the packet is
* freed if the corresponding re-injection completion is not received
@ -513,6 +521,8 @@ struct gve_tx_ring {
/* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
u32 free_tx_qpl_buf_cnt;
};
atomic_t xsk_reorder_queue_tail;
} dqo_tx;
};
@ -546,6 +556,9 @@ struct gve_tx_ring {
/* Last TX ring index fetched by HW */
atomic_t hw_tx_head;
u16 xsk_reorder_queue_head;
u16 xsk_reorder_queue_tail;
/* List to track pending packets which received a miss
* completion but not a corresponding reinjection.
*/
@ -599,6 +612,8 @@ struct gve_tx_ring {
struct gve_tx_pending_packet_dqo *pending_packets;
s16 num_pending_packets;
u16 *xsk_reorder_queue;
u32 complq_mask; /* complq size is complq_mask + 1 */
/* QPL fields */
@ -803,6 +818,7 @@ struct gve_priv {
struct gve_tx_queue_config tx_cfg;
struct gve_rx_queue_config rx_cfg;
unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */
u32 num_ntfy_blks; /* split between TX and RX so must be even */
int numa_node;

View File

@ -4,6 +4,7 @@
* Copyright (C) 2015-2024 Google, Inc.
*/
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_utils.h"
@ -29,6 +30,10 @@ struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
/* Point buf_state to itself to mark it as allocated */
buf_state->next = buffer_id;
/* Clear the buffer pointers */
buf_state->page_info.page = NULL;
buf_state->xsk_buff = NULL;
return buf_state;
}
@ -286,7 +291,24 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc)
{
struct gve_rx_buf_state_dqo *buf_state;
if (rx->dqo.page_pool) {
if (rx->xsk_pool) {
buf_state = gve_alloc_buf_state(rx);
if (unlikely(!buf_state))
return -ENOMEM;
buf_state->xsk_buff = xsk_buff_alloc(rx->xsk_pool);
if (unlikely(!buf_state->xsk_buff)) {
xsk_set_rx_need_wakeup(rx->xsk_pool);
gve_free_buf_state(rx, buf_state);
return -ENOMEM;
}
/* Allocated xsk buffer. Clear wakeup in case it was set. */
xsk_clear_rx_need_wakeup(rx->xsk_pool);
desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
desc->buf_addr =
cpu_to_le64(xsk_buff_xdp_get_dma(buf_state->xsk_buff));
return 0;
} else if (rx->dqo.page_pool) {
buf_state = gve_alloc_buf_state(rx);
if (WARN_ON_ONCE(!buf_state))
return -ENOMEM;

View File

@ -38,6 +38,7 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
netdev_features_t features);
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
bool gve_xdp_poll_dqo(struct gve_notify_block *block);
bool gve_xsk_tx_poll_dqo(struct gve_notify_block *block, int budget);
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg);

View File

@ -4,6 +4,7 @@
* Copyright (C) 2015-2024 Google LLC
*/
#include <linux/bitmap.h>
#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
@ -426,6 +427,12 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
if (block->rx) {
work_done = gve_rx_poll_dqo(block, budget);
/* Poll XSK TX as part of RX NAPI. Setup re-poll based on if
* either datapath has more work to do.
*/
if (priv->xdp_prog)
reschedule |= gve_xsk_tx_poll_dqo(block, budget);
reschedule |= work_done == budget;
}
@ -1158,18 +1165,84 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
/* Detach the XSK pool from RX queue @qid and from its paired XDP TX queue.
 *
 * Clears the RX ring's pool pointer and, if the ring's xdp_rxq is still
 * registered, drops its memory model. Tolerates priv->rx or priv->tx being
 * NULL, in which case the corresponding half is skipped.
 */
static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid)
{
	struct gve_rx_ring *rx_ring;

	if (!priv->rx)
		return;

	rx_ring = &priv->rx[qid];
	rx_ring->xsk_pool = NULL;
	if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
		xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);

	/* Without TX rings there is no paired XDP TX queue to clear. */
	if (priv->tx)
		priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL;
}
/* Attach @pool to RX queue @qid and to its paired XDP TX queue.
 *
 * Registers MEM_TYPE_XSK_BUFF_POOL as the memory model of the RX ring's
 * xdp_rxq. On failure the queue is unregistered again via
 * gve_unreg_xsk_pool() and the error is returned.
 *
 * Returns 0 on success or the error from xdp_rxq_info_reg_mem_model().
 */
static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev,
			    struct xsk_buff_pool *pool, u16 qid)
{
	struct gve_rx_ring *rx_ring = &priv->rx[qid];
	int ret;

	ret = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
					 MEM_TYPE_XSK_BUFF_POOL, pool);
	if (ret) {
		gve_unreg_xsk_pool(priv, qid);
		return ret;
	}

	rx_ring->xsk_pool = pool;
	priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = pool;

	return 0;
}
/* Undo XDP RX queue registration for every RX ring.
 *
 * No-op when no XDP TX queues are configured or the RX rings are not
 * allocated. For each ring, a registered xdp_rxq is unregistered and any
 * XSK pool association is dropped.
 */
static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int qid;

	if (!priv->rx || !priv->tx_cfg.num_xdp_queues)
		return;

	for (qid = 0; qid < priv->rx_cfg.num_queues; qid++) {
		struct xdp_rxq_info *rxq = &priv->rx[qid].xdp_rxq;

		if (xdp_rxq_info_is_reg(rxq))
			xdp_rxq_info_unreg(rxq);

		gve_unreg_xsk_pool(priv, qid);
	}
}
/* Look up the XSK pool for queue @qid.
 *
 * Returns NULL unless @qid is set in the driver's xsk_pools bitmap;
 * otherwise returns whatever xsk_get_pool_from_qid() reports for the
 * netdev.
 */
static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid)
{
	return test_bit(qid, priv->xsk_pools) ?
		xsk_get_pool_from_qid(priv->dev, qid) : NULL;
}
static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
struct napi_struct *napi;
struct gve_rx_ring *rx;
int err = 0;
int i, j;
u32 tx_qid;
int i;
if (!priv->tx_cfg.num_xdp_queues)
return 0;
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
struct xsk_buff_pool *xsk_pool;
rx = &priv->rx[i];
napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
@ -1177,7 +1250,11 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
napi->napi_id);
if (err)
goto err;
if (gve_is_qpl(priv))
xsk_pool = gve_get_xsk_pool(priv, i);
if (xsk_pool)
err = gve_reg_xsk_pool(priv, dev, xsk_pool, i);
else if (gve_is_qpl(priv))
err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL);
@ -1187,60 +1264,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
rx->dqo.page_pool);
if (err)
goto err;
rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
if (rx->xsk_pool) {
err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
napi->napi_id);
if (err)
goto err;
err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
MEM_TYPE_XSK_BUFF_POOL, NULL);
if (err)
goto err;
xsk_pool_set_rxq_info(rx->xsk_pool,
&rx->xsk_rxq);
}
}
for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
tx_qid = gve_xdp_tx_queue_id(priv, i);
priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
}
return 0;
err:
for (j = i; j >= 0; j--) {
rx = &priv->rx[j];
if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
xdp_rxq_info_unreg(&rx->xdp_rxq);
if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
xdp_rxq_info_unreg(&rx->xsk_rxq);
}
gve_unreg_xdp_info(priv);
return err;
}
static void gve_unreg_xdp_info(struct gve_priv *priv)
{
int i, tx_qid;
if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
return;
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
struct gve_rx_ring *rx = &priv->rx[i];
xdp_rxq_info_unreg(&rx->xdp_rxq);
if (rx->xsk_pool) {
xdp_rxq_info_unreg(&rx->xsk_rxq);
rx->xsk_pool = NULL;
}
}
for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
tx_qid = gve_xdp_tx_queue_id(priv, i);
priv->tx[tx_qid].xsk_pool = NULL;
}
}
static void gve_drain_page_cache(struct gve_priv *priv)
{
@ -1555,9 +1586,6 @@ static int gve_xsk_pool_enable(struct net_device *dev,
u16 qid)
{
struct gve_priv *priv = netdev_priv(dev);
struct napi_struct *napi;
struct gve_rx_ring *rx;
int tx_qid;
int err;
if (qid >= priv->rx_cfg.num_queues) {
@ -1575,34 +1603,31 @@ static int gve_xsk_pool_enable(struct net_device *dev,
if (err)
return err;
set_bit(qid, priv->xsk_pools);
/* If XDP prog is not installed or interface is down, return. */
if (!priv->xdp_prog || !netif_running(dev))
return 0;
rx = &priv->rx[qid];
napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
err = gve_reg_xsk_pool(priv, dev, pool, qid);
if (err)
goto err;
err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
MEM_TYPE_XSK_BUFF_POOL, NULL);
if (err)
goto err;
xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
rx->xsk_pool = pool;
tx_qid = gve_xdp_tx_queue_id(priv, qid);
priv->tx[tx_qid].xsk_pool = pool;
goto err_xsk_pool_dma_mapped;
/* Stop and start RDA queues to repost buffers. */
if (!gve_is_qpl(priv)) {
err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
if (err)
goto err_xsk_pool_registered;
}
return 0;
err:
if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
xdp_rxq_info_unreg(&rx->xsk_rxq);
err_xsk_pool_registered:
gve_unreg_xsk_pool(priv, qid);
err_xsk_pool_dma_mapped:
clear_bit(qid, priv->xsk_pools);
xsk_pool_dma_unmap(pool,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
DMA_ATTR_SKIP_CPU_SYNC |
DMA_ATTR_WEAK_ORDERING);
return err;
}
@ -1614,18 +1639,28 @@ static int gve_xsk_pool_disable(struct net_device *dev,
struct napi_struct *napi_tx;
struct xsk_buff_pool *pool;
int tx_qid;
int err;
pool = xsk_get_pool_from_qid(dev, qid);
if (!pool)
return -EINVAL;
if (qid >= priv->rx_cfg.num_queues)
return -EINVAL;
/* If XDP prog is not installed or interface is down, unmap DMA and
* return.
*/
if (!priv->xdp_prog || !netif_running(dev))
goto done;
clear_bit(qid, priv->xsk_pools);
pool = xsk_get_pool_from_qid(dev, qid);
if (pool)
xsk_pool_dma_unmap(pool,
DMA_ATTR_SKIP_CPU_SYNC |
DMA_ATTR_WEAK_ORDERING);
if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues)
return 0;
/* Stop and start RDA queues to repost buffers. */
if (!gve_is_qpl(priv) && priv->xdp_prog) {
err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
if (err)
return err;
}
napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
napi_disable(napi_rx); /* make sure current rx poll is done */
@ -1634,22 +1669,19 @@ static int gve_xsk_pool_disable(struct net_device *dev,
napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
napi_disable(napi_tx); /* make sure current tx poll is done */
priv->rx[qid].xsk_pool = NULL;
xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
priv->tx[tx_qid].xsk_pool = NULL;
gve_unreg_xsk_pool(priv, qid);
smp_mb(); /* Make sure it is visible to the workers on datapath */
napi_enable(napi_rx);
if (gve_rx_work_pending(&priv->rx[qid]))
napi_schedule(napi_rx);
napi_enable(napi_tx);
if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
napi_schedule(napi_tx);
if (gve_is_gqi(priv)) {
if (gve_rx_work_pending(&priv->rx[qid]))
napi_schedule(napi_rx);
if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
napi_schedule(napi_tx);
}
done:
xsk_pool_dma_unmap(pool,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
return 0;
}
@ -2275,6 +2307,7 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
} else if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
xdp_features = NETDEV_XDP_ACT_BASIC;
xdp_features |= NETDEV_XDP_ACT_REDIRECT;
xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
} else {
xdp_features = 0;
}
@ -2370,10 +2403,22 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;
setup_device:
priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
if (!priv->xsk_pools) {
err = -ENOMEM;
goto err;
}
gve_set_netdev_xdp_features(priv);
err = gve_setup_device_resources(priv);
if (!err)
return 0;
if (err)
goto err_free_xsk_bitmap;
return 0;
err_free_xsk_bitmap:
bitmap_free(priv->xsk_pools);
priv->xsk_pools = NULL;
err:
gve_adminq_free(&priv->pdev->dev, priv);
return err;
@ -2383,6 +2428,8 @@ static void gve_teardown_priv_resources(struct gve_priv *priv)
{
gve_teardown_device_resources(priv);
gve_adminq_free(&priv->pdev->dev, priv);
bitmap_free(priv->xsk_pools);
priv->xsk_pools = NULL;
}
static void gve_trigger_reset(struct gve_priv *priv)

View File

@ -16,6 +16,7 @@
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/xdp_sock_drv.h>
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
@ -149,6 +150,10 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
gve_free_to_page_pool(rx, bs, false);
else
gve_free_qpl_page_dqo(bs);
if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) {
xsk_buff_free(bs->xsk_buff);
bs->xsk_buff = NULL;
}
}
if (rx->dqo.qpl) {
@ -580,8 +585,11 @@ static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
int err;
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
if (unlikely(!xdpf)) {
if (rx->xsk_pool)
xsk_buff_free(xdp);
return -ENOSPC;
}
tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
tx = &priv->tx[tx_qid];
@ -592,6 +600,41 @@ static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
return err;
}
static void gve_xsk_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
struct xdp_buff *xdp, struct bpf_prog *xprog,
int xdp_act)
{
switch (xdp_act) {
case XDP_ABORTED:
case XDP_DROP:
default:
xsk_buff_free(xdp);
break;
case XDP_TX:
if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp)))
goto err;
break;
case XDP_REDIRECT:
if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog)))
goto err;
break;
}
u64_stats_update_begin(&rx->statss);
if ((u32)xdp_act < GVE_XDP_ACTIONS)
rx->xdp_actions[xdp_act]++;
u64_stats_update_end(&rx->statss);
return;
err:
u64_stats_update_begin(&rx->statss);
if (xdp_act == XDP_TX)
rx->xdp_tx_errors++;
if (xdp_act == XDP_REDIRECT)
rx->xdp_redirect_errors++;
u64_stats_update_end(&rx->statss);
}
static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
struct xdp_buff *xdp, struct bpf_prog *xprog,
int xdp_act,
@ -633,6 +676,48 @@ static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
return;
}
/* Process one RX completion whose buffer is an XSK (zero-copy) buffer.
 *
 * Sets the buffer's data_end from @buf_len, syncs it for the CPU and, if an
 * XDP program is attached, runs it. Any verdict other than XDP_PASS is
 * handed to gve_xsk_done_dqo(). For XDP_PASS (or no program) the payload is
 * copied into a new skb stored in rx->ctx and the XSK buffer is freed.
 * The buffer state is released on every path.
 *
 * Returns 0 on success, -ENOMEM if the skb copy could not be allocated.
 */
static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
			  struct gve_rx_buf_state_dqo *buf_state, int buf_len,
			  struct bpf_prog *xprog)
{
	struct xdp_buff *xsk_xdp = buf_state->xsk_buff;
	struct gve_priv *priv = rx->gve;

	xsk_xdp->data_end = xsk_xdp->data + buf_len;
	xsk_buff_dma_sync_for_cpu(xsk_xdp);

	if (xprog) {
		int verdict = bpf_prog_run_xdp(xprog, xsk_xdp);

		/* The program may have moved data/data_end. */
		buf_len = xsk_xdp->data_end - xsk_xdp->data;
		if (verdict != XDP_PASS) {
			gve_xsk_done_dqo(priv, rx, xsk_xdp, xprog, verdict);
			gve_free_buf_state(rx, buf_state);
			return 0;
		}
	}

	/* XDP_PASS: the stack gets a copy, so the XSK buffer can be freed
	 * whether or not the copy succeeded.
	 */
	rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
					    xsk_xdp->data, buf_len);
	xsk_buff_free(xsk_xdp);
	gve_free_buf_state(rx, buf_state);

	if (unlikely(!rx->ctx.skb_head))
		return -ENOMEM;

	rx->ctx.skb_tail = rx->ctx.skb_head;

	u64_stats_update_begin(&rx->statss);
	rx->xdp_actions[XDP_PASS]++;
	u64_stats_update_end(&rx->statss);

	return 0;
}
/* Returns 0 if descriptor is completed successfully.
* Returns -EINVAL if descriptor is invalid.
* Returns -ENOMEM if data cannot be copied to skb.
@ -671,7 +756,11 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
buf_len = compl_desc->packet_len;
hdr_len = compl_desc->header_len;
/* Page might have not been used for a while and was likely last written
xprog = READ_ONCE(priv->xdp_prog);
if (buf_state->xsk_buff)
return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog);
/* Page might have not been used for awhile and was likely last written
* by a different thread.
*/
if (rx->dqo.page_pool) {
@ -721,7 +810,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
xprog = READ_ONCE(priv->xdp_prog);
if (xprog) {
struct xdp_buff xdp;
void *old_data;

View File

@ -13,6 +13,7 @@
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <net/xdp_sock_drv.h>
/* Returns true if tx_bufs are available. */
static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count)
@ -241,6 +242,9 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
tx->dqo.tx_ring = NULL;
}
kvfree(tx->dqo.xsk_reorder_queue);
tx->dqo.xsk_reorder_queue = NULL;
kvfree(tx->dqo.pending_packets);
tx->dqo.pending_packets = NULL;
@ -345,6 +349,17 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1;
atomic_set_release(&tx->dqo_compl.free_pending_packets, -1);
/* Only allocate the XSK reorder queue for XDP queues */
if (idx >= cfg->qcfg->num_queues && cfg->num_xdp_rings) {
tx->dqo.xsk_reorder_queue =
kvcalloc(tx->dqo.complq_mask + 1,
sizeof(tx->dqo.xsk_reorder_queue[0]),
GFP_KERNEL);
if (!tx->dqo.xsk_reorder_queue)
goto err;
}
tx->dqo_compl.miss_completions.head = -1;
tx->dqo_compl.miss_completions.tail = -1;
tx->dqo_compl.timed_out_completions.head = -1;
@ -992,6 +1007,38 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
return 0;
}
/* Append @completion_tag to the XSK reorder queue (producer side).
 *
 * The tag is written into the slot at the current tail, then the tail is
 * advanced (wrapping with complq_mask) and published with release
 * semantics, so the slot write is ordered before the new tail value.
 *
 * NOTE(review): there is no explicit queue-full check here; presumably the
 * number of outstanding tags is bounded elsewhere - confirm.
 */
static void gve_xsk_reorder_queue_push_dqo(struct gve_tx_ring *tx,
u16 completion_tag)
{
u32 tail = atomic_read(&tx->dqo_tx.xsk_reorder_queue_tail);
/* Fill the slot before publishing the new tail below. */
tx->dqo.xsk_reorder_queue[tail] = completion_tag;
tail = (tail + 1) & tx->dqo.complq_mask;
atomic_set_release(&tx->dqo_tx.xsk_reorder_queue_tail, tail);
}
/* Peek at the pending packet at the head of the XSK reorder queue
 * (consumer side) without removing it.
 *
 * A cached copy of the tail is kept in dqo_compl. It is only refreshed
 * from the shared dqo_tx tail, with acquire semantics, when the queue
 * looks empty against the cached value.
 *
 * Returns NULL if the queue is empty, otherwise a pointer to the pending
 * packet indexed by the completion tag stored at the head slot.
 */
static struct gve_tx_pending_packet_dqo *
gve_xsk_reorder_queue_head(struct gve_tx_ring *tx)
{
u32 head = tx->dqo_compl.xsk_reorder_queue_head;
if (head == tx->dqo_compl.xsk_reorder_queue_tail) {
/* Cached tail says empty: re-read the shared tail. */
tx->dqo_compl.xsk_reorder_queue_tail =
atomic_read_acquire(&tx->dqo_tx.xsk_reorder_queue_tail);
if (head == tx->dqo_compl.xsk_reorder_queue_tail)
return NULL;
}
return &tx->dqo.pending_packets[tx->dqo.xsk_reorder_queue[head]];
}
static void gve_xsk_reorder_queue_pop_dqo(struct gve_tx_ring *tx)
{
tx->dqo_compl.xsk_reorder_queue_head++;
tx->dqo_compl.xsk_reorder_queue_head &= tx->dqo.complq_mask;
}
/* Transmit a given skb and ring the doorbell. */
netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
{
@ -1015,6 +1062,62 @@ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
/* Transmit up to @budget descriptors from the TX ring's XSK pool.
 *
 * Runs under dqo_tx.xdp_lock. For each descriptor peeked from the pool, a
 * pending packet of type GVE_TX_PENDING_PACKET_DQO_XSK is allocated, one
 * packet descriptor is filled for the buffer's DMA address, the ring tail
 * is advanced, and the completion tag is pushed onto the XSK reorder
 * queue. If anything was posted, the doorbell is rung and
 * xsk_tx_release() is called.
 *
 * Returns true when the caller should poll again: either the whole budget
 * was consumed or the ring ran out of free slots.
 */
static bool gve_xsk_tx_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
int budget)
{
struct xsk_buff_pool *pool = tx->xsk_pool;
struct xdp_desc desc;
bool repoll = false;
int sent = 0;
spin_lock(&tx->dqo_tx.xdp_lock);
for (; sent < budget; sent++) {
struct gve_tx_pending_packet_dqo *pkt;
s16 completion_tag;
dma_addr_t addr;
u32 desc_idx;
/* Out of ring resources: stop and ask for a re-poll. */
if (unlikely(!gve_has_avail_slots_tx_dqo(tx, 1, 1))) {
repoll = true;
break;
}
/* Nothing more queued in the pool. */
if (!xsk_tx_peek_desc(pool, &desc))
break;
/* NOTE(review): the result is not NULL-checked; presumably the
 * gve_has_avail_slots_tx_dqo() check above guarantees a free
 * pending packet - confirm.
 */
pkt = gve_alloc_pending_packet(tx);
pkt->type = GVE_TX_PENDING_PACKET_DQO_XSK;
pkt->num_bufs = 0;
/* The completion tag is the pending packet's index in the array. */
completion_tag = pkt - tx->dqo.pending_packets;
addr = xsk_buff_raw_get_dma(pool, desc.addr);
xsk_buff_raw_dma_sync_for_device(pool, addr, desc.len);
desc_idx = tx->dqo_tx.tail;
gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
true, desc.len,
addr, completion_tag, true,
false);
++pkt->num_bufs;
gve_tx_update_tail(tx, desc_idx);
tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;
/* Record posting order for gve_xsk_reorder_queue_head() consumers. */
gve_xsk_reorder_queue_push_dqo(tx, completion_tag);
}
if (sent) {
gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
xsk_tx_release(pool);
}
spin_unlock(&tx->dqo_tx.xdp_lock);
u64_stats_update_begin(&tx->statss);
tx->xdp_xsk_sent += sent;
u64_stats_update_end(&tx->statss);
return (sent == budget) || repoll;
}
static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list,
struct gve_tx_pending_packet_dqo *pending_packet)
{
@ -1152,6 +1255,9 @@ static void gve_handle_packet_completion(struct gve_priv *priv,
pending_packet->xdpf = NULL;
gve_free_pending_packet(tx, pending_packet);
break;
case GVE_TX_PENDING_PACKET_DQO_XSK:
pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE;
break;
default:
WARN_ON_ONCE(1);
}
@ -1251,8 +1357,34 @@ static void remove_timed_out_completions(struct gve_priv *priv,
remove_from_list(tx, &tx->dqo_compl.timed_out_completions,
pending_packet);
/* Need to count XSK packets in xsk_tx_completed. */
if (pending_packet->type == GVE_TX_PENDING_PACKET_DQO_XSK)
pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE;
else
gve_free_pending_packet(tx, pending_packet);
}
}
/* Retire completed XSK packets from the front of the reorder queue.
 *
 * Entries are popped and freed only while the head packet is in
 * GVE_PACKET_STATE_XSK_COMPLETE; the first non-complete head stops the
 * walk. The number of retired packets, if any, is reported to the pool
 * with xsk_tx_completed().
 */
static void gve_tx_process_xsk_completions(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_packet_dqo *pkt;
	u32 completed = 0;

	for (pkt = gve_xsk_reorder_queue_head(tx);
	     pkt && pkt->state == GVE_PACKET_STATE_XSK_COMPLETE;
	     pkt = gve_xsk_reorder_queue_head(tx)) {
		completed++;
		gve_xsk_reorder_queue_pop_dqo(tx);
		gve_free_pending_packet(tx, pkt);
	}

	if (completed)
		xsk_tx_completed(tx->xsk_pool, completed);
}
int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
@ -1333,6 +1465,9 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
remove_miss_completions(priv, tx);
remove_timed_out_completions(priv, tx);
if (tx->xsk_pool)
gve_tx_process_xsk_completions(tx);
u64_stats_update_begin(&tx->statss);
tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes;
tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts;
@ -1365,6 +1500,19 @@ bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
}
bool gve_xsk_tx_poll_dqo(struct gve_notify_block *rx_block, int budget)
{
struct gve_rx_ring *rx = rx_block->rx;
struct gve_priv *priv = rx->gve;
struct gve_tx_ring *tx;
tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)];
if (tx->xsk_pool)
return gve_xsk_tx_dqo(priv, tx, budget);
return 0;
}
bool gve_xdp_poll_dqo(struct gve_notify_block *block)
{
struct gve_tx_compl_desc *compl_desc;