mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 10:33:41 +02:00
Merge branch 'gve-af_xdp-zero-copy-for-dqo-rda'
Joshua Washington says: ==================== gve: AF_XDP zero-copy for DQO RDA This patch series adds support for AF_XDP zero-copy in the DQO RDA queue format. XSK infrastructure is updated to re-post buffers when adding XSK pools because XSK umem will be posted directly to the NIC, a departure from the bounce buffer model used in GQI QPL. A registry of XSK pools is introduced to prevent the usage of XSK pools when in copy mode. v1: https://lore.kernel.org/netdev/20250714160451.124671-1-jeroendb@google.com/ ==================== Link: https://patch.msgid.link/20250717152839.973004-1-jeroendb@google.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
commit
cdb794002d
|
|
@ -190,6 +190,9 @@ struct gve_rx_buf_state_dqo {
|
|||
/* The page posted to HW. */
|
||||
struct gve_rx_slot_page_info page_info;
|
||||
|
||||
/* XSK buffer */
|
||||
struct xdp_buff *xsk_buff;
|
||||
|
||||
/* The DMA address corresponding to `page_info`. */
|
||||
dma_addr_t addr;
|
||||
|
||||
|
|
@ -331,7 +334,6 @@ struct gve_rx_ring {
|
|||
|
||||
/* XDP stuff */
|
||||
struct xdp_rxq_info xdp_rxq;
|
||||
struct xdp_rxq_info xsk_rxq;
|
||||
struct xsk_buff_pool *xsk_pool;
|
||||
struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
|
||||
};
|
||||
|
|
@ -400,11 +402,17 @@ enum gve_packet_state {
|
|||
GVE_PACKET_STATE_PENDING_REINJECT_COMPL,
|
||||
/* No valid completion received within the specified timeout. */
|
||||
GVE_PACKET_STATE_TIMED_OUT_COMPL,
|
||||
/* XSK pending packet has received a packet/reinjection completion, or
|
||||
* has timed out. At this point, the pending packet can be counted by
|
||||
* xsk_tx_complete and freed.
|
||||
*/
|
||||
GVE_PACKET_STATE_XSK_COMPLETE,
|
||||
};
|
||||
|
||||
enum gve_tx_pending_packet_dqo_type {
|
||||
GVE_TX_PENDING_PACKET_DQO_SKB,
|
||||
GVE_TX_PENDING_PACKET_DQO_XDP_FRAME
|
||||
GVE_TX_PENDING_PACKET_DQO_XDP_FRAME,
|
||||
GVE_TX_PENDING_PACKET_DQO_XSK,
|
||||
};
|
||||
|
||||
struct gve_tx_pending_packet_dqo {
|
||||
|
|
@ -441,10 +449,10 @@ struct gve_tx_pending_packet_dqo {
|
|||
/* Identifies the current state of the packet as defined in
|
||||
* `enum gve_packet_state`.
|
||||
*/
|
||||
u8 state : 2;
|
||||
u8 state : 3;
|
||||
|
||||
/* gve_tx_pending_packet_dqo_type */
|
||||
u8 type : 1;
|
||||
u8 type : 2;
|
||||
|
||||
/* If packet is an outstanding miss completion, then the packet is
|
||||
* freed if the corresponding re-injection completion is not received
|
||||
|
|
@ -513,6 +521,8 @@ struct gve_tx_ring {
|
|||
/* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
|
||||
u32 free_tx_qpl_buf_cnt;
|
||||
};
|
||||
|
||||
atomic_t xsk_reorder_queue_tail;
|
||||
} dqo_tx;
|
||||
};
|
||||
|
||||
|
|
@ -546,6 +556,9 @@ struct gve_tx_ring {
|
|||
/* Last TX ring index fetched by HW */
|
||||
atomic_t hw_tx_head;
|
||||
|
||||
u16 xsk_reorder_queue_head;
|
||||
u16 xsk_reorder_queue_tail;
|
||||
|
||||
/* List to track pending packets which received a miss
|
||||
* completion but not a corresponding reinjection.
|
||||
*/
|
||||
|
|
@ -599,6 +612,8 @@ struct gve_tx_ring {
|
|||
struct gve_tx_pending_packet_dqo *pending_packets;
|
||||
s16 num_pending_packets;
|
||||
|
||||
u16 *xsk_reorder_queue;
|
||||
|
||||
u32 complq_mask; /* complq size is complq_mask + 1 */
|
||||
|
||||
/* QPL fields */
|
||||
|
|
@ -803,6 +818,7 @@ struct gve_priv {
|
|||
|
||||
struct gve_tx_queue_config tx_cfg;
|
||||
struct gve_rx_queue_config rx_cfg;
|
||||
unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */
|
||||
u32 num_ntfy_blks; /* split between TX and RX so must be even */
|
||||
int numa_node;
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
* Copyright (C) 2015-2024 Google, Inc.
|
||||
*/
|
||||
|
||||
#include <net/xdp_sock_drv.h>
|
||||
#include "gve.h"
|
||||
#include "gve_utils.h"
|
||||
|
||||
|
|
@ -29,6 +30,10 @@ struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
|
|||
/* Point buf_state to itself to mark it as allocated */
|
||||
buf_state->next = buffer_id;
|
||||
|
||||
/* Clear the buffer pointers */
|
||||
buf_state->page_info.page = NULL;
|
||||
buf_state->xsk_buff = NULL;
|
||||
|
||||
return buf_state;
|
||||
}
|
||||
|
||||
|
|
@ -286,7 +291,24 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc)
|
|||
{
|
||||
struct gve_rx_buf_state_dqo *buf_state;
|
||||
|
||||
if (rx->dqo.page_pool) {
|
||||
if (rx->xsk_pool) {
|
||||
buf_state = gve_alloc_buf_state(rx);
|
||||
if (unlikely(!buf_state))
|
||||
return -ENOMEM;
|
||||
|
||||
buf_state->xsk_buff = xsk_buff_alloc(rx->xsk_pool);
|
||||
if (unlikely(!buf_state->xsk_buff)) {
|
||||
xsk_set_rx_need_wakeup(rx->xsk_pool);
|
||||
gve_free_buf_state(rx, buf_state);
|
||||
return -ENOMEM;
|
||||
}
|
||||
/* Allocated xsk buffer. Clear wakeup in case it was set. */
|
||||
xsk_clear_rx_need_wakeup(rx->xsk_pool);
|
||||
desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
|
||||
desc->buf_addr =
|
||||
cpu_to_le64(xsk_buff_xdp_get_dma(buf_state->xsk_buff));
|
||||
return 0;
|
||||
} else if (rx->dqo.page_pool) {
|
||||
buf_state = gve_alloc_buf_state(rx);
|
||||
if (WARN_ON_ONCE(!buf_state))
|
||||
return -ENOMEM;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
|
|||
netdev_features_t features);
|
||||
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
|
||||
bool gve_xdp_poll_dqo(struct gve_notify_block *block);
|
||||
bool gve_xsk_tx_poll_dqo(struct gve_notify_block *block, int budget);
|
||||
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
|
||||
int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
|
||||
struct gve_tx_alloc_rings_cfg *cfg);
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
* Copyright (C) 2015-2024 Google LLC
|
||||
*/
|
||||
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/etherdevice.h>
|
||||
|
|
@ -426,6 +427,12 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
|
|||
|
||||
if (block->rx) {
|
||||
work_done = gve_rx_poll_dqo(block, budget);
|
||||
|
||||
/* Poll XSK TX as part of RX NAPI. Setup re-poll based on if
|
||||
* either datapath has more work to do.
|
||||
*/
|
||||
if (priv->xdp_prog)
|
||||
reschedule |= gve_xsk_tx_poll_dqo(block, budget);
|
||||
reschedule |= work_done == budget;
|
||||
}
|
||||
|
||||
|
|
@ -1158,18 +1165,84 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
|
|||
static void gve_turndown(struct gve_priv *priv);
|
||||
static void gve_turnup(struct gve_priv *priv);
|
||||
|
||||
/* Detach the XSK buffer pool from RX queue @qid and from the XDP TX queue
 * paired with it.
 *
 * Clears rx->xsk_pool, drops the memory model registered on rx->xdp_rxq (only
 * if that rxq info is currently registered), and clears the xsk_pool pointer
 * of the TX ring selected by gve_xdp_tx_queue_id(priv, qid).
 *
 * Does nothing if priv->rx is NULL; the TX side is skipped if priv->tx is
 * NULL.
 */
static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid)
|
||||
{
|
||||
struct gve_rx_ring *rx;
|
||||
|
||||
if (!priv->rx)
|
||||
return;
|
||||
|
||||
rx = &priv->rx[qid];
|
||||
rx->xsk_pool = NULL;
|
||||
/* Only the memory model is unregistered here; xdp_rxq itself stays registered. */
if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
|
||||
xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq);
|
||||
|
||||
if (!priv->tx)
|
||||
return;
|
||||
priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL;
|
||||
}
|
||||
|
||||
/* Attach XSK buffer pool @pool to RX queue @qid and its paired XDP TX queue.
 *
 * Registers MEM_TYPE_XSK_BUFF_POOL with @pool as the memory model of
 * rx->xdp_rxq, then stores @pool in both the RX ring and the TX ring selected
 * by gve_xdp_tx_queue_id(priv, qid).
 *
 * Returns 0 on success, or the error from xdp_rxq_info_reg_mem_model(); on
 * that failure gve_unreg_xsk_pool() is called for @qid before returning.
 *
 * NOTE(review): @dev is not referenced in this function body.
 */
static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev,
|
||||
struct xsk_buff_pool *pool, u16 qid)
|
||||
{
|
||||
struct gve_rx_ring *rx;
|
||||
u16 tx_qid;
|
||||
int err;
|
||||
|
||||
rx = &priv->rx[qid];
|
||||
err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
|
||||
MEM_TYPE_XSK_BUFF_POOL, pool);
|
||||
if (err) {
|
||||
gve_unreg_xsk_pool(priv, qid);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Pool pointers are published only after the mem model registration succeeded. */
rx->xsk_pool = pool;
|
||||
|
||||
tx_qid = gve_xdp_tx_queue_id(priv, qid);
|
||||
priv->tx[tx_qid].xsk_pool = pool;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unregister XDP rxq info and any XSK pool for every RX queue.
 *
 * Returns immediately when no XDP TX queues are configured or when priv->rx
 * is NULL. Otherwise, for each of the rx_cfg.num_queues RX rings, unregisters
 * rx->xdp_rxq if it is registered and then calls gve_unreg_xsk_pool() for
 * that queue index.
 */
static void gve_unreg_xdp_info(struct gve_priv *priv)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!priv->tx_cfg.num_xdp_queues || !priv->rx)
|
||||
return;
|
||||
|
||||
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
|
||||
struct gve_rx_ring *rx = &priv->rx[i];
|
||||
|
||||
/* Guarded so that rings which were never registered are skipped. */
if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
|
||||
xdp_rxq_info_unreg(&rx->xdp_rxq);
|
||||
|
||||
gve_unreg_xsk_pool(priv, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Look up the XSK pool for queue @qid, gated by the driver's own bitmap.
 *
 * Returns NULL when bit @qid is not set in priv->xsk_pools; otherwise returns
 * whatever xsk_get_pool_from_qid() reports for priv->dev and @qid.
 */
static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid)
|
||||
{
|
||||
if (!test_bit(qid, priv->xsk_pools))
|
||||
return NULL;
|
||||
|
||||
return xsk_get_pool_from_qid(priv->dev, qid);
|
||||
}
|
||||
|
||||
static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
|
||||
{
|
||||
struct napi_struct *napi;
|
||||
struct gve_rx_ring *rx;
|
||||
int err = 0;
|
||||
int i, j;
|
||||
u32 tx_qid;
|
||||
int i;
|
||||
|
||||
if (!priv->tx_cfg.num_xdp_queues)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
|
||||
struct xsk_buff_pool *xsk_pool;
|
||||
|
||||
rx = &priv->rx[i];
|
||||
napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
|
||||
|
||||
|
|
@ -1177,7 +1250,11 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
|
|||
napi->napi_id);
|
||||
if (err)
|
||||
goto err;
|
||||
if (gve_is_qpl(priv))
|
||||
|
||||
xsk_pool = gve_get_xsk_pool(priv, i);
|
||||
if (xsk_pool)
|
||||
err = gve_reg_xsk_pool(priv, dev, xsk_pool, i);
|
||||
else if (gve_is_qpl(priv))
|
||||
err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
|
||||
MEM_TYPE_PAGE_SHARED,
|
||||
NULL);
|
||||
|
|
@ -1187,60 +1264,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
|
|||
rx->dqo.page_pool);
|
||||
if (err)
|
||||
goto err;
|
||||
rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
|
||||
if (rx->xsk_pool) {
|
||||
err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
|
||||
napi->napi_id);
|
||||
if (err)
|
||||
goto err;
|
||||
err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
|
||||
MEM_TYPE_XSK_BUFF_POOL, NULL);
|
||||
if (err)
|
||||
goto err;
|
||||
xsk_pool_set_rxq_info(rx->xsk_pool,
|
||||
&rx->xsk_rxq);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
|
||||
tx_qid = gve_xdp_tx_queue_id(priv, i);
|
||||
priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
|
||||
}
|
||||
return 0;
|
||||
|
||||
err:
|
||||
for (j = i; j >= 0; j--) {
|
||||
rx = &priv->rx[j];
|
||||
if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
|
||||
xdp_rxq_info_unreg(&rx->xdp_rxq);
|
||||
if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
|
||||
xdp_rxq_info_unreg(&rx->xsk_rxq);
|
||||
}
|
||||
gve_unreg_xdp_info(priv);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void gve_unreg_xdp_info(struct gve_priv *priv)
|
||||
{
|
||||
int i, tx_qid;
|
||||
|
||||
if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
|
||||
return;
|
||||
|
||||
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
|
||||
struct gve_rx_ring *rx = &priv->rx[i];
|
||||
|
||||
xdp_rxq_info_unreg(&rx->xdp_rxq);
|
||||
if (rx->xsk_pool) {
|
||||
xdp_rxq_info_unreg(&rx->xsk_rxq);
|
||||
rx->xsk_pool = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
|
||||
tx_qid = gve_xdp_tx_queue_id(priv, i);
|
||||
priv->tx[tx_qid].xsk_pool = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void gve_drain_page_cache(struct gve_priv *priv)
|
||||
{
|
||||
|
|
@ -1555,9 +1586,6 @@ static int gve_xsk_pool_enable(struct net_device *dev,
|
|||
u16 qid)
|
||||
{
|
||||
struct gve_priv *priv = netdev_priv(dev);
|
||||
struct napi_struct *napi;
|
||||
struct gve_rx_ring *rx;
|
||||
int tx_qid;
|
||||
int err;
|
||||
|
||||
if (qid >= priv->rx_cfg.num_queues) {
|
||||
|
|
@ -1575,34 +1603,31 @@ static int gve_xsk_pool_enable(struct net_device *dev,
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
set_bit(qid, priv->xsk_pools);
|
||||
|
||||
/* If XDP prog is not installed or interface is down, return. */
|
||||
if (!priv->xdp_prog || !netif_running(dev))
|
||||
return 0;
|
||||
|
||||
rx = &priv->rx[qid];
|
||||
napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
|
||||
err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
|
||||
err = gve_reg_xsk_pool(priv, dev, pool, qid);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
|
||||
MEM_TYPE_XSK_BUFF_POOL, NULL);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
|
||||
rx->xsk_pool = pool;
|
||||
|
||||
tx_qid = gve_xdp_tx_queue_id(priv, qid);
|
||||
priv->tx[tx_qid].xsk_pool = pool;
|
||||
goto err_xsk_pool_dma_mapped;
|
||||
|
||||
/* Stop and start RDA queues to repost buffers. */
|
||||
if (!gve_is_qpl(priv)) {
|
||||
err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
|
||||
if (err)
|
||||
goto err_xsk_pool_registered;
|
||||
}
|
||||
return 0;
|
||||
err:
|
||||
if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
|
||||
xdp_rxq_info_unreg(&rx->xsk_rxq);
|
||||
|
||||
err_xsk_pool_registered:
|
||||
gve_unreg_xsk_pool(priv, qid);
|
||||
err_xsk_pool_dma_mapped:
|
||||
clear_bit(qid, priv->xsk_pools);
|
||||
xsk_pool_dma_unmap(pool,
|
||||
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
|
||||
DMA_ATTR_SKIP_CPU_SYNC |
|
||||
DMA_ATTR_WEAK_ORDERING);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
@ -1614,18 +1639,28 @@ static int gve_xsk_pool_disable(struct net_device *dev,
|
|||
struct napi_struct *napi_tx;
|
||||
struct xsk_buff_pool *pool;
|
||||
int tx_qid;
|
||||
int err;
|
||||
|
||||
pool = xsk_get_pool_from_qid(dev, qid);
|
||||
if (!pool)
|
||||
return -EINVAL;
|
||||
if (qid >= priv->rx_cfg.num_queues)
|
||||
return -EINVAL;
|
||||
|
||||
/* If XDP prog is not installed or interface is down, unmap DMA and
|
||||
* return.
|
||||
*/
|
||||
if (!priv->xdp_prog || !netif_running(dev))
|
||||
goto done;
|
||||
clear_bit(qid, priv->xsk_pools);
|
||||
|
||||
pool = xsk_get_pool_from_qid(dev, qid);
|
||||
if (pool)
|
||||
xsk_pool_dma_unmap(pool,
|
||||
DMA_ATTR_SKIP_CPU_SYNC |
|
||||
DMA_ATTR_WEAK_ORDERING);
|
||||
|
||||
if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues)
|
||||
return 0;
|
||||
|
||||
/* Stop and start RDA queues to repost buffers. */
|
||||
if (!gve_is_qpl(priv) && priv->xdp_prog) {
|
||||
err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
|
||||
napi_disable(napi_rx); /* make sure current rx poll is done */
|
||||
|
|
@ -1634,22 +1669,19 @@ static int gve_xsk_pool_disable(struct net_device *dev,
|
|||
napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
|
||||
napi_disable(napi_tx); /* make sure current tx poll is done */
|
||||
|
||||
priv->rx[qid].xsk_pool = NULL;
|
||||
xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
|
||||
priv->tx[tx_qid].xsk_pool = NULL;
|
||||
gve_unreg_xsk_pool(priv, qid);
|
||||
smp_mb(); /* Make sure it is visible to the workers on datapath */
|
||||
|
||||
napi_enable(napi_rx);
|
||||
if (gve_rx_work_pending(&priv->rx[qid]))
|
||||
napi_schedule(napi_rx);
|
||||
|
||||
napi_enable(napi_tx);
|
||||
if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
|
||||
napi_schedule(napi_tx);
|
||||
if (gve_is_gqi(priv)) {
|
||||
if (gve_rx_work_pending(&priv->rx[qid]))
|
||||
napi_schedule(napi_rx);
|
||||
|
||||
if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
|
||||
napi_schedule(napi_tx);
|
||||
}
|
||||
|
||||
done:
|
||||
xsk_pool_dma_unmap(pool,
|
||||
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -2275,6 +2307,7 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
|
|||
} else if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
|
||||
xdp_features = NETDEV_XDP_ACT_BASIC;
|
||||
xdp_features |= NETDEV_XDP_ACT_REDIRECT;
|
||||
xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
|
||||
} else {
|
||||
xdp_features = 0;
|
||||
}
|
||||
|
|
@ -2370,10 +2403,22 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
|
|||
priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;
|
||||
|
||||
setup_device:
|
||||
priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
|
||||
if (!priv->xsk_pools) {
|
||||
err = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
gve_set_netdev_xdp_features(priv);
|
||||
err = gve_setup_device_resources(priv);
|
||||
if (!err)
|
||||
return 0;
|
||||
if (err)
|
||||
goto err_free_xsk_bitmap;
|
||||
|
||||
return 0;
|
||||
|
||||
err_free_xsk_bitmap:
|
||||
bitmap_free(priv->xsk_pools);
|
||||
priv->xsk_pools = NULL;
|
||||
err:
|
||||
gve_adminq_free(&priv->pdev->dev, priv);
|
||||
return err;
|
||||
|
|
@ -2383,6 +2428,8 @@ static void gve_teardown_priv_resources(struct gve_priv *priv)
|
|||
{
|
||||
gve_teardown_device_resources(priv);
|
||||
gve_adminq_free(&priv->pdev->dev, priv);
|
||||
bitmap_free(priv->xsk_pools);
|
||||
priv->xsk_pools = NULL;
|
||||
}
|
||||
|
||||
static void gve_trigger_reset(struct gve_priv *priv)
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include <net/ip6_checksum.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/tcp.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
|
||||
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
|
||||
{
|
||||
|
|
@ -149,6 +150,10 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
|
|||
gve_free_to_page_pool(rx, bs, false);
|
||||
else
|
||||
gve_free_qpl_page_dqo(bs);
|
||||
if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) {
|
||||
xsk_buff_free(bs->xsk_buff);
|
||||
bs->xsk_buff = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (rx->dqo.qpl) {
|
||||
|
|
@ -580,8 +585,11 @@ static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
|
|||
int err;
|
||||
|
||||
xdpf = xdp_convert_buff_to_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
if (unlikely(!xdpf)) {
|
||||
if (rx->xsk_pool)
|
||||
xsk_buff_free(xdp);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
|
||||
tx = &priv->tx[tx_qid];
|
||||
|
|
@ -592,6 +600,41 @@ static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
|
|||
return err;
|
||||
}
|
||||
|
||||
/* Act on a non-PASS XDP verdict @xdp_act for the XSK buffer @xdp.
 *
 * XDP_ABORTED, XDP_DROP and any unrecognised verdict free the buffer with
 * xsk_buff_free(). XDP_TX hands the buffer to gve_xdp_tx_dqo() and
 * XDP_REDIRECT hands it to xdp_do_redirect().
 *
 * On success the per-action counter rx->xdp_actions[xdp_act] is incremented
 * (only when xdp_act is below GVE_XDP_ACTIONS). When the TX or redirect call
 * fails, the matching error counter is incremented instead.
 *
 * NOTE(review): the err path below does not call xsk_buff_free() itself;
 * confirm that the failing callee has already released @xdp in every case.
 */
static void gve_xsk_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
|
||||
struct xdp_buff *xdp, struct bpf_prog *xprog,
|
||||
int xdp_act)
|
||||
{
|
||||
switch (xdp_act) {
|
||||
case XDP_ABORTED:
|
||||
case XDP_DROP:
|
||||
default:
|
||||
xsk_buff_free(xdp);
|
||||
break;
|
||||
case XDP_TX:
|
||||
if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp)))
|
||||
goto err;
|
||||
break;
|
||||
case XDP_REDIRECT:
|
||||
if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog)))
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
|
||||
u64_stats_update_begin(&rx->statss);
|
||||
/* Cast to u32 so that a negative verdict also fails the bounds check. */
if ((u32)xdp_act < GVE_XDP_ACTIONS)
|
||||
rx->xdp_actions[xdp_act]++;
|
||||
u64_stats_update_end(&rx->statss);
|
||||
return;
|
||||
|
||||
err:
|
||||
u64_stats_update_begin(&rx->statss);
|
||||
if (xdp_act == XDP_TX)
|
||||
rx->xdp_tx_errors++;
|
||||
if (xdp_act == XDP_REDIRECT)
|
||||
rx->xdp_redirect_errors++;
|
||||
u64_stats_update_end(&rx->statss);
|
||||
}
|
||||
|
||||
static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
|
||||
struct xdp_buff *xdp, struct bpf_prog *xprog,
|
||||
int xdp_act,
|
||||
|
|
@ -633,6 +676,48 @@ static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Handle an RX completion whose buffer state carries an XSK buffer.
 *
 * Sets the buffer's data_end from @buf_len, syncs it for CPU access and, if
 * @xprog is non-NULL, runs the XDP program on it. Any verdict other than
 * XDP_PASS is delegated to gve_xsk_done_dqo() and the buffer state is freed.
 *
 * For XDP_PASS (or when no program is given) the payload is copied into a new
 * skb stored in rx->ctx.skb_head/skb_tail, after which the XSK buffer and the
 * buffer state are both freed and the XDP_PASS counter is incremented.
 *
 * Returns 0 on success or when the program consumed the packet, and -ENOMEM
 * when gve_rx_copy_data() fails (the XSK buffer and buffer state are freed in
 * that case as well).
 */
static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
|
||||
struct gve_rx_buf_state_dqo *buf_state, int buf_len,
|
||||
struct bpf_prog *xprog)
|
||||
{
|
||||
struct xdp_buff *xdp = buf_state->xsk_buff;
|
||||
struct gve_priv *priv = rx->gve;
|
||||
int xdp_act;
|
||||
|
||||
xdp->data_end = xdp->data + buf_len;
|
||||
xsk_buff_dma_sync_for_cpu(xdp);
|
||||
|
||||
if (xprog) {
|
||||
xdp_act = bpf_prog_run_xdp(xprog, xdp);
|
||||
/* Recompute the length from the data pointers after the program has run. */
buf_len = xdp->data_end - xdp->data;
|
||||
if (xdp_act != XDP_PASS) {
|
||||
gve_xsk_done_dqo(priv, rx, xdp, xprog, xdp_act);
|
||||
gve_free_buf_state(rx, buf_state);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy the data to skb */
|
||||
rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
|
||||
xdp->data, buf_len);
|
||||
if (unlikely(!rx->ctx.skb_head)) {
|
||||
xsk_buff_free(xdp);
|
||||
gve_free_buf_state(rx, buf_state);
|
||||
return -ENOMEM;
|
||||
}
|
||||
rx->ctx.skb_tail = rx->ctx.skb_head;
|
||||
|
||||
/* Free XSK buffer and Buffer state */
|
||||
xsk_buff_free(xdp);
|
||||
gve_free_buf_state(rx, buf_state);
|
||||
|
||||
/* Update Stats */
|
||||
u64_stats_update_begin(&rx->statss);
|
||||
rx->xdp_actions[XDP_PASS]++;
|
||||
u64_stats_update_end(&rx->statss);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns 0 if descriptor is completed successfully.
|
||||
* Returns -EINVAL if descriptor is invalid.
|
||||
* Returns -ENOMEM if data cannot be copied to skb.
|
||||
|
|
@ -671,7 +756,11 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
|
|||
buf_len = compl_desc->packet_len;
|
||||
hdr_len = compl_desc->header_len;
|
||||
|
||||
/* Page might have not been used for a while and was likely last written
|
||||
xprog = READ_ONCE(priv->xdp_prog);
|
||||
if (buf_state->xsk_buff)
|
||||
return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog);
|
||||
|
||||
/* Page might not have been used for a while and was likely last written
|
||||
* by a different thread.
|
||||
*/
|
||||
if (rx->dqo.page_pool) {
|
||||
|
|
@ -721,7 +810,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
|
|||
return 0;
|
||||
}
|
||||
|
||||
xprog = READ_ONCE(priv->xdp_prog);
|
||||
if (xprog) {
|
||||
struct xdp_buff xdp;
|
||||
void *old_data;
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include <linux/tcp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
|
||||
/* Returns true if tx_bufs are available. */
|
||||
static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count)
|
||||
|
|
@ -241,6 +242,9 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
|
|||
tx->dqo.tx_ring = NULL;
|
||||
}
|
||||
|
||||
kvfree(tx->dqo.xsk_reorder_queue);
|
||||
tx->dqo.xsk_reorder_queue = NULL;
|
||||
|
||||
kvfree(tx->dqo.pending_packets);
|
||||
tx->dqo.pending_packets = NULL;
|
||||
|
||||
|
|
@ -345,6 +349,17 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
|
|||
|
||||
tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1;
|
||||
atomic_set_release(&tx->dqo_compl.free_pending_packets, -1);
|
||||
|
||||
/* Only alloc xsk pool for XDP queues */
|
||||
if (idx >= cfg->qcfg->num_queues && cfg->num_xdp_rings) {
|
||||
tx->dqo.xsk_reorder_queue =
|
||||
kvcalloc(tx->dqo.complq_mask + 1,
|
||||
sizeof(tx->dqo.xsk_reorder_queue[0]),
|
||||
GFP_KERNEL);
|
||||
if (!tx->dqo.xsk_reorder_queue)
|
||||
goto err;
|
||||
}
|
||||
|
||||
tx->dqo_compl.miss_completions.head = -1;
|
||||
tx->dqo_compl.miss_completions.tail = -1;
|
||||
tx->dqo_compl.timed_out_completions.head = -1;
|
||||
|
|
@ -992,6 +1007,38 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Append @completion_tag to the XSK reorder queue of @tx.
 *
 * The tag is written at the current tail slot, the tail is advanced modulo
 * the queue size (complq_mask + 1), and the new tail is published with
 * release semantics; gve_xsk_reorder_queue_head() reads it with acquire
 * semantics.
 */
static void gve_xsk_reorder_queue_push_dqo(struct gve_tx_ring *tx,
|
||||
u16 completion_tag)
|
||||
{
|
||||
u32 tail = atomic_read(&tx->dqo_tx.xsk_reorder_queue_tail);
|
||||
|
||||
tx->dqo.xsk_reorder_queue[tail] = completion_tag;
|
||||
tail = (tail + 1) & tx->dqo.complq_mask;
|
||||
/* Release store: the slot write above must be visible before the new tail. */
atomic_set_release(&tx->dqo_tx.xsk_reorder_queue_tail, tail);
|
||||
}
|
||||
|
||||
/* Peek at the oldest entry of the XSK reorder queue without removing it.
 *
 * Uses the tail cached in tx->dqo_compl; when the cached tail equals the head
 * the cache is refreshed from tx->dqo_tx.xsk_reorder_queue_tail with acquire
 * semantics.
 *
 * Returns the pending packet whose completion tag is stored at the head slot,
 * or NULL if the queue is still empty after the refresh.
 */
static struct gve_tx_pending_packet_dqo *
|
||||
gve_xsk_reorder_queue_head(struct gve_tx_ring *tx)
|
||||
{
|
||||
u32 head = tx->dqo_compl.xsk_reorder_queue_head;
|
||||
|
||||
/* Cached tail says empty: re-read the shared tail before giving up. */
if (head == tx->dqo_compl.xsk_reorder_queue_tail) {
|
||||
tx->dqo_compl.xsk_reorder_queue_tail =
|
||||
atomic_read_acquire(&tx->dqo_tx.xsk_reorder_queue_tail);
|
||||
|
||||
if (head == tx->dqo_compl.xsk_reorder_queue_tail)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &tx->dqo.pending_packets[tx->dqo.xsk_reorder_queue[head]];
|
||||
}
|
||||
|
||||
/* Drop the head entry of the XSK reorder queue by advancing the head index,
 * wrapping it with complq_mask. No emptiness check is done in this function.
 */
static void gve_xsk_reorder_queue_pop_dqo(struct gve_tx_ring *tx)
|
||||
{
|
||||
tx->dqo_compl.xsk_reorder_queue_head++;
|
||||
tx->dqo_compl.xsk_reorder_queue_head &= tx->dqo.complq_mask;
|
||||
}
|
||||
|
||||
/* Transmit a given skb and ring the doorbell. */
|
||||
netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
|
|
@ -1015,6 +1062,62 @@ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
|
|||
return NETDEV_TX_OK;
|
||||
}
|
||||
|
||||
/* Post up to @budget descriptors from tx->xsk_pool onto the DQO TX ring.
 *
 * Runs with tx->dqo_tx.xdp_lock held. Each iteration:
 *   - stops and requests a re-poll if gve_has_avail_slots_tx_dqo(tx, 1, 1)
 *     reports no room;
 *   - stops if xsk_tx_peek_desc() yields no descriptor;
 *   - otherwise allocates a pending packet of type XSK, resolves and syncs
 *     the DMA address of the descriptor, fills one packet descriptor, advances
 *     the ring tail and pushes the completion tag onto the reorder queue.
 *
 * If at least one descriptor was posted, the doorbell is rung and
 * xsk_tx_release() is called. tx->xdp_xsk_sent is bumped by the number sent.
 *
 * Returns true when the caller should poll again: either the whole budget was
 * used or the ring had no free slots.
 */
static bool gve_xsk_tx_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
|
||||
int budget)
|
||||
{
|
||||
struct xsk_buff_pool *pool = tx->xsk_pool;
|
||||
struct xdp_desc desc;
|
||||
bool repoll = false;
|
||||
int sent = 0;
|
||||
|
||||
spin_lock(&tx->dqo_tx.xdp_lock);
|
||||
for (; sent < budget; sent++) {
|
||||
struct gve_tx_pending_packet_dqo *pkt;
|
||||
s16 completion_tag;
|
||||
dma_addr_t addr;
|
||||
u32 desc_idx;
|
||||
|
||||
if (unlikely(!gve_has_avail_slots_tx_dqo(tx, 1, 1))) {
|
||||
repoll = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!xsk_tx_peek_desc(pool, &desc))
|
||||
break;
|
||||
|
||||
/* NOTE(review): pkt is dereferenced without a NULL check; presumably the
 * gve_has_avail_slots_tx_dqo() check above guarantees a free pending
 * packet - confirm.
 */
pkt = gve_alloc_pending_packet(tx);
|
||||
pkt->type = GVE_TX_PENDING_PACKET_DQO_XSK;
|
||||
pkt->num_bufs = 0;
|
||||
/* The completion tag is the packet's index in the pending_packets array. */
completion_tag = pkt - tx->dqo.pending_packets;
|
||||
|
||||
addr = xsk_buff_raw_get_dma(pool, desc.addr);
|
||||
xsk_buff_raw_dma_sync_for_device(pool, addr, desc.len);
|
||||
|
||||
desc_idx = tx->dqo_tx.tail;
|
||||
gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
|
||||
true, desc.len,
|
||||
addr, completion_tag, true,
|
||||
false);
|
||||
++pkt->num_bufs;
|
||||
gve_tx_update_tail(tx, desc_idx);
|
||||
tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;
|
||||
gve_xsk_reorder_queue_push_dqo(tx, completion_tag);
|
||||
}
|
||||
|
||||
/* Ring the doorbell once for the whole batch rather than per descriptor. */
if (sent) {
|
||||
gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
|
||||
xsk_tx_release(pool);
|
||||
}
|
||||
|
||||
spin_unlock(&tx->dqo_tx.xdp_lock);
|
||||
|
||||
u64_stats_update_begin(&tx->statss);
|
||||
tx->xdp_xsk_sent += sent;
|
||||
u64_stats_update_end(&tx->statss);
|
||||
|
||||
return (sent == budget) || repoll;
|
||||
}
|
||||
|
||||
static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list,
|
||||
struct gve_tx_pending_packet_dqo *pending_packet)
|
||||
{
|
||||
|
|
@ -1152,6 +1255,9 @@ static void gve_handle_packet_completion(struct gve_priv *priv,
|
|||
pending_packet->xdpf = NULL;
|
||||
gve_free_pending_packet(tx, pending_packet);
|
||||
break;
|
||||
case GVE_TX_PENDING_PACKET_DQO_XSK:
|
||||
pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE;
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
|
@ -1251,8 +1357,34 @@ static void remove_timed_out_completions(struct gve_priv *priv,
|
|||
|
||||
remove_from_list(tx, &tx->dqo_compl.timed_out_completions,
|
||||
pending_packet);
|
||||
|
||||
/* Need to count XSK packets in xsk_tx_completed. */
|
||||
if (pending_packet->type == GVE_TX_PENDING_PACKET_DQO_XSK)
|
||||
pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE;
|
||||
else
|
||||
gve_free_pending_packet(tx, pending_packet);
|
||||
}
|
||||
}
|
||||
|
||||
/* Retire completed XSK TX packets in the order they were posted.
 *
 * Repeatedly peeks the head of the reorder queue; while the head packet is in
 * GVE_PACKET_STATE_XSK_COMPLETE it is popped and its pending packet freed.
 * The walk stops at the first packet that is not yet complete, or when the
 * queue is empty, so later completions wait behind an earlier outstanding
 * one. The number retired, if non-zero, is reported via xsk_tx_completed().
 */
static void gve_tx_process_xsk_completions(struct gve_tx_ring *tx)
|
||||
{
|
||||
u32 num_xsks = 0;
|
||||
|
||||
while (true) {
|
||||
struct gve_tx_pending_packet_dqo *pending_packet =
|
||||
gve_xsk_reorder_queue_head(tx);
|
||||
|
||||
if (!pending_packet ||
|
||||
pending_packet->state != GVE_PACKET_STATE_XSK_COMPLETE)
|
||||
break;
|
||||
|
||||
num_xsks++;
|
||||
gve_xsk_reorder_queue_pop_dqo(tx);
|
||||
gve_free_pending_packet(tx, pending_packet);
|
||||
}
|
||||
|
||||
if (num_xsks)
|
||||
xsk_tx_completed(tx->xsk_pool, num_xsks);
|
||||
}
|
||||
|
||||
int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
|
||||
|
|
@ -1333,6 +1465,9 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
|
|||
remove_miss_completions(priv, tx);
|
||||
remove_timed_out_completions(priv, tx);
|
||||
|
||||
if (tx->xsk_pool)
|
||||
gve_tx_process_xsk_completions(tx);
|
||||
|
||||
u64_stats_update_begin(&tx->statss);
|
||||
tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes;
|
||||
tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts;
|
||||
|
|
@ -1365,6 +1500,19 @@ bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
|
|||
return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
|
||||
}
|
||||
|
||||
/* Poll XSK TX for the XDP TX queue paired with the RX ring of @rx_block.
 *
 * The TX ring is selected with gve_xdp_tx_queue_id(priv, rx->q_num). If that
 * ring has an XSK pool attached, up to @budget descriptors are posted via
 * gve_xsk_tx_dqo() and its result is returned.
 *
 * Returns true when the caller should poll again, false when there is no XSK
 * pool on the ring or no further work is pending.
 */
bool gve_xsk_tx_poll_dqo(struct gve_notify_block *rx_block, int budget)
|
||||
{
|
||||
struct gve_rx_ring *rx = rx_block->rx;
|
||||
struct gve_priv *priv = rx->gve;
|
||||
struct gve_tx_ring *tx;
|
||||
|
||||
tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)];
|
||||
if (tx->xsk_pool)
|
||||
return gve_xsk_tx_dqo(priv, tx, budget);
|
||||
|
||||
/* Function returns bool: use false rather than the integer literal 0. */
return false;
|
||||
}
|
||||
|
||||
bool gve_xdp_poll_dqo(struct gve_notify_block *block)
|
||||
{
|
||||
struct gve_tx_compl_desc *compl_desc;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user