From 2cb67ab153d59473d67713c709a198922b7e87a3 Mon Sep 17 00:00:00 2001 From: Yangchun Fu Date: Mon, 11 Oct 2021 08:36:44 -0700 Subject: [PATCH 1/7] gve: Switch to use napi_complete_done Use napi_complete_done to allow for the use of gro_flush_timeout. Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support") Signed-off-by: Yangchun Fu Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 5 ++- drivers/net/ethernet/google/gve/gve_main.c | 38 +++++++++++++--------- drivers/net/ethernet/google/gve/gve_rx.c | 37 +++++++++++---------- 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 2f93ed470590..4abd53bdde73 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -825,11 +825,10 @@ __be32 gve_tx_load_event_counter(struct gve_priv *priv, struct gve_tx_ring *tx); /* rx handling */ void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); -bool gve_rx_poll(struct gve_notify_block *block, int budget); +int gve_rx_poll(struct gve_notify_block *block, int budget); +bool gve_rx_work_pending(struct gve_rx_ring *rx); int gve_rx_alloc_rings(struct gve_priv *priv); void gve_rx_free_rings_gqi(struct gve_priv *priv); -bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, - netdev_features_t feat); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 5b5dcaaeed7f..b41679ab0dbe 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -192,34 +192,40 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) __be32 __iomem *irq_doorbell; bool reschedule = false; struct gve_priv *priv; + int work_done = 0; block = container_of(napi, struct gve_notify_block, napi); priv = block->priv; if (block->tx) reschedule |= gve_tx_poll(block, budget); - if (block->rx) - reschedule |= gve_rx_poll(block, budget); + if (block->rx) { + work_done = gve_rx_poll(block, budget); + reschedule |= work_done == budget; + } if (reschedule) return budget; - napi_complete(napi); - irq_doorbell = gve_irq_doorbell(priv, block); - iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); + /* Complete processing - don't unmask irq if busy polling is enabled */ + if (likely(napi_complete_done(napi, work_done))) { + irq_doorbell = gve_irq_doorbell(priv, block); + iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); - /* Double check we have no extra work. - * Ensure unmask synchronizes with checking for work. - */ - mb(); - if (block->tx) - reschedule |= gve_tx_poll(block, -1); - if (block->rx) - reschedule |= gve_rx_poll(block, -1); - if (reschedule && napi_reschedule(napi)) - iowrite32be(GVE_IRQ_MASK, irq_doorbell); + /* Double check we have no extra work. + * Ensure unmask synchronizes with checking for work. + */ + mb(); - return 0; + if (block->tx) + reschedule |= gve_tx_poll(block, -1); + if (block->rx) + reschedule |= gve_rx_work_pending(block->rx); + + if (reschedule && napi_reschedule(napi)) + iowrite32be(GVE_IRQ_MASK, irq_doorbell); + } + return work_done; } static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 94941d4e4744..3347879a4a5d 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -456,7 +456,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, return true; } -static bool gve_rx_work_pending(struct gve_rx_ring *rx) +bool gve_rx_work_pending(struct gve_rx_ring *rx) { struct gve_rx_desc *desc; __be16 flags_seq; @@ -524,8 +524,8 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) return true; } -bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, - netdev_features_t feat) +static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + netdev_features_t feat) { struct gve_priv *priv = rx->gve; u32 work_done = 0, packets = 0; @@ -559,13 +559,15 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, } if (!work_done && rx->fill_cnt - cnt > rx->db_threshold) - return false; + return 0; - u64_stats_update_begin(&rx->statss); - rx->rpackets += packets; - rx->rbytes += bytes; - u64_stats_update_end(&rx->statss); - rx->cnt = cnt; + if (work_done) { + u64_stats_update_begin(&rx->statss); + rx->rpackets += packets; + rx->rbytes += bytes; + u64_stats_update_end(&rx->statss); + rx->cnt = cnt; + } /* restock ring slots */ if (!rx->data.raw_addressing) { @@ -576,26 +578,26 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, * falls below a threshold. */ if (!gve_rx_refill_buffers(priv, rx)) - return false; + return 0; /* If we were not able to completely refill buffers, we'll want * to schedule this queue for work again to refill buffers. */ if (rx->fill_cnt - cnt <= rx->db_threshold) { gve_rx_write_doorbell(priv, rx); - return true; + return budget; } } gve_rx_write_doorbell(priv, rx); - return gve_rx_work_pending(rx); + return work_done; } -bool gve_rx_poll(struct gve_notify_block *block, int budget) +int gve_rx_poll(struct gve_notify_block *block, int budget) { struct gve_rx_ring *rx = block->rx; netdev_features_t feat; - bool repoll = false; + int work_done = 0; feat = block->napi.dev->features; @@ -604,8 +606,7 @@ bool gve_rx_poll(struct gve_notify_block *block, int budget) budget = INT_MAX; if (budget > 0) - repoll |= gve_clean_rx_done(rx, budget, feat); - else - repoll |= gve_rx_work_pending(rx); - return repoll; + work_done = gve_clean_rx_done(rx, budget, feat); + + return work_done; } From 58401b2a46e7d5090873e7725ac786685475d638 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 11 Oct 2021 08:36:45 -0700 Subject: [PATCH 2/7] gve: Add rx buffer pagecnt bias Add a pagecnt bias field to rx buffer info struct to eliminate needing to increment the atomic page ref count on every pass in the rx hotpath. Also prefetch two packet pages ahead. Fixes: ede3fcf5ec67f ("gve: Add support for raw addressing to the rx path") Signed-off-by: Yanchun Fu Signed-off-by: Nathan Lewis Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 52 +++++++++++++++++------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 3347879a4a5d..41b21b527470 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -16,19 +16,23 @@ static void gve_rx_free_buffer(struct device *dev, dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK); + page_ref_sub(page_info->page, page_info->pagecnt_bias - 1); gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE); } static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) { - if (rx->data.raw_addressing) { - u32 slots = rx->mask + 1; - int i; + u32 slots = rx->mask + 1; + int i; + if (rx->data.raw_addressing) { for (i = 0; i < slots; i++) gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i], &rx->data.data_ring[i]); } else { + for (i = 0; i < slots; i++) + page_ref_sub(rx->data.page_info[i].page, + rx->data.page_info[i].pagecnt_bias - 1); gve_unassign_qpl(priv, rx->data.qpl->id); rx->data.qpl = NULL; } @@ -69,6 +73,9 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, page_info->page_offset = 0; page_info->page_address = page_address(page); *slot_addr = cpu_to_be64(addr); + /* The page already has 1 ref */ + page_ref_add(page, INT_MAX - 1); + page_info->pagecnt_bias = INT_MAX; } static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, @@ -299,17 +306,18 @@ static bool gve_rx_can_flip_buffers(struct net_device *netdev) ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false; } -static int gve_rx_can_recycle_buffer(struct page *page) +static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info) { - int pagecount = page_count(page); + int pagecount = page_count(page_info->page); /* This page is not being used by any SKBs - reuse */ - if (pagecount == 1) + if (pagecount == page_info->pagecnt_bias) return 1; /* This page is still being used by an SKB - we can't reuse */ - else if (pagecount >= 2) + else if (pagecount > page_info->pagecnt_bias) return 0; - WARN(pagecount < 1, "Pagecount should never be < 1"); + WARN(pagecount < page_info->pagecnt_bias, + "Pagecount should never be less than the bias."); return -1; } @@ -325,11 +333,11 @@ gve_rx_raw_addressing(struct device *dev, struct net_device *netdev, if (!skb) return NULL; - /* Optimistically stop the kernel from freeing the page by increasing - * the page bias. We will check the refcount in refill to determine if - * we need to alloc a new page. + /* Optimistically stop the kernel from freeing the page. + * We will check again in refill to determine if we need to alloc a + * new page. */ - get_page(page_info->page); + gve_dec_pagecnt_bias(page_info); return skb; } @@ -352,7 +360,7 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev, /* No point in recycling if we didn't get the skb */ if (skb) { /* Make sure that the page isn't freed. */ - get_page(page_info->page); + gve_dec_pagecnt_bias(page_info); gve_rx_flip_buff(page_info, &data_slot->qpl_offset); } } else { @@ -376,8 +384,18 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, union gve_rx_data_slot *data_slot; struct sk_buff *skb = NULL; dma_addr_t page_bus; + void *va; u16 len; + /* Prefetch two packet pages ahead, we will need it soon. */ + page_info = &rx->data.page_info[(idx + 2) & rx->mask]; + va = page_info->page_address + GVE_RX_PAD + + page_info->page_offset; + + prefetch(page_info->page); /* Kernel page struct. */ + prefetch(va); /* Packet header. */ + prefetch(va + 64); /* Next cacheline too. */ + /* drop this packet */ if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) { u64_stats_update_begin(&rx->statss); @@ -408,7 +426,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, int recycle = 0; if (can_flip) { - recycle = gve_rx_can_recycle_buffer(page_info->page); + recycle = gve_rx_can_recycle_buffer(page_info); if (recycle < 0) { if (!rx->data.raw_addressing) gve_schedule_reset(priv); @@ -499,7 +517,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) * owns half the page it is impossible to tell which half. Either * the whole page is free or it needs to be replaced. */ - int recycle = gve_rx_can_recycle_buffer(page_info->page); + int recycle = gve_rx_can_recycle_buffer(page_info); if (recycle < 0) { if (!rx->data.raw_addressing) @@ -546,6 +564,10 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, "[%d] seqno=%d rx->desc.seqno=%d\n", rx->q_num, GVE_SEQNO(desc->flags_seq), rx->desc.seqno); + + /* prefetch two descriptors ahead */ + prefetch(rx->desc.desc_ring + ((cnt + 2) & rx->mask)); + dropped = !gve_rx(rx, desc, feat, idx); if (!dropped) { bytes += be16_to_cpu(desc->len) - GVE_RX_PAD; From 61d72c7e486b63340f1fadcf3bed4cae98f03d9b Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 11 Oct 2021 08:36:46 -0700 Subject: [PATCH 3/7] gve: Do lazy cleanup in TX path When TX queue is full, attemt to process enough TX completions to avoid stalling the queue. Fixes: f5cedc84a30d2 ("gve: Add transmit and receive support") Signed-off-by: Tao Liu Signed-off-by: Catherine Sullivan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 9 +- drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +- drivers/net/ethernet/google/gve/gve_main.c | 6 +- drivers/net/ethernet/google/gve/gve_tx.c | 92 +++++++++++-------- 4 files changed, 61 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 4abd53bdde73..3de561e22659 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -341,8 +341,8 @@ struct gve_tx_ring { union { /* GQI fields */ struct { - /* NIC tail pointer */ - __be32 last_nic_done; + /* Spinlock for when cleanup in progress */ + spinlock_t clean_lock; }; /* DQO fields. */ @@ -821,8 +821,9 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); bool gve_tx_poll(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings(struct gve_priv *priv); void gve_tx_free_rings_gqi(struct gve_priv *priv); -__be32 gve_tx_load_event_counter(struct gve_priv *priv, - struct gve_tx_ring *tx); +u32 gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx); +bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); /* rx handling */ void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); int gve_rx_poll(struct gve_notify_block *block, int budget); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 716e6240305d..618a3e1d858e 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -330,8 +330,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = tmp_tx_bytes; data[i++] = tx->wake_queue; data[i++] = tx->stop_queue; - data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv, - tx)); + data[i++] = gve_tx_load_event_counter(priv, tx); data[i++] = tx->dma_mapping_error; /* stats from NIC */ if (skip_nic_stats) { diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index b41679ab0dbe..b6805ad2011b 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -212,13 +212,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) irq_doorbell = gve_irq_doorbell(priv, block); iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); - /* Double check we have no extra work. - * Ensure unmask synchronizes with checking for work. + /* Ensure IRQ ACK is visible before we check pending work. + * If queue had issued updates, it would be truly visible. */ mb(); if (block->tx) - reschedule |= gve_tx_poll(block, -1); + reschedule |= gve_tx_clean_pending(priv, block->tx); if (block->rx) reschedule |= gve_rx_work_pending(block->rx); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 9922ce46a635..a9cb241fedf4 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -144,7 +144,7 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx) gve_tx_remove_from_block(priv, idx); slots = tx->mask + 1; - gve_clean_tx_done(priv, tx, tx->req, false); + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); netdev_tx_reset_queue(tx->netdev_txq); dma_free_coherent(hdev, sizeof(*tx->q_resources), @@ -176,6 +176,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) /* Make sure everything is zeroed to start */ memset(tx, 0, sizeof(*tx)); + spin_lock_init(&tx->clean_lock); tx->q_num = idx; tx->mask = slots - 1; @@ -328,10 +329,16 @@ static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required) return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc); } +static_assert(NAPI_POLL_WEIGHT >= MAX_TX_DESC_NEEDED); + /* Stops the queue if the skb cannot be transmitted. */ -static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb) +static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx, + struct sk_buff *skb) { int bytes_required = 0; + u32 nic_done; + u32 to_do; + int ret; if (!tx->raw_addressing) bytes_required = gve_skb_fifo_bytes_required(tx, skb); @@ -339,29 +346,28 @@ static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb) if (likely(gve_can_tx(tx, bytes_required))) return 0; - /* No space, so stop the queue */ - tx->stop_queue++; - netif_tx_stop_queue(tx->netdev_txq); - smp_mb(); /* sync with restarting queue in gve_clean_tx_done() */ + ret = -EBUSY; + spin_lock(&tx->clean_lock); + nic_done = gve_tx_load_event_counter(priv, tx); + to_do = nic_done - tx->done; - /* Now check for resources again, in case gve_clean_tx_done() freed - * resources after we checked and we stopped the queue after - * gve_clean_tx_done() checked. - * - * gve_maybe_stop_tx() gve_clean_tx_done() - * nsegs/can_alloc test failed - * gve_tx_free_fifo() - * if (tx queue stopped) - * netif_tx_queue_wake() - * netif_tx_stop_queue() - * Need to check again for space here! - */ - if (likely(!gve_can_tx(tx, bytes_required))) - return -EBUSY; + /* Only try to clean if there is hope for TX */ + if (to_do + gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED) { + if (to_do > 0) { + to_do = min_t(u32, to_do, NAPI_POLL_WEIGHT); + gve_clean_tx_done(priv, tx, to_do, false); + } + if (likely(gve_can_tx(tx, bytes_required))) + ret = 0; + } + if (ret) { + /* No space, so stop the queue */ + tx->stop_queue++; + netif_tx_stop_queue(tx->netdev_txq); + } + spin_unlock(&tx->clean_lock); - netif_tx_start_queue(tx->netdev_txq); - tx->wake_queue++; - return 0; + return ret; } static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, @@ -576,7 +582,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues, "skb queue index out of range"); tx = &priv->tx[skb_get_queue_mapping(skb)]; - if (unlikely(gve_maybe_stop_tx(tx, skb))) { + if (unlikely(gve_maybe_stop_tx(priv, tx, skb))) { /* We need to ring the txq doorbell -- we have stopped the Tx * queue for want of resources, but prior calls to gve_tx() * may have added descriptors without ringing the doorbell. @@ -672,19 +678,19 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, return pkts; } -__be32 gve_tx_load_event_counter(struct gve_priv *priv, - struct gve_tx_ring *tx) +u32 gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx) { - u32 counter_index = be32_to_cpu((tx->q_resources->counter_index)); + u32 counter_index = be32_to_cpu(tx->q_resources->counter_index); + __be32 counter = READ_ONCE(priv->counter_array[counter_index]); - return READ_ONCE(priv->counter_array[counter_index]); + return be32_to_cpu(counter); } bool gve_tx_poll(struct gve_notify_block *block, int budget) { struct gve_priv *priv = block->priv; struct gve_tx_ring *tx = block->tx; - bool repoll = false; u32 nic_done; u32 to_do; @@ -692,17 +698,23 @@ bool gve_tx_poll(struct gve_notify_block *block, int budget) if (budget == 0) budget = INT_MAX; + /* In TX path, it may try to clean completed pkts in order to xmit, + * to avoid cleaning conflict, use spin_lock(), it yields better + * concurrency between xmit/clean than netif's lock. + */ + spin_lock(&tx->clean_lock); /* Find out how much work there is to be done */ - tx->last_nic_done = gve_tx_load_event_counter(priv, tx); - nic_done = be32_to_cpu(tx->last_nic_done); - if (budget > 0) { - /* Do as much work as we have that the budget will - * allow - */ - to_do = min_t(u32, (nic_done - tx->done), budget); - gve_clean_tx_done(priv, tx, to_do, true); - } + nic_done = gve_tx_load_event_counter(priv, tx); + to_do = min_t(u32, (nic_done - tx->done), budget); + gve_clean_tx_done(priv, tx, to_do, true); + spin_unlock(&tx->clean_lock); /* If we still have work we want to repoll */ - repoll |= (nic_done != tx->done); - return repoll; + return nic_done != tx->done; +} + +bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx) +{ + u32 nic_done = gve_tx_load_event_counter(priv, tx); + + return nic_done != tx->done; } From 87a7f321bb6a45e54b7d6c90d032ee5636a6ad97 Mon Sep 17 00:00:00 2001 From: John Fraker Date: Mon, 11 Oct 2021 08:36:47 -0700 Subject: [PATCH 4/7] gve: Recover from queue stall due to missed IRQ Don't always reset the driver on a TX timeout. Attempt to recover by kicking the queue in case an IRQ was missed. Fixes: 9e5f7d26a4c08 ("gve: Add workqueue and reset support") Signed-off-by: John Fraker Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 4 +- drivers/net/ethernet/google/gve/gve_adminq.h | 1 + drivers/net/ethernet/google/gve/gve_main.c | 48 +++++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 3de561e22659..51ed8fe71d2d 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -30,7 +30,7 @@ #define GVE_MIN_MSIX 3 /* Numbers of gve tx/rx stats in stats report. */ -#define GVE_TX_STATS_REPORT_NUM 5 +#define GVE_TX_STATS_REPORT_NUM 6 #define GVE_RX_STATS_REPORT_NUM 2 /* Interval to schedule a stats report update, 20000ms. */ @@ -413,7 +413,9 @@ struct gve_tx_ring { u32 q_num ____cacheline_aligned; /* queue idx */ u32 stop_queue; /* count of queue stops */ u32 wake_queue; /* count of queue wakes */ + u32 queue_timeout; /* count of queue timeouts */ u32 ntfy_id; /* notification block index */ + u32 last_kick_msec; /* Last time the queue was kicked */ dma_addr_t bus; /* dma address of the descr ring */ dma_addr_t q_resources_bus; /* dma address of the queue resources */ dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 47c3d8f313fc..3953f6f7a427 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -270,6 +270,7 @@ enum gve_stat_names { TX_LAST_COMPLETION_PROCESSED = 5, RX_NEXT_EXPECTED_SEQUENCE = 6, RX_BUFFERS_POSTED = 7, + TX_TIMEOUT_CNT = 8, // stats from NIC RX_QUEUE_DROP_CNT = 65, RX_NO_BUFFERS_POSTED = 66, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index b6805ad2011b..7647cd05b1d2 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -24,6 +24,9 @@ #define GVE_VERSION "1.0.0" #define GVE_VERSION_PREFIX "GVE-" +// Minimum amount of time between queue kicks in msec (10 seconds) +#define MIN_TX_TIMEOUT_GAP (1000 * 10) + const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; @@ -1121,9 +1124,47 @@ static void gve_turnup(struct gve_priv *priv) static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { - struct gve_priv *priv = netdev_priv(dev); + struct gve_notify_block *block; + struct gve_tx_ring *tx = NULL; + struct gve_priv *priv; + u32 last_nic_done; + u32 current_time; + u32 ntfy_idx; + netdev_info(dev, "Timeout on tx queue, %d", txqueue); + priv = netdev_priv(dev); + if (txqueue > priv->tx_cfg.num_queues) + goto reset; + + ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); + if (ntfy_idx > priv->num_ntfy_blks) + goto reset; + + block = &priv->ntfy_blocks[ntfy_idx]; + tx = block->tx; + + current_time = jiffies_to_msecs(jiffies); + if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) + goto reset; + + /* Check to see if there are missed completions, which will allow us to + * kick the queue. + */ + last_nic_done = gve_tx_load_event_counter(priv, tx); + if (last_nic_done - tx->done) { + netdev_info(dev, "Kicking queue %d", txqueue); + iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); + napi_schedule(&block->napi); + tx->last_kick_msec = current_time; + goto out; + } // Else reset. + +reset: gve_schedule_reset(priv); + +out: + if (tx) + tx->queue_timeout++; priv->tx_timeo_cnt++; } @@ -1252,6 +1293,11 @@ void gve_handle_report_stats(struct gve_priv *priv) .value = cpu_to_be64(last_completion), .queue_id = cpu_to_be32(idx), }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), + .value = cpu_to_be64(priv->tx[idx].queue_timeout), + .queue_id = cpu_to_be32(idx), + }; } } /* rx stats */ From 4edf8249bcd1c554b1a34166ca4ea5630031ecf5 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 11 Oct 2021 08:36:48 -0700 Subject: [PATCH 5/7] gve: Add netif_set_xps_queue call Configure XPS when adding tx queues to the notification blocks. Fixes: dbdaa67540512 ("gve: Move some static functions to a common file") Signed-off-by: Catherine Sullivan Signed-off-by: David Awogbemila Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 93f3dcbeeea9..45ff7a9ab5f9 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -18,12 +18,16 @@ void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx) void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx) { + unsigned int active_cpus = min_t(int, priv->num_ntfy_blks / 2, + num_online_cpus()); int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; struct gve_tx_ring *tx = &priv->tx[queue_idx]; block->tx = tx; tx->ntfy_id = ntfy_idx; + netif_set_xps_queue(priv->dev, get_cpu_mask(ntfy_idx % active_cpus), + queue_idx); } void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx) From ea5d3455adf1f3d47130604e6d353b23c3ee4a50 Mon Sep 17 00:00:00 2001 From: Jordan Kim Date: Mon, 11 Oct 2021 08:36:49 -0700 Subject: [PATCH 6/7] gve: Allow pageflips on larger pages Half pages are just used for small enough packets. This change allows this to also apply for systems with pages larger than 4 KB. Fixes: 02b0e0c18ba75 ("gve: Rx Buffer Recycling") Signed-off-by: Jordan Kim Signed-off-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 41b21b527470..98ba981cd534 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -302,7 +302,7 @@ static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *sl static bool gve_rx_can_flip_buffers(struct net_device *netdev) { - return PAGE_SIZE == 4096 + return PAGE_SIZE >= 4096 ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false; } From 1b4d1c9bab091ac6e20a3ff80c30c5cefe192bf4 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Mon, 11 Oct 2021 08:36:50 -0700 Subject: [PATCH 7/7] gve: Track RX buffer allocation failures The rx_buf_alloc_fail counter wasn't getting updated. Fixes: 433e274b8f7b0 ("gve: Add stats for gve.") Signed-off-by: Catherine Sullivan Signed-off-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 98ba981cd534..95bc4d8a1811 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -532,8 +532,13 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) gve_rx_free_buffer(dev, page_info, data_slot); page_info->page = NULL; - if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot)) + if (gve_rx_alloc_buffer(priv, dev, page_info, + data_slot)) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); break; + } } } fill_cnt++;