mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
net: bcmasp: Switch to page pool for RX path
This shows an improvement of 1.9% in reducing the CPU cycles and data cache misses. Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com> Reviewed-by: Justin Chen <justin.chen@broadcom.com> Reviewed-by: Nicolai Buchwitz <nb@tipi-net.de> Link: https://patch.msgid.link/20260408001813.635679-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
202ab59941
commit
686a7587bd
|
|
@ -272,6 +272,7 @@ config BCMASP
|
|||
depends on OF
|
||||
select PHYLIB
|
||||
select MDIO_BCM_UNIMAC
|
||||
select PAGE_POOL
|
||||
help
|
||||
This configuration enables the Broadcom ASP 2.0 Ethernet controller
|
||||
driver which is present in Broadcom STB SoCs such as 72165.
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include <linux/phy.h>
|
||||
#include <linux/io-64-nonatomic-hi-lo.h>
|
||||
#include <uapi/linux/ethtool.h>
|
||||
#include <net/page_pool/helpers.h>
|
||||
|
||||
#define ASP_INTR2_OFFSET 0x1000
|
||||
#define ASP_INTR2_STATUS 0x0
|
||||
|
|
@ -298,16 +299,19 @@ struct bcmasp_intf {
|
|||
void __iomem *rx_edpkt_cfg;
|
||||
void __iomem *rx_edpkt_dma;
|
||||
int rx_edpkt_index;
|
||||
int rx_buf_order;
|
||||
struct bcmasp_desc *rx_edpkt_cpu;
|
||||
dma_addr_t rx_edpkt_dma_addr;
|
||||
dma_addr_t rx_edpkt_dma_read;
|
||||
dma_addr_t rx_edpkt_dma_valid;
|
||||
|
||||
/* RX buffer prefetcher ring*/
|
||||
/* Streaming RX data ring (RBUF_4K mode) */
|
||||
void *rx_ring_cpu;
|
||||
dma_addr_t rx_ring_dma;
|
||||
dma_addr_t rx_ring_dma_valid;
|
||||
int rx_buf_order;
|
||||
|
||||
/* Page pool for recycling RX SKB data pages */
|
||||
struct page_pool *rx_page_pool;
|
||||
struct napi_struct rx_napi;
|
||||
|
||||
struct bcmasp_res res;
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#include <linux/platform_device.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/page_pool/helpers.h>
|
||||
|
||||
#include "bcmasp.h"
|
||||
#include "bcmasp_intf_defs.h"
|
||||
|
|
@ -482,10 +483,14 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
|
|||
struct bcmasp_desc *desc;
|
||||
struct sk_buff *skb;
|
||||
dma_addr_t valid;
|
||||
struct page *page;
|
||||
void *data;
|
||||
u64 flags;
|
||||
u32 len;
|
||||
|
||||
/* Hardware advances DMA_VALID as it writes each descriptor
|
||||
* (RBUF_4K streaming mode); software chases with rx_edpkt_dma_read.
|
||||
*/
|
||||
valid = rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID) + 1;
|
||||
if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE)
|
||||
valid = intf->rx_edpkt_dma_addr;
|
||||
|
|
@ -493,12 +498,12 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
|
|||
while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) {
|
||||
desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index];
|
||||
|
||||
/* Ensure that descriptor has been fully written to DRAM by
|
||||
* hardware before reading by the CPU
|
||||
/* Ensure the descriptor has been fully written to DRAM by
|
||||
* the hardware before the CPU reads it.
|
||||
*/
|
||||
rmb();
|
||||
|
||||
/* Calculate virt addr by offsetting from physical addr */
|
||||
/* Locate the packet data inside the streaming ring buffer. */
|
||||
data = intf->rx_ring_cpu +
|
||||
(DESC_ADDR(desc->buf) - intf->rx_ring_dma);
|
||||
|
||||
|
|
@ -524,19 +529,38 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
|
|||
|
||||
len = desc->size;
|
||||
|
||||
skb = napi_alloc_skb(napi, len);
|
||||
/* Allocate a page pool page as the SKB data area so the
|
||||
* kernel can recycle it efficiently after the packet is
|
||||
* consumed, avoiding repeated slab allocations.
|
||||
*/
|
||||
page = page_pool_dev_alloc_pages(intf->rx_page_pool);
|
||||
if (!page) {
|
||||
u64_stats_update_begin(&stats->syncp);
|
||||
u64_stats_inc(&stats->rx_dropped);
|
||||
u64_stats_update_end(&stats->syncp);
|
||||
intf->mib.alloc_rx_skb_failed++;
|
||||
goto next;
|
||||
}
|
||||
|
||||
skb = napi_build_skb(page_address(page), PAGE_SIZE);
|
||||
if (!skb) {
|
||||
u64_stats_update_begin(&stats->syncp);
|
||||
u64_stats_inc(&stats->rx_dropped);
|
||||
u64_stats_update_end(&stats->syncp);
|
||||
intf->mib.alloc_rx_skb_failed++;
|
||||
|
||||
page_pool_recycle_direct(intf->rx_page_pool, page);
|
||||
goto next;
|
||||
}
|
||||
|
||||
/* Reserve headroom then copy the full descriptor payload
|
||||
* (hardware prepends a 2-byte alignment pad at the start).
|
||||
*/
|
||||
skb_reserve(skb, NET_SKB_PAD);
|
||||
skb_put(skb, len);
|
||||
memcpy(skb->data, data, len);
|
||||
skb_mark_for_recycle(skb);
|
||||
|
||||
/* Skip the 2-byte hardware alignment pad. */
|
||||
skb_pull(skb, 2);
|
||||
len -= 2;
|
||||
if (likely(intf->crc_fwd)) {
|
||||
|
|
@ -558,6 +582,7 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
|
|||
u64_stats_update_end(&stats->syncp);
|
||||
|
||||
next:
|
||||
/* Return this portion of the streaming ring buffer to HW. */
|
||||
rx_edpkt_cfg_wq(intf, (DESC_ADDR(desc->buf) + desc->size),
|
||||
RX_EDPKT_RING_BUFFER_READ);
|
||||
|
||||
|
|
@ -661,12 +686,31 @@ static void bcmasp_adj_link(struct net_device *dev)
|
|||
phy_print_status(phydev);
|
||||
}
|
||||
|
||||
static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
|
||||
static struct page_pool *
|
||||
bcmasp_rx_page_pool_create(struct bcmasp_intf *intf)
|
||||
{
|
||||
struct page_pool_params pp_params = {
|
||||
.order = 0,
|
||||
.flags = 0,
|
||||
.pool_size = NUM_4K_BUFFERS,
|
||||
.nid = NUMA_NO_NODE,
|
||||
.dev = &intf->parent->pdev->dev,
|
||||
.napi = &intf->rx_napi,
|
||||
.netdev = intf->ndev,
|
||||
.offset = 0,
|
||||
.max_len = PAGE_SIZE,
|
||||
};
|
||||
|
||||
return page_pool_create(&pp_params);
|
||||
}
|
||||
|
||||
static int bcmasp_alloc_rx_buffers(struct bcmasp_intf *intf)
|
||||
{
|
||||
struct device *kdev = &intf->parent->pdev->dev;
|
||||
struct page *buffer_pg;
|
||||
int ret;
|
||||
|
||||
/* Alloc RX */
|
||||
/* Contiguous streaming ring that hardware writes packet data into. */
|
||||
intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
|
||||
buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
|
||||
if (!buffer_pg)
|
||||
|
|
@ -675,13 +719,55 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
|
|||
intf->rx_ring_cpu = page_to_virt(buffer_pg);
|
||||
intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(kdev, intf->rx_ring_dma))
|
||||
goto free_rx_buffer;
|
||||
if (dma_mapping_error(kdev, intf->rx_ring_dma)) {
|
||||
ret = -ENOMEM;
|
||||
goto free_ring_pages;
|
||||
}
|
||||
|
||||
/* Page pool for SKB data areas (copy targets, not DMA buffers). */
|
||||
intf->rx_page_pool = bcmasp_rx_page_pool_create(intf);
|
||||
if (IS_ERR(intf->rx_page_pool)) {
|
||||
ret = PTR_ERR(intf->rx_page_pool);
|
||||
intf->rx_page_pool = NULL;
|
||||
goto free_ring_dma;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
free_ring_dma:
|
||||
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
free_ring_pages:
|
||||
__free_pages(buffer_pg, intf->rx_buf_order);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bcmasp_reclaim_rx_buffers(struct bcmasp_intf *intf)
|
||||
{
|
||||
struct device *kdev = &intf->parent->pdev->dev;
|
||||
|
||||
page_pool_destroy(intf->rx_page_pool);
|
||||
intf->rx_page_pool = NULL;
|
||||
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
__free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
|
||||
}
|
||||
|
||||
static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
|
||||
{
|
||||
struct device *kdev = &intf->parent->pdev->dev;
|
||||
int ret;
|
||||
|
||||
/* Alloc RX */
|
||||
ret = bcmasp_alloc_rx_buffers(intf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
|
||||
&intf->rx_edpkt_dma_addr, GFP_KERNEL);
|
||||
&intf->rx_edpkt_dma_addr,
|
||||
GFP_KERNEL);
|
||||
if (!intf->rx_edpkt_cpu)
|
||||
goto free_rx_buffer_dma;
|
||||
goto free_rx_buffers;
|
||||
|
||||
/* Alloc TX */
|
||||
intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
|
||||
|
|
@ -701,11 +787,8 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
|
|||
free_rx_edpkt_dma:
|
||||
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
|
||||
intf->rx_edpkt_dma_addr);
|
||||
free_rx_buffer_dma:
|
||||
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
free_rx_buffer:
|
||||
__free_pages(buffer_pg, intf->rx_buf_order);
|
||||
free_rx_buffers:
|
||||
bcmasp_reclaim_rx_buffers(intf);
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
|
@ -717,9 +800,7 @@ static void bcmasp_reclaim_free_buffers(struct bcmasp_intf *intf)
|
|||
/* RX buffers */
|
||||
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
|
||||
intf->rx_edpkt_dma_addr);
|
||||
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
__free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
|
||||
bcmasp_reclaim_rx_buffers(intf);
|
||||
|
||||
/* TX buffers */
|
||||
dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
|
||||
|
|
@ -738,7 +819,7 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
|
|||
/* Make sure channels are disabled */
|
||||
rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE);
|
||||
|
||||
/* Rx SPB */
|
||||
/* Streaming data ring: hardware writes raw packet bytes here. */
|
||||
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ);
|
||||
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE);
|
||||
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE);
|
||||
|
|
@ -747,7 +828,9 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
|
|||
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid,
|
||||
RX_EDPKT_RING_BUFFER_VALID);
|
||||
|
||||
/* EDPKT */
|
||||
/* EDPKT descriptor ring: hardware fills descriptors pointing into
|
||||
* the streaming ring buffer above (RBUF_4K mode).
|
||||
*/
|
||||
rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K <<
|
||||
RX_EDPKT_CFG_CFG0_DBUF_SHIFT) |
|
||||
(RX_EDPKT_CFG_CFG0_64_ALN <<
|
||||
|
|
|
|||
|
|
@ -246,6 +246,10 @@
|
|||
((((intf)->channel - 6) * 0x14) + 0xa2000)
|
||||
#define RX_SPB_TOP_BLKOUT 0x00
|
||||
|
||||
/*
|
||||
* Number of 4 KB pages that make up the contiguous RBUF_4K streaming ring
|
||||
* and the page pool used as copy-target SKB data areas.
|
||||
*/
|
||||
#define NUM_4K_BUFFERS 32
|
||||
#define RING_BUFFER_SIZE (PAGE_SIZE * NUM_4K_BUFFERS)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user