net: bcmasp: Switch to page pool for RX path

Switching the RX path over to the page pool API shows a 1.9% reduction in
CPU cycles and data cache misses.

Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Justin Chen <justin.chen@broadcom.com>
Reviewed-by: Nicolai Buchwitz <nb@tipi-net.de>
Link: https://patch.msgid.link/20260408001813.635679-1-florian.fainelli@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Florian Fainelli authored 2026-04-07 17:18:13 -07:00; committed by Jakub Kicinski
parent 202ab59941
commit 686a7587bd
4 changed files with 115 additions and 23 deletions
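
In outline, the conversion replaces the per-packet napi_alloc_skb() allocation with a page taken from a per-interface page pool and wrapped by napi_build_skb(), so the networking stack can recycle the data page once the packet has been consumed. A minimal sketch of that receive pattern, condensed from the bcmasp_rx_poll() changes below (statistics and error accounting omitted; rx_copy_one() is a hypothetical helper used only for illustration, not a function added by this patch):

static struct sk_buff *rx_copy_one(struct bcmasp_intf *intf, void *data, u32 len)
{
	struct sk_buff *skb;
	struct page *page;

	/* Pages come from, and are recycled back to, the per-interface pool. */
	page = page_pool_dev_alloc_pages(intf->rx_page_pool);
	if (!page)
		return NULL;

	/* Build the SKB around the whole page instead of a slab buffer. */
	skb = napi_build_skb(page_address(page), PAGE_SIZE);
	if (!skb) {
		page_pool_recycle_direct(intf->rx_page_pool, page);
		return NULL;
	}

	/* Copy the packet out of the hardware streaming ring. */
	skb_reserve(skb, NET_SKB_PAD);
	skb_put(skb, len);
	memcpy(skb->data, data, len);

	/* Let the stack hand the page back to the pool when it frees the SKB. */
	skb_mark_for_recycle(skb);

	return skb;
}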


@@ -272,6 +272,7 @@ config BCMASP
depends on OF
select PHYLIB
select MDIO_BCM_UNIMAC
select PAGE_POOL
help
This configuration enables the Broadcom ASP 2.0 Ethernet controller
driver which is present in Broadcom STB SoCs such as 72165.


@@ -6,6 +6,7 @@
#include <linux/phy.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <uapi/linux/ethtool.h>
#include <net/page_pool/helpers.h>
#define ASP_INTR2_OFFSET 0x1000
#define ASP_INTR2_STATUS 0x0
@@ -298,16 +299,19 @@ struct bcmasp_intf {
void __iomem *rx_edpkt_cfg;
void __iomem *rx_edpkt_dma;
int rx_edpkt_index;
int rx_buf_order;
struct bcmasp_desc *rx_edpkt_cpu;
dma_addr_t rx_edpkt_dma_addr;
dma_addr_t rx_edpkt_dma_read;
dma_addr_t rx_edpkt_dma_valid;
/* RX buffer prefetcher ring*/
/* Streaming RX data ring (RBUF_4K mode) */
void *rx_ring_cpu;
dma_addr_t rx_ring_dma;
dma_addr_t rx_ring_dma_valid;
int rx_buf_order;
/* Page pool for recycling RX SKB data pages */
struct page_pool *rx_page_pool;
struct napi_struct rx_napi;
struct bcmasp_res res;


@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/page_pool/helpers.h>
#include "bcmasp.h"
#include "bcmasp_intf_defs.h"
@@ -482,10 +483,14 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
struct bcmasp_desc *desc;
struct sk_buff *skb;
dma_addr_t valid;
struct page *page;
void *data;
u64 flags;
u32 len;
/* Hardware advances DMA_VALID as it writes each descriptor
* (RBUF_4K streaming mode); software chases with rx_edpkt_dma_read.
*/
valid = rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID) + 1;
if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE)
valid = intf->rx_edpkt_dma_addr;
@@ -493,12 +498,12 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) {
desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index];
/* Ensure that descriptor has been fully written to DRAM by
* hardware before reading by the CPU
/* Ensure the descriptor has been fully written to DRAM by
* the hardware before the CPU reads it.
*/
rmb();
/* Calculate virt addr by offsetting from physical addr */
/* Locate the packet data inside the streaming ring buffer. */
data = intf->rx_ring_cpu +
(DESC_ADDR(desc->buf) - intf->rx_ring_dma);
@@ -524,19 +529,38 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
len = desc->size;
skb = napi_alloc_skb(napi, len);
/* Allocate a page pool page as the SKB data area so the
* kernel can recycle it efficiently after the packet is
* consumed, avoiding repeated slab allocations.
*/
page = page_pool_dev_alloc_pages(intf->rx_page_pool);
if (!page) {
u64_stats_update_begin(&stats->syncp);
u64_stats_inc(&stats->rx_dropped);
u64_stats_update_end(&stats->syncp);
intf->mib.alloc_rx_skb_failed++;
goto next;
}
skb = napi_build_skb(page_address(page), PAGE_SIZE);
if (!skb) {
u64_stats_update_begin(&stats->syncp);
u64_stats_inc(&stats->rx_dropped);
u64_stats_update_end(&stats->syncp);
intf->mib.alloc_rx_skb_failed++;
page_pool_recycle_direct(intf->rx_page_pool, page);
goto next;
}
/* Reserve headroom then copy the full descriptor payload
* (hardware prepends a 2-byte alignment pad at the start).
*/
skb_reserve(skb, NET_SKB_PAD);
skb_put(skb, len);
memcpy(skb->data, data, len);
skb_mark_for_recycle(skb);
/* Skip the 2-byte hardware alignment pad. */
skb_pull(skb, 2);
len -= 2;
if (likely(intf->crc_fwd)) {
@@ -558,6 +582,7 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
u64_stats_update_end(&stats->syncp);
next:
/* Return this portion of the streaming ring buffer to HW. */
rx_edpkt_cfg_wq(intf, (DESC_ADDR(desc->buf) + desc->size),
RX_EDPKT_RING_BUFFER_READ);
@@ -661,12 +686,31 @@ static void bcmasp_adj_link(struct net_device *dev)
phy_print_status(phydev);
}
static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
static struct page_pool *
bcmasp_rx_page_pool_create(struct bcmasp_intf *intf)
{
struct page_pool_params pp_params = {
.order = 0,
.flags = 0,
.pool_size = NUM_4K_BUFFERS,
.nid = NUMA_NO_NODE,
.dev = &intf->parent->pdev->dev,
.napi = &intf->rx_napi,
.netdev = intf->ndev,
.offset = 0,
.max_len = PAGE_SIZE,
};
return page_pool_create(&pp_params);
}
static int bcmasp_alloc_rx_buffers(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
struct page *buffer_pg;
int ret;
/* Alloc RX */
/* Contiguous streaming ring that hardware writes packet data into. */
intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
if (!buffer_pg)
@@ -675,13 +719,55 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
intf->rx_ring_cpu = page_to_virt(buffer_pg);
intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
if (dma_mapping_error(kdev, intf->rx_ring_dma))
goto free_rx_buffer;
if (dma_mapping_error(kdev, intf->rx_ring_dma)) {
ret = -ENOMEM;
goto free_ring_pages;
}
/* Page pool for SKB data areas (copy targets, not DMA buffers). */
intf->rx_page_pool = bcmasp_rx_page_pool_create(intf);
if (IS_ERR(intf->rx_page_pool)) {
ret = PTR_ERR(intf->rx_page_pool);
intf->rx_page_pool = NULL;
goto free_ring_dma;
}
return 0;
free_ring_dma:
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
free_ring_pages:
__free_pages(buffer_pg, intf->rx_buf_order);
return ret;
}
static void bcmasp_reclaim_rx_buffers(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
page_pool_destroy(intf->rx_page_pool);
intf->rx_page_pool = NULL;
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
__free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
}
static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
int ret;
/* Alloc RX */
ret = bcmasp_alloc_rx_buffers(intf);
if (ret)
return ret;
intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
&intf->rx_edpkt_dma_addr, GFP_KERNEL);
&intf->rx_edpkt_dma_addr,
GFP_KERNEL);
if (!intf->rx_edpkt_cpu)
goto free_rx_buffer_dma;
goto free_rx_buffers;
/* Alloc TX */
intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
@@ -701,11 +787,8 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
free_rx_edpkt_dma:
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
intf->rx_edpkt_dma_addr);
free_rx_buffer_dma:
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
free_rx_buffer:
__free_pages(buffer_pg, intf->rx_buf_order);
free_rx_buffers:
bcmasp_reclaim_rx_buffers(intf);
return -ENOMEM;
}
@@ -717,9 +800,7 @@ static void bcmasp_reclaim_free_buffers(struct bcmasp_intf *intf)
/* RX buffers */
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
intf->rx_edpkt_dma_addr);
dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
__free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
bcmasp_reclaim_rx_buffers(intf);
/* TX buffers */
dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
@@ -738,7 +819,7 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
/* Make sure channels are disabled */
rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE);
/* Rx SPB */
/* Streaming data ring: hardware writes raw packet bytes here. */
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ);
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE);
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE);
@@ -747,7 +828,9 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid,
RX_EDPKT_RING_BUFFER_VALID);
/* EDPKT */
/* EDPKT descriptor ring: hardware fills descriptors pointing into
* the streaming ring buffer above (RBUF_4K mode).
*/
rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K <<
RX_EDPKT_CFG_CFG0_DBUF_SHIFT) |
(RX_EDPKT_CFG_CFG0_64_ALN <<


@@ -246,6 +246,10 @@
((((intf)->channel - 6) * 0x14) + 0xa2000)
#define RX_SPB_TOP_BLKOUT 0x00
/*
* Number of 4 KB pages that make up the contiguous RBUF_4K streaming ring
* and the page pool used as copy-target SKB data areas.
*/
#define NUM_4K_BUFFERS 32
#define RING_BUFFER_SIZE (PAGE_SIZE * NUM_4K_BUFFERS)
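
For reference, assuming the common 4 KiB PAGE_SIZE these constants work out to RING_BUFFER_SIZE = 32 * 4096 bytes = 128 KiB of contiguous streaming ring, and bcmasp_rx_page_pool_create() above sizes its pool to the same 32 pages (.pool_size = NUM_4K_BUFFERS); on builds with a larger PAGE_SIZE the ring and the pool pages grow accordingly while the buffer count stays fixed.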