Merge branch 'page_pool-a-couple-of-assorted-optimizations'

Alexander Lobakin says:

====================
page_pool: a couple of assorted optimizations

That initially was a spin-off of the IAVF PP series[0], but has grown
(and shrunk) since then a bunch. In fact, it consists of three
semi-independent blocks:

* #1-2: Compile-time optimization. Split page_pool.h into 2 headers to
  not overbloat the consumers not needing complex inline helpers and
  then stop including it in skbuff.h at all. The first patch is also
  prereq for the whole series.
* #3: Improve cacheline locality for users of the Page Pool frag API.
* #4-6: Use direct cache recycling more aggressively, when it is safe
  obviously. In addition, make sure nobody wants to use Page Pool API
  with disabled interrupts.

Patches #1 and #5 are authored by Yunsheng and Jakub respectively, with
small modifications from my side as per ML discussions.
For the perf numbers for #3-6, please see individual commit messages.

Also available on my GH with many more Page Pool goodies[1].

[0] https://lore.kernel.org/netdev/20230530150035.1943669-1-aleksander.lobakin@intel.com
[1] https://github.com/alobakin/linux/commits/iavf-pp-frag
====================

Link: https://lore.kernel.org/r/20230804180529.2483231-1-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2023-08-07 12:28:54 -07:00
commit 6624433751
44 changed files with 339 additions and 308 deletions

View File

@ -67,10 +67,10 @@ a page will cause no race conditions is enough.
.. kernel-doc:: net/core/page_pool.c
:identifiers: page_pool_create
.. kernel-doc:: include/net/page_pool.h
.. kernel-doc:: include/net/page_pool/types.h
:identifiers: struct page_pool_params
.. kernel-doc:: include/net/page_pool.h
.. kernel-doc:: include/net/page_pool/helpers.h
:identifiers: page_pool_put_page page_pool_put_full_page
page_pool_recycle_direct page_pool_dev_alloc_pages
page_pool_get_dma_addr page_pool_get_dma_dir
@ -122,7 +122,7 @@ page_pool_stats allocated by the caller.
The API will fill in the provided struct page_pool_stats with
statistics about the page_pool.
.. kernel-doc:: include/net/page_pool.h
.. kernel-doc:: include/net/page_pool/types.h
:identifiers: struct page_pool_recycle_stats
struct page_pool_alloc_stats
struct page_pool_stats

View File

@ -16020,7 +16020,7 @@ M: Ilias Apalodimas <ilias.apalodimas@linaro.org>
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/page_pool.rst
F: include/net/page_pool.h
F: include/net/page_pool/
F: include/trace/events/page_pool.h
F: net/core/page_pool.c

View File

@ -54,7 +54,7 @@
#include <net/pkt_cls.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <linux/align.h>
#include <net/netdev_queues.h>

View File

@ -15,7 +15,7 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
#include "bnxt_xdp.h"

View File

@ -28,6 +28,7 @@
#include <linux/iopoll.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/page_pool/helpers.h>
#include <net/xdp_sock_drv.h>
#define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN)

View File

@ -38,6 +38,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/page_pool/helpers.h>
#include <net/selftests.h>
#include <net/tso.h>
#include <linux/tcp.h>

View File

@ -18,6 +18,7 @@
#include <net/gre.h>
#include <net/gro.h>
#include <net/ip6_checksum.h>
#include <net/page_pool/helpers.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/tcp.h>

View File

@ -6,7 +6,7 @@
#include <linux/dim.h>
#include <linux/if_vlan.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
#include <asm/barrier.h>
#include "hnae3.h"

View File

@ -37,7 +37,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/tso.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/pkt_sched.h>
#include <linux/bpf_trace.h>

View File

@ -16,7 +16,7 @@
#include <linux/phy.h>
#include <linux/phylink.h>
#include <net/flow_offload.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
#include <linux/bpf.h>
#include <net/xdp.h>

View File

@ -35,6 +35,7 @@
#include <uapi/linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/page_pool/helpers.h>
#include <net/tso.h>
#include <linux/bpf_trace.h>

View File

@ -7,6 +7,7 @@
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <net/page_pool/helpers.h>
#include <net/tso.h>
#include <linux/bitfield.h>

View File

@ -16,6 +16,7 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/bitfield.h>
#include <net/page_pool/types.h>
#include "otx2_reg.h"
#include "otx2_common.h"

View File

@ -26,6 +26,7 @@
#include <linux/bitfield.h>
#include <net/dsa.h>
#include <net/dst_metadata.h>
#include <net/page_pool/helpers.h>
#include "mtk_eth_soc.h"
#include "mtk_wed.h"

View File

@ -18,7 +18,7 @@
#include <linux/rhashtable.h>
#include <linux/dim.h>
#include <linux/bitfield.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
#include <linux/bpf_trace.h>
#include "mtk_ppe.h"

View File

@ -6,6 +6,7 @@
#include "en/port.h"
#include "en_accel/en_accel.h"
#include "en_accel/ipsec.h"
#include <net/page_pool/types.h>
#include <net/xdp_sock_drv.h>
static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)

View File

@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies */
#include <net/page_pool.h>
#include "en/txrx.h"
#include "en/params.h"
#include "en/trap.h"

View File

@ -35,6 +35,7 @@
#include "en/xdp.h"
#include "en/params.h"
#include <linux/bitfield.h>
#include <net/page_pool/helpers.h>
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{

View File

@ -38,7 +38,7 @@
#include <linux/debugfs.h>
#include <linux/if_bridge.h>
#include <linux/filter.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
#include <net/pkt_sched.h>
#include <net/xdp_sock_drv.h>
#include "eswitch.h"

View File

@ -36,7 +36,7 @@
#include <linux/bitmap.h>
#include <linux/filter.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/inet_ecn.h>
#include <net/gro.h>
#include <net/udp.h>

View File

@ -38,7 +38,7 @@
#include "en/port.h"
#ifdef CONFIG_PAGE_POOL_STATS
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#endif
static unsigned int stats_grps_num(struct mlx5e_priv *priv)

View File

@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <net/page_pool/helpers.h>
#include "lan966x_main.h"

View File

@ -10,7 +10,7 @@
#include <linux/phy.h>
#include <linux/phylink.h>
#include <linux/ptp_clock_kernel.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/switchdev.h>

View File

@ -11,6 +11,7 @@
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <net/mana/mana.h>

View File

@ -15,7 +15,7 @@
#include <linux/bpf_trace.h>
#include <net/tcp.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/ip6_checksum.h>
#define NETSEC_REG_SOFT_RST 0x104

View File

@ -21,7 +21,7 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/net_tstamp.h>
#include <linux/reset.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
#include <net/xdp.h>
#include <uapi/linux/bpf.h>

View File

@ -39,6 +39,7 @@
#include <linux/phylink.h>
#include <linux/udp.h>
#include <linux/bpf_trace.h>
#include <net/page_pool/helpers.h>
#include <net/pkt_cls.h>
#include <net/xdp_sock_drv.h>
#include "stmmac_ptp.h"

View File

@ -31,7 +31,7 @@
#include <linux/if_vlan.h>
#include <linux/kmemleak.h>
#include <linux/sys_soc.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>

View File

@ -30,7 +30,7 @@
#include <linux/sys_soc.h>
#include <net/switchdev.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/pkt_cls.h>
#include <net/devlink.h>

View File

@ -18,7 +18,7 @@
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/skbuff.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>

View File

@ -3,7 +3,7 @@
#include <linux/etherdevice.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/inet_ecn.h>
#include <linux/iopoll.h>
#include <linux/sctp.h>

View File

@ -26,7 +26,7 @@
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#define DRV_NAME "veth"
#define DRV_VERSION "1.0"

View File

@ -4,7 +4,6 @@
*/
#include <linux/sched.h>
#include <linux/of.h>
#include <net/page_pool.h>
#include "mt76.h"
#define CHAN2G(_idx, _freq) { \

View File

@ -15,6 +15,7 @@
#include <linux/average.h>
#include <linux/soc/mediatek/mtk_wed.h>
#include <net/mac80211.h>
#include <net/page_pool/helpers.h>
#include "util.h"
#include "testmode.h"

View File

@ -45,7 +45,7 @@
#include <linux/slab.h>
#include <net/ip.h>
#include <linux/bpf.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
#include <linux/bpf_trace.h>
#include <xen/xen.h>

View File

@ -625,6 +625,12 @@ do { \
WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \
} while (0)
#define lockdep_assert_no_hardirq() \
do { \
WARN_ON_ONCE(__lockdep_enabled && (this_cpu_read(hardirq_context) || \
!this_cpu_read(hardirqs_enabled))); \
} while (0)
#define lockdep_assert_preemption_enabled() \
do { \
WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
@ -659,6 +665,7 @@ do { \
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
# define lockdep_assert_no_hardirq() do { } while (0)
# define lockdep_assert_preemption_enabled() do { } while (0)
# define lockdep_assert_preemption_disabled() do { } while (0)

View File

@ -32,7 +32,6 @@
#include <linux/if_packet.h>
#include <linux/llist.h>
#include <net/flow.h>
#include <net/page_pool.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <linux/netfilter/nf_conntrack_common.h>
#endif
@ -3421,13 +3420,15 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}
bool napi_pp_put_page(struct page *page, bool napi_safe);
static inline void
napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
{
struct page *page = skb_frag_page(frag);
#ifdef CONFIG_PAGE_POOL
if (recycle && page_pool_return_skb_page(page, napi_safe))
if (recycle && napi_pp_put_page(page, napi_safe))
return;
#endif
put_page(page);

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
* page_pool.h
* page_pool/helpers.h
* Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
* Copyright (C) 2016 Red Hat, Inc.
*/
@ -26,126 +26,12 @@
* will release the DMA mapping and in-flight state accounting. We
* hope to lift this requirement in the future.
*/
#ifndef _NET_PAGE_POOL_H
#define _NET_PAGE_POOL_H
#ifndef _NET_PAGE_POOL_HELPERS_H
#define _NET_PAGE_POOL_HELPERS_H
#include <linux/mm.h> /* Needed by ptr_ring */
#include <linux/ptr_ring.h>
#include <linux/dma-direction.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
* map/unmap
*/
#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets
* from page_pool will be
* DMA-synced-for-device according to
* the length provided by the device
* driver.
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
PP_FLAG_DMA_SYNC_DEV |\
PP_FLAG_PAGE_FRAG)
/*
* Fast allocation side cache array/stack
*
* The cache size and refill watermark is related to the network
* use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
* ring is usually refilled and the max consumed elements will be 64,
* thus a natural max size of objects needed in the cache.
*
* Keeping room for more objects, is due to XDP_DROP use-case. As
* XDP_DROP allows the opportunity to recycle objects directly into
* this array, as it shares the same softirq/NAPI protection. If
* cache is already full (or partly full) then the XDP_DROP recycles
* would have to take a slower code path.
*/
#define PP_ALLOC_CACHE_SIZE 128
#define PP_ALLOC_CACHE_REFILL 64
struct pp_alloc_cache {
u32 count;
struct page *cache[PP_ALLOC_CACHE_SIZE];
};
/**
* struct page_pool_params - page pool parameters
* @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG
* @order: 2^order pages on allocation
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
* @dev: device, for DMA pre-mapping purposes
* @napi: NAPI which is the sole consumer of pages, otherwise NULL
* @dma_dir: DMA mapping direction
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
*/
struct page_pool_params {
unsigned int flags;
unsigned int order;
unsigned int pool_size;
int nid;
struct device *dev;
struct napi_struct *napi;
enum dma_data_direction dma_dir;
unsigned int max_len;
unsigned int offset;
/* private: used by test code only */
void (*init_callback)(struct page *page, void *arg);
void *init_arg;
};
#include <net/page_pool/types.h>
#ifdef CONFIG_PAGE_POOL_STATS
/**
* struct page_pool_alloc_stats - allocation statistics
* @fast: successful fast path allocations
* @slow: slow path order-0 allocations
* @slow_high_order: slow path high order allocations
* @empty: ptr ring is empty, so a slow path allocation was forced
* @refill: an allocation which triggered a refill of the cache
* @waive: pages obtained from the ptr ring that cannot be added to
* the cache due to a NUMA mismatch
*/
struct page_pool_alloc_stats {
u64 fast;
u64 slow;
u64 slow_high_order;
u64 empty;
u64 refill;
u64 waive;
};
/**
* struct page_pool_recycle_stats - recycling (freeing) statistics
* @cached: recycling placed page in the page pool cache
* @cache_full: page pool cache was full
* @ring: page placed into the ptr ring
* @ring_full: page released from page pool because the ptr ring was full
* @released_refcnt: page released (and not recycled) because refcnt > 1
*/
struct page_pool_recycle_stats {
u64 cached;
u64 cache_full;
u64 ring;
u64 ring_full;
u64 released_refcnt;
};
/**
* struct page_pool_stats - combined page pool use statistics
* @alloc_stats: see struct page_pool_alloc_stats
* @recycle_stats: see struct page_pool_recycle_stats
*
* Wrapper struct for combining page pool stats with different storage
* requirements.
*/
struct page_pool_stats {
struct page_pool_alloc_stats alloc_stats;
struct page_pool_recycle_stats recycle_stats;
};
int page_pool_ethtool_stats_get_count(void);
u8 *page_pool_ethtool_stats_get_strings(u8 *data);
u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
@ -158,7 +44,6 @@ u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
bool page_pool_get_stats(struct page_pool *pool,
struct page_pool_stats *stats);
#else
static inline int page_pool_ethtool_stats_get_count(void)
{
return 0;
@ -173,73 +58,8 @@ static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
{
return data;
}
#endif
struct page_pool {
struct page_pool_params p;
struct delayed_work release_dw;
void (*disconnect)(void *);
unsigned long defer_start;
unsigned long defer_warn;
u32 pages_state_hold_cnt;
unsigned int frag_offset;
struct page *frag_page;
long frag_users;
#ifdef CONFIG_PAGE_POOL_STATS
/* these stats are incremented while in softirq context */
struct page_pool_alloc_stats alloc_stats;
#endif
u32 xdp_mem_id;
/*
* Data structure for allocation side
*
* Drivers allocation side usually already perform some kind
* of resource protection. Piggyback on this protection, and
* require driver to protect allocation side.
*
* For NIC drivers this means, allocate a page_pool per
* RX-queue. As the RX-queue is already protected by
* Softirq/BH scheduling and napi_schedule. NAPI schedule
* guarantee that a single napi_struct will only be scheduled
* on a single CPU (see napi_schedule).
*/
struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
/* Data structure for storing recycled pages.
*
* Returning/freeing pages is more complicated synchronization
* wise, because free's can happen on remote CPUs, with no
* association with allocation resource.
*
* Use ptr_ring, as it separates consumer and producer
* effeciently, it a way that doesn't bounce cache-lines.
*
* TODO: Implement bulk return pages into this structure.
*/
struct ptr_ring ring;
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
struct page_pool_recycle_stats __percpu *recycle_stats;
#endif
atomic_t pages_state_release_cnt;
/* A page_pool is strictly tied to a single RX-queue being
* protected by NAPI, due to above pp_alloc_cache. This
* refcnt serves purpose is to simplify drivers error handling.
*/
refcount_t user_cnt;
u64 destroy_cnt;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
/**
* page_pool_dev_alloc_pages() - allocate a page.
* @pool: pool from which to allocate
@ -253,9 +73,6 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
return page_pool_alloc_pages(pool, gfp);
}
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
unsigned int *offset,
unsigned int size)
@ -278,44 +95,6 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
return pool->p.dma_dir;
}
bool page_pool_return_skb_page(struct page *page, bool napi_safe);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
void page_pool_unlink_napi(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
struct xdp_mem_info *mem);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
static inline void page_pool_unlink_napi(struct page_pool *pool)
{
}
static inline void page_pool_destroy(struct page_pool *pool)
{
}
static inline void page_pool_use_xdp_mem(struct page_pool *pool,
void (*disconnect)(void *),
struct xdp_mem_info *mem)
{
}
static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
}
#endif
void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size,
bool allow_direct);
/* pp_frag_count represents the number of writers who can update the page
* either by updating skb->data or via DMA mappings for the device.
* We can't rely on the page refcnt for that as we don't know who might be
@ -445,26 +224,15 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
page->dma_addr_upper = upper_32_bits(addr);
}
static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
return true;
#else
return false;
#endif
}
static inline bool page_pool_put(struct page_pool *pool)
{
return refcount_dec_and_test(&pool->user_cnt);
}
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);
static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
{
if (unlikely(pool->p.nid != new_nid))
page_pool_update_nid(pool, new_nid);
}
#endif /* _NET_PAGE_POOL_H */
#endif /* _NET_PAGE_POOL_HELPERS_H */

View File

@ -0,0 +1,236 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_PAGE_POOL_TYPES_H
#define _NET_PAGE_POOL_TYPES_H
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
* map/unmap
*/
#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets
* from page_pool will be
* DMA-synced-for-device according to
* the length provided by the device
* driver.
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
PP_FLAG_DMA_SYNC_DEV |\
PP_FLAG_PAGE_FRAG)
/*
* Fast allocation side cache array/stack
*
* The cache size and refill watermark is related to the network
* use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
* ring is usually refilled and the max consumed elements will be 64,
* thus a natural max size of objects needed in the cache.
*
* Keeping room for more objects, is due to XDP_DROP use-case. As
* XDP_DROP allows the opportunity to recycle objects directly into
* this array, as it shares the same softirq/NAPI protection. If
* cache is already full (or partly full) then the XDP_DROP recycles
* would have to take a slower code path.
*/
#define PP_ALLOC_CACHE_SIZE 128
#define PP_ALLOC_CACHE_REFILL 64
struct pp_alloc_cache {
u32 count;
struct page *cache[PP_ALLOC_CACHE_SIZE];
};
/**
* struct page_pool_params - page pool parameters
* @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG
* @order: 2^order pages on allocation
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
* @dev: device, for DMA pre-mapping purposes
* @napi: NAPI which is the sole consumer of pages, otherwise NULL
* @dma_dir: DMA mapping direction
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
*/
struct page_pool_params {
unsigned int flags;
unsigned int order;
unsigned int pool_size;
int nid;
struct device *dev;
struct napi_struct *napi;
enum dma_data_direction dma_dir;
unsigned int max_len;
unsigned int offset;
/* private: used by test code only */
void (*init_callback)(struct page *page, void *arg);
void *init_arg;
};
#ifdef CONFIG_PAGE_POOL_STATS
/**
* struct page_pool_alloc_stats - allocation statistics
* @fast: successful fast path allocations
* @slow: slow path order-0 allocations
* @slow_high_order: slow path high order allocations
* @empty: ptr ring is empty, so a slow path allocation was forced
* @refill: an allocation which triggered a refill of the cache
* @waive: pages obtained from the ptr ring that cannot be added to
* the cache due to a NUMA mismatch
*/
struct page_pool_alloc_stats {
u64 fast;
u64 slow;
u64 slow_high_order;
u64 empty;
u64 refill;
u64 waive;
};
/**
* struct page_pool_recycle_stats - recycling (freeing) statistics
* @cached: recycling placed page in the page pool cache
* @cache_full: page pool cache was full
* @ring: page placed into the ptr ring
* @ring_full: page released from page pool because the ptr ring was full
* @released_refcnt: page released (and not recycled) because refcnt > 1
*/
struct page_pool_recycle_stats {
u64 cached;
u64 cache_full;
u64 ring;
u64 ring_full;
u64 released_refcnt;
};
/**
* struct page_pool_stats - combined page pool use statistics
* @alloc_stats: see struct page_pool_alloc_stats
* @recycle_stats: see struct page_pool_recycle_stats
*
* Wrapper struct for combining page pool stats with different storage
* requirements.
*/
struct page_pool_stats {
struct page_pool_alloc_stats alloc_stats;
struct page_pool_recycle_stats recycle_stats;
};
#endif
struct page_pool {
struct page_pool_params p;
long frag_users;
struct page *frag_page;
unsigned int frag_offset;
u32 pages_state_hold_cnt;
struct delayed_work release_dw;
void (*disconnect)(void *pool);
unsigned long defer_start;
unsigned long defer_warn;
#ifdef CONFIG_PAGE_POOL_STATS
/* these stats are incremented while in softirq context */
struct page_pool_alloc_stats alloc_stats;
#endif
u32 xdp_mem_id;
/*
* Data structure for allocation side
*
* Drivers allocation side usually already perform some kind
* of resource protection. Piggyback on this protection, and
* require driver to protect allocation side.
*
* For NIC drivers this means, allocate a page_pool per
* RX-queue. As the RX-queue is already protected by
* Softirq/BH scheduling and napi_schedule. NAPI schedule
* guarantee that a single napi_struct will only be scheduled
* on a single CPU (see napi_schedule).
*/
struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
/* Data structure for storing recycled pages.
*
* Returning/freeing pages is more complicated synchronization
* wise, because free's can happen on remote CPUs, with no
* association with allocation resource.
*
* Use ptr_ring, as it separates consumer and producer
* efficiently, it a way that doesn't bounce cache-lines.
*
* TODO: Implement bulk return pages into this structure.
*/
struct ptr_ring ring;
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
struct page_pool_recycle_stats __percpu *recycle_stats;
#endif
atomic_t pages_state_release_cnt;
/* A page_pool is strictly tied to a single RX-queue being
* protected by NAPI, due to above pp_alloc_cache. This
* refcnt serves purpose is to simplify drivers error handling.
*/
refcount_t user_cnt;
u64 destroy_cnt;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
void page_pool_unlink_napi(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
struct xdp_mem_info *mem);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
static inline void page_pool_unlink_napi(struct page_pool *pool)
{
}
static inline void page_pool_destroy(struct page_pool *pool)
{
}
static inline void page_pool_use_xdp_mem(struct page_pool *pool,
void (*disconnect)(void *),
struct xdp_mem_info *mem)
{
}
static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
}
#endif
void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size,
bool allow_direct);
static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
return true;
#else
return false;
#endif
}
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);
#endif /* _NET_PAGE_POOL_H */

View File

@ -9,7 +9,7 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
#include <net/page_pool.h>
#include <net/page_pool/types.h>
TRACE_EVENT(page_pool_release,

View File

@ -15,7 +15,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>

View File

@ -10,7 +10,7 @@
#include <linux/slab.h>
#include <linux/device.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@ -587,6 +587,8 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
lockdep_assert_no_hardirq();
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
@ -935,42 +937,3 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
bool page_pool_return_skb_page(struct page *page, bool napi_safe)
{
struct napi_struct *napi;
struct page_pool *pp;
bool allow_direct;
page = compound_head(page);
/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
* in order to preserve any existing bits, such as bit 0 for the
* head page of compound page and bit 1 for pfmemalloc page, so
* mask those bits for freeing side when doing below checking,
* and page_is_pfmemalloc() is checked in __page_pool_put_page()
* to avoid recycling the pfmemalloc page.
*/
if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
return false;
pp = page->pp;
/* Allow direct recycle if we have reasons to believe that we are
* in the same context as the consumer would run, so there's
* no possible race.
*/
napi = READ_ONCE(pp->p.napi);
allow_direct = napi_safe && napi &&
READ_ONCE(napi->list_owner) == smp_processor_id();
/* Driver set this to memory recycling info. Reset it on recycle.
* This will *not* work for NIC using a split-page memory model.
* The page will be returned to the pool here regardless of the
* 'flipped' fragment being in use or not.
*/
page_pool_put_full_page(pp, page, allow_direct);
return true;
}
EXPORT_SYMBOL(page_pool_return_skb_page);

View File

@ -73,7 +73,7 @@
#include <net/mpls.h>
#include <net/mptcp.h>
#include <net/mctp.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/dropreason.h>
#include <linux/uaccess.h>
@ -879,11 +879,56 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(struct page *page, bool napi_safe)
{
bool allow_direct = false;
struct page_pool *pp;
page = compound_head(page);
/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
* in order to preserve any existing bits, such as bit 0 for the
* head page of compound page and bit 1 for pfmemalloc page, so
* mask those bits for freeing side when doing below checking,
* and page_is_pfmemalloc() is checked in __page_pool_put_page()
* to avoid recycling the pfmemalloc page.
*/
if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
return false;
pp = page->pp;
/* Allow direct recycle if we have reasons to believe that we are
* in the same context as the consumer would run, so there's
* no possible race.
* __page_pool_put_page() makes sure we're not in hardirq context
* and interrupts are enabled prior to accessing the cache.
*/
if (napi_safe || in_softirq()) {
const struct napi_struct *napi = READ_ONCE(pp->p.napi);
allow_direct = napi &&
READ_ONCE(napi->list_owner) == smp_processor_id();
}
/* Driver set this to memory recycling info. Reset it on recycle.
* This will *not* work for NIC using a split-page memory model.
* The page will be returned to the pool here regardless of the
* 'flipped' fragment being in use or not.
*/
page_pool_put_full_page(pp, page, allow_direct);
return true;
}
EXPORT_SYMBOL(napi_pp_put_page);
#endif
static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
return page_pool_return_skb_page(virt_to_page(data), napi_safe);
return napi_pp_put_page(virt_to_page(data), napi_safe);
}
static void skb_kfree_head(void *head, unsigned int end_offset)

View File

@ -14,7 +14,7 @@
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
#include <net/page_pool.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */