mirror of
https://github.com/torvalds/linux.git
synced 2026-05-23 06:31:58 +02:00
Merge branch 'net-af_packet-optimize-retire-operation'
Xin Zhao says: ==================== net: af_packet: optimize retire operation In a system with high real-time requirements, the timeout mechanism of ordinary timers with jiffies granularity is insufficient to meet the demands for real-time performance. Meanwhile, the optimization of CPU usage with af_packet is quite significant. Use hrtimer instead of timer to help compensate for the shortcomings in real-time performance. In HZ=100 or HZ=250 system, the update of TP_STATUS_USER is not real-time enough, with fluctuations reaching over 8ms (on a system with HZ=250). This is unacceptable in some high real-time systems that require timely processing of network packets. By replacing it with hrtimer, if a timeout of 2ms is set, the update of TP_STATUS_USER can be stabilized to within 3 ms. ==================== Link: https://patch.msgid.link/20250908104549.204412-1-jackzxcui1989@163.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
be30f56386
|
|
@ -203,8 +203,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *,
|
|||
static int prb_queue_frozen(struct tpacket_kbdq_core *);
|
||||
static void prb_open_block(struct tpacket_kbdq_core *,
|
||||
struct tpacket_block_desc *);
|
||||
static void prb_retire_rx_blk_timer_expired(struct timer_list *);
|
||||
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
|
||||
static enum hrtimer_restart prb_retire_rx_blk_timer_expired(struct hrtimer *);
|
||||
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
|
||||
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
|
||||
struct tpacket3_hdr *);
|
||||
|
|
@ -579,33 +578,13 @@ static __be16 vlan_get_protocol_dgram(const struct sk_buff *skb)
|
|||
return proto;
|
||||
}
|
||||
|
||||
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
|
||||
{
|
||||
timer_delete_sync(&pkc->retire_blk_timer);
|
||||
}
|
||||
|
||||
static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
|
||||
struct sk_buff_head *rb_queue)
|
||||
{
|
||||
struct tpacket_kbdq_core *pkc;
|
||||
|
||||
pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
|
||||
|
||||
spin_lock_bh(&rb_queue->lock);
|
||||
pkc->delete_blk_timer = 1;
|
||||
spin_unlock_bh(&rb_queue->lock);
|
||||
|
||||
prb_del_retire_blk_timer(pkc);
|
||||
}
|
||||
|
||||
static void prb_setup_retire_blk_timer(struct packet_sock *po)
|
||||
{
|
||||
struct tpacket_kbdq_core *pkc;
|
||||
|
||||
pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
|
||||
timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
|
||||
0);
|
||||
pkc->retire_blk_timer.expires = jiffies;
|
||||
hrtimer_cancel(&pkc->retire_blk_timer);
|
||||
}
|
||||
|
||||
static int prb_calc_retire_blk_tmo(struct packet_sock *po,
|
||||
|
|
@ -669,57 +648,36 @@ static void init_prb_bdqc(struct packet_sock *po,
|
|||
p1->knum_blocks = req_u->req3.tp_block_nr;
|
||||
p1->hdrlen = po->tp_hdrlen;
|
||||
p1->version = po->tp_version;
|
||||
p1->last_kactive_blk_num = 0;
|
||||
po->stats.stats3.tp_freeze_q_cnt = 0;
|
||||
if (req_u->req3.tp_retire_blk_tov)
|
||||
p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
|
||||
p1->interval_ktime = ms_to_ktime(req_u->req3.tp_retire_blk_tov);
|
||||
else
|
||||
p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
|
||||
req_u->req3.tp_block_size);
|
||||
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
|
||||
p1->interval_ktime = ms_to_ktime(prb_calc_retire_blk_tmo(po,
|
||||
req_u->req3.tp_block_size));
|
||||
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
|
||||
rwlock_init(&p1->blk_fill_in_prog_lock);
|
||||
|
||||
p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
|
||||
prb_init_ft_ops(p1, req_u);
|
||||
prb_setup_retire_blk_timer(po);
|
||||
hrtimer_setup(&p1->retire_blk_timer, prb_retire_rx_blk_timer_expired,
|
||||
CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
|
||||
hrtimer_start(&p1->retire_blk_timer, p1->interval_ktime,
|
||||
HRTIMER_MODE_REL_SOFT);
|
||||
prb_open_block(p1, pbd);
|
||||
}
|
||||
|
||||
/* Do NOT update the last_blk_num first.
|
||||
* Assumes sk_buff_head lock is held.
|
||||
*/
|
||||
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
|
||||
{
|
||||
mod_timer(&pkc->retire_blk_timer,
|
||||
jiffies + pkc->tov_in_jiffies);
|
||||
pkc->last_kactive_blk_num = pkc->kactive_blk_num;
|
||||
}
|
||||
|
||||
/*
|
||||
* Timer logic:
|
||||
* 1) We refresh the timer only when we open a block.
|
||||
* By doing this we don't waste cycles refreshing the timer
|
||||
* on packet-by-packet basis.
|
||||
*
|
||||
* With a 1MB block-size, on a 1Gbps line, it will take
|
||||
* i) ~8 ms to fill a block + ii) memcpy etc.
|
||||
* In this cut we are not accounting for the memcpy time.
|
||||
*
|
||||
* So, if the user sets the 'tmo' to 10ms then the timer
|
||||
* will never fire while the block is still getting filled
|
||||
* (which is what we want). However, the user could choose
|
||||
* to close a block early and that's fine.
|
||||
*
|
||||
* But when the timer does fire, we check whether or not to refresh it.
|
||||
* Since the tmo granularity is in msecs, it is not too expensive
|
||||
* to refresh the timer, lets say every '8' msecs.
|
||||
* Either the user can set the 'tmo' or we can derive it based on
|
||||
* a) line-speed and b) block-size.
|
||||
* prb_calc_retire_blk_tmo() calculates the tmo.
|
||||
*
|
||||
*/
|
||||
static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
|
||||
static enum hrtimer_restart prb_retire_rx_blk_timer_expired(struct hrtimer *t)
|
||||
{
|
||||
struct packet_sock *po =
|
||||
timer_container_of(po, t, rx_ring.prb_bdqc.retire_blk_timer);
|
||||
|
|
@ -732,9 +690,6 @@ static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
|
|||
frozen = prb_queue_frozen(pkc);
|
||||
pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
|
||||
|
||||
if (unlikely(pkc->delete_blk_timer))
|
||||
goto out;
|
||||
|
||||
/* We only need to plug the race when the block is partially filled.
|
||||
* tpacket_rcv:
|
||||
* lock(); increment BLOCK_NUM_PKTS; unlock()
|
||||
|
|
@ -750,46 +705,31 @@ static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
|
|||
write_unlock(&pkc->blk_fill_in_prog_lock);
|
||||
}
|
||||
|
||||
if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
|
||||
if (!frozen) {
|
||||
if (!BLOCK_NUM_PKTS(pbd)) {
|
||||
/* An empty block. Just refresh the timer. */
|
||||
goto refresh_timer;
|
||||
}
|
||||
if (!frozen) {
|
||||
if (BLOCK_NUM_PKTS(pbd)) {
|
||||
/* Not an empty block. Need retire the block. */
|
||||
prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
|
||||
if (!prb_dispatch_next_block(pkc, po))
|
||||
goto refresh_timer;
|
||||
else
|
||||
goto out;
|
||||
} else {
|
||||
/* Case 1. Queue was frozen because user-space was
|
||||
* lagging behind.
|
||||
prb_dispatch_next_block(pkc, po);
|
||||
}
|
||||
} else {
|
||||
/* Case 1. Queue was frozen because user-space was
|
||||
* lagging behind.
|
||||
*/
|
||||
if (!prb_curr_blk_in_use(pbd)) {
|
||||
/* Case 2. queue was frozen,user-space caught up,
|
||||
* now the link went idle && the timer fired.
|
||||
* We don't have a block to close.So we open this
|
||||
* block and restart the timer.
|
||||
* opening a block thaws the queue,restarts timer
|
||||
* Thawing/timer-refresh is a side effect.
|
||||
*/
|
||||
if (prb_curr_blk_in_use(pbd)) {
|
||||
/*
|
||||
* Ok, user-space is still behind.
|
||||
* So just refresh the timer.
|
||||
*/
|
||||
goto refresh_timer;
|
||||
} else {
|
||||
/* Case 2. queue was frozen,user-space caught up,
|
||||
* now the link went idle && the timer fired.
|
||||
* We don't have a block to close.So we open this
|
||||
* block and restart the timer.
|
||||
* opening a block thaws the queue,restarts timer
|
||||
* Thawing/timer-refresh is a side effect.
|
||||
*/
|
||||
prb_open_block(pkc, pbd);
|
||||
goto out;
|
||||
}
|
||||
prb_open_block(pkc, pbd);
|
||||
}
|
||||
}
|
||||
|
||||
refresh_timer:
|
||||
_prb_refresh_rx_retire_blk_timer(pkc);
|
||||
|
||||
out:
|
||||
hrtimer_forward_now(&pkc->retire_blk_timer, pkc->interval_ktime);
|
||||
spin_unlock(&po->sk.sk_receive_queue.lock);
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
|
||||
|
|
@ -883,11 +823,18 @@ static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
|
|||
}
|
||||
|
||||
/*
|
||||
* Side effect of opening a block:
|
||||
* prb_open_block is called by tpacket_rcv or timer callback.
|
||||
*
|
||||
* 1) prb_queue is thawed.
|
||||
* 2) retire_blk_timer is refreshed.
|
||||
* Reasons why NOT update hrtimer in prb_open_block:
|
||||
* 1) It will increase complexity to distinguish the two caller scenario.
|
||||
* 2) hrtimer_cancel and hrtimer_start need to be called if you want to update
|
||||
* TMO of an already enqueued hrtimer, leading to complex shutdown logic.
|
||||
*
|
||||
* One side effect of NOT update hrtimer when called by tpacket_rcv is that
|
||||
* a newly opened block triggered by tpacket_rcv may be retired earlier than
|
||||
* expected. On the other hand, if timeout is updated in prb_open_block, the
|
||||
* frequent reception of network packets that leads to prb_open_block being
|
||||
* called may cause hrtimer to be removed and enqueued repeatedly.
|
||||
*/
|
||||
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
|
||||
struct tpacket_block_desc *pbd1)
|
||||
|
|
@ -921,7 +868,6 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1,
|
|||
pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
|
||||
|
||||
prb_thaw_queue(pkc1);
|
||||
_prb_refresh_rx_retire_blk_timer(pkc1);
|
||||
|
||||
smp_wmb();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type,
|
|||
pdr.pdr_frame_nr = ring->frame_max + 1;
|
||||
|
||||
if (ver > TPACKET_V2) {
|
||||
pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov;
|
||||
pdr.pdr_retire_tmo = ktime_to_ms(ring->prb_bdqc.interval_ktime);
|
||||
pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv;
|
||||
pdr.pdr_features = ring->prb_bdqc.feature_req_word;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -20,15 +20,10 @@ struct tpacket_kbdq_core {
|
|||
unsigned int feature_req_word;
|
||||
unsigned int hdrlen;
|
||||
unsigned char reset_pending_on_curr_blk;
|
||||
unsigned char delete_blk_timer;
|
||||
unsigned short kactive_blk_num;
|
||||
unsigned short blk_sizeof_priv;
|
||||
|
||||
/* last_kactive_blk_num:
|
||||
* trick to see if user-space has caught up
|
||||
* in order to avoid refreshing timer when every single pkt arrives.
|
||||
*/
|
||||
unsigned short last_kactive_blk_num;
|
||||
unsigned short version;
|
||||
|
||||
char *pkblk_start;
|
||||
char *pkblk_end;
|
||||
|
|
@ -38,6 +33,7 @@ struct tpacket_kbdq_core {
|
|||
uint64_t knxt_seq_num;
|
||||
char *prev;
|
||||
char *nxt_offset;
|
||||
|
||||
struct sk_buff *skb;
|
||||
|
||||
rwlock_t blk_fill_in_prog_lock;
|
||||
|
|
@ -45,12 +41,10 @@ struct tpacket_kbdq_core {
|
|||
/* Default is set to 8ms */
|
||||
#define DEFAULT_PRB_RETIRE_TOV (8)
|
||||
|
||||
unsigned short retire_blk_tov;
|
||||
unsigned short version;
|
||||
unsigned long tov_in_jiffies;
|
||||
ktime_t interval_ktime;
|
||||
|
||||
/* timer to retire an outstanding block */
|
||||
struct timer_list retire_blk_timer;
|
||||
struct hrtimer retire_blk_timer;
|
||||
};
|
||||
|
||||
struct pgv {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user