mirror of
https://github.com/torvalds/linux.git
synced 2026-06-03 12:03:54 +02:00
Merge branch 'net-prevent-rps-table-overwrite-of-active-flows'
Krishna Kumar says: ==================== net: Prevent RPS table overwrite of active flows This series splits the original RPS patch [1] into two patches for net-next. It also addresses a kernel test robot warning by defining rps_flow_is_active() only when aRFS is enabled. I tested v3 with four builds and reboots: two for [PATCH 1/2] with aRFS enabled & disabled, and two for [PATCH 2/2]. There are no code changes in v4 and v5, only documentation. Patch v6 has one line change to keep 'hash' field under #ifdef, and was test built with aRFS=on and aRFS=off. The same two builds were done for v7, along with 15m load testing with aRFS=on to ensure the new changes are correct. The first patch prevents RPS table overwrite for active flows thereby improving aRFS stability. The second patch caches hash & flow_id in get_rps_cpu() to avoid recalculating it in set_rps_cpu(). [1] lore.kernel.org/netdev/20250708081516.53048-1-krikku@gmail.com/ [2] lore.kernel.org/netdev/20250729104109.1687418-1-krikku@gmail.com/ ==================== Link: https://patch.msgid.link/20250825031005.3674864-1-krikku@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
86b2676816
|
|
@ -25,13 +25,16 @@ struct rps_map {
|
|||
|
||||
/*
|
||||
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
|
||||
* tail pointer for that CPU's input queue at the time of last enqueue, and
|
||||
* a hardware filter index.
|
||||
* tail pointer for that CPU's input queue at the time of last enqueue, a
|
||||
* hardware filter index, and the hash of the flow if aRFS is enabled.
|
||||
*/
|
||||
struct rps_dev_flow {
|
||||
u16 cpu;
|
||||
u16 filter;
|
||||
unsigned int last_qtail;
|
||||
#ifdef CONFIG_RFS_ACCEL
|
||||
u32 hash;
|
||||
#endif
|
||||
};
|
||||
#define RPS_NO_FILTER 0xffff
|
||||
|
||||
|
|
|
|||
|
|
@ -4849,9 +4849,40 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table)
|
|||
return hash_32(hash, flow_table->log);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RFS_ACCEL
|
||||
/**
|
||||
* rps_flow_is_active - check whether the flow is recently active.
|
||||
* @rflow: Specific flow to check activity.
|
||||
* @flow_table: per-queue flowtable that @rflow belongs to.
|
||||
* @cpu: CPU saved in @rflow.
|
||||
*
|
||||
* If the CPU has processed many packets since the flow's last activity
|
||||
* (beyond 10 times the table size), the flow is considered stale.
|
||||
*
|
||||
* Return: true if flow was recently active.
|
||||
*/
|
||||
static bool rps_flow_is_active(struct rps_dev_flow *rflow,
|
||||
struct rps_dev_flow_table *flow_table,
|
||||
unsigned int cpu)
|
||||
{
|
||||
unsigned int flow_last_active;
|
||||
unsigned int sd_input_head;
|
||||
|
||||
if (cpu >= nr_cpu_ids)
|
||||
return false;
|
||||
|
||||
sd_input_head = READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head);
|
||||
flow_last_active = READ_ONCE(rflow->last_qtail);
|
||||
|
||||
return (int)(sd_input_head - flow_last_active) <
|
||||
(int)(10 << flow_table->log);
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct rps_dev_flow *
|
||||
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
|
||||
struct rps_dev_flow *rflow, u16 next_cpu)
|
||||
struct rps_dev_flow *rflow, u16 next_cpu, u32 hash,
|
||||
u32 flow_id)
|
||||
{
|
||||
if (next_cpu < nr_cpu_ids) {
|
||||
u32 head;
|
||||
|
|
@ -4859,8 +4890,9 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
|
|||
struct netdev_rx_queue *rxqueue;
|
||||
struct rps_dev_flow_table *flow_table;
|
||||
struct rps_dev_flow *old_rflow;
|
||||
struct rps_dev_flow *tmp_rflow;
|
||||
unsigned int tmp_cpu;
|
||||
u16 rxq_index;
|
||||
u32 flow_id;
|
||||
int rc;
|
||||
|
||||
/* Should we steer this flow to a different hardware queue? */
|
||||
|
|
@ -4875,14 +4907,29 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
|
|||
flow_table = rcu_dereference(rxqueue->rps_flow_table);
|
||||
if (!flow_table)
|
||||
goto out;
|
||||
flow_id = rfs_slot(skb_get_hash(skb), flow_table);
|
||||
|
||||
tmp_rflow = &flow_table->flows[flow_id];
|
||||
tmp_cpu = READ_ONCE(tmp_rflow->cpu);
|
||||
|
||||
if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) {
|
||||
if (rps_flow_is_active(tmp_rflow, flow_table,
|
||||
tmp_cpu)) {
|
||||
if (hash != READ_ONCE(tmp_rflow->hash) ||
|
||||
next_cpu == tmp_cpu)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
|
||||
rxq_index, flow_id);
|
||||
if (rc < 0)
|
||||
goto out;
|
||||
|
||||
old_rflow = rflow;
|
||||
rflow = &flow_table->flows[flow_id];
|
||||
rflow = tmp_rflow;
|
||||
WRITE_ONCE(rflow->filter, rc);
|
||||
WRITE_ONCE(rflow->hash, hash);
|
||||
|
||||
if (old_rflow->filter == rc)
|
||||
WRITE_ONCE(old_rflow->filter, RPS_NO_FILTER);
|
||||
out:
|
||||
|
|
@ -4908,6 +4955,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
|
|||
struct rps_dev_flow_table *flow_table;
|
||||
struct rps_map *map;
|
||||
int cpu = -1;
|
||||
u32 flow_id;
|
||||
u32 tcpu;
|
||||
u32 hash;
|
||||
|
||||
|
|
@ -4954,7 +5002,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
|
|||
/* OK, now we know there is a match,
|
||||
* we can look at the local (per receive queue) flow table
|
||||
*/
|
||||
rflow = &flow_table->flows[rfs_slot(hash, flow_table)];
|
||||
flow_id = rfs_slot(hash, flow_table);
|
||||
rflow = &flow_table->flows[flow_id];
|
||||
tcpu = rflow->cpu;
|
||||
|
||||
/*
|
||||
|
|
@ -4973,7 +5022,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
|
|||
((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) -
|
||||
rflow->last_qtail)) >= 0)) {
|
||||
tcpu = next_cpu;
|
||||
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
|
||||
rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash,
|
||||
flow_id);
|
||||
}
|
||||
|
||||
if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
|
||||
|
|
@ -5017,17 +5067,16 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
|
|||
struct rps_dev_flow_table *flow_table;
|
||||
struct rps_dev_flow *rflow;
|
||||
bool expire = true;
|
||||
unsigned int cpu;
|
||||
|
||||
rcu_read_lock();
|
||||
flow_table = rcu_dereference(rxqueue->rps_flow_table);
|
||||
if (flow_table && flow_id < (1UL << flow_table->log)) {
|
||||
unsigned int cpu;
|
||||
|
||||
rflow = &flow_table->flows[flow_id];
|
||||
cpu = READ_ONCE(rflow->cpu);
|
||||
if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids &&
|
||||
((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
|
||||
READ_ONCE(rflow->last_qtail)) <
|
||||
(int)(10 << flow_table->log)))
|
||||
if (READ_ONCE(rflow->filter) == filter_id &&
|
||||
rps_flow_is_active(rflow, flow_table, cpu))
|
||||
expire = false;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
|
|
|||
|
|
@ -1120,8 +1120,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
|
|||
return -ENOMEM;
|
||||
|
||||
table->log = ilog2(mask) + 1;
|
||||
for (count = 0; count <= mask; count++)
|
||||
for (count = 0; count <= mask; count++) {
|
||||
table->flows[count].cpu = RPS_NO_CPU;
|
||||
table->flows[count].filter = RPS_NO_FILTER;
|
||||
}
|
||||
} else {
|
||||
table = NULL;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user