From 8490e75cdbb734829d3b324c3a52492c2edbfbd6 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 22 Nov 2019 17:57:03 +0000 Subject: [PATCH 1/4] sfc: change ARFS expiry mechanism The old rfs_filters_added method for determining the quota could potentially allow the NIC to become filled with old filters, which never get tested for expiry. Instead, explicitly make expiry check work depend on the number of filters installed, and don't count checking slots without filters in as doing work. This guarantees that each filter will be checked for expiry at least once every thirty seconds (assuming the channel to which it belongs is NAPI polling actively) regardless of fill level. Signed-off-by: Edward Cree Tested-by: David Ahern Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx.c | 8 +++-- drivers/net/ethernet/sfc/efx.h | 9 +++--- drivers/net/ethernet/sfc/net_driver.h | 14 ++++---- drivers/net/ethernet/sfc/rx.c | 46 ++++++++++++++++----------- 4 files changed, 45 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 0fa9972027db..38d186b949be 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1969,6 +1969,8 @@ static int efx_probe_filters(struct efx_nic *efx) ++i) channel->rps_flow_id[i] = RPS_FLOW_ID_INVALID; + channel->rfs_expire_index = 0; + channel->rfs_filter_count = 0; } if (!success) { @@ -1978,8 +1980,6 @@ static int efx_probe_filters(struct efx_nic *efx) rc = -ENOMEM; goto out_unlock; } - - efx->rps_expire_index = efx->rps_expire_channel = 0; } #endif out_unlock: @@ -1993,8 +1993,10 @@ static void efx_remove_filters(struct efx_nic *efx) #ifdef CONFIG_RFS_ACCEL struct efx_channel *channel; - efx_for_each_channel(channel, efx) + efx_for_each_channel(channel, efx) { + flush_work(&channel->filter_work); kfree(channel->rps_flow_id); + } #endif down_write(&efx->filter_sem); efx->type->filter_table_remove(efx); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 45c7ae4114ec..e58c2b6d64d9 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -166,15 +166,16 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx, #ifdef CONFIG_RFS_ACCEL int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); -bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota); +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota); static inline void efx_filter_rfs_expire(struct work_struct *data) { struct efx_channel *channel = container_of(data, struct efx_channel, filter_work); + unsigned int time = jiffies - channel->rfs_last_expiry, quota; - if (channel->rfs_filters_added >= 60 && - __efx_filter_rfs_expire(channel->efx, 100)) - channel->rfs_filters_added -= 60; + quota = channel->rfs_filter_count * time / (30 * HZ); + if (quota > 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota))) + channel->rfs_last_expiry += time; } #define efx_filter_rfs_enabled() 1 #else diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 04e49eac7327..5b1b882f6c67 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -439,6 +439,11 @@ enum efx_sync_events_state { * @event_test_cpu: Last CPU to handle interrupt or test event for this channel * @irq_count: Number of IRQs since last adaptive moderation decision * @irq_mod_score: IRQ moderation score + * @rfs_filter_count: number of accelerated RFS filters currently in place; + * equals the count of @rps_flow_id slots filled + * @rfs_last_expiry: value of jiffies last time some accelerated RFS filters + * were checked for expiry + * @rfs_expire_index: next accelerated RFS filter ID to check for expiry * @filter_work: Work item for efx_filter_rfs_expire() * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID @@ -489,7 +494,9 @@ struct efx_channel { unsigned int irq_count; unsigned int irq_mod_score; #ifdef CONFIG_RFS_ACCEL - unsigned int rfs_filters_added; + unsigned int rfs_filter_count; + unsigned int rfs_last_expiry; + unsigned int rfs_expire_index; struct work_struct filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; @@ -923,9 +930,6 @@ struct efx_async_filter_insertion { * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state * @filter_state: Architecture-dependent filter table state * @rps_mutex: Protects RPS state of all channels - * @rps_expire_channel: Next channel to check for expiry - * @rps_expire_index: Next index to check for expiry in - * @rps_expire_channel's @rps_flow_id * @rps_slot_map: bitmap of in-flight entries in @rps_slot * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work() * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and @@ -1096,8 +1100,6 @@ struct efx_nic { void *filter_state; #ifdef CONFIG_RFS_ACCEL struct mutex rps_mutex; - unsigned int rps_expire_channel; - unsigned int rps_expire_index; unsigned long rps_slot_map; struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT]; spinlock_t rps_hash_lock; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index bec261905530..bbf2393f7599 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -988,6 +988,7 @@ static void efx_filter_rfs_work(struct work_struct *data) rc = efx->type->filter_insert(efx, &req->spec, true); if (rc >= 0) + /* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */ rc %= efx->type->max_rx_ip_filters; if (efx->rps_hash_table) { spin_lock_bh(&efx->rps_hash_lock); @@ -1012,8 +1013,9 @@ static void efx_filter_rfs_work(struct work_struct *data) * later. */ mutex_lock(&efx->rps_mutex); + if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID) + channel->rfs_filter_count++; channel->rps_flow_id[rc] = req->flow_id; - ++channel->rfs_filters_added; mutex_unlock(&efx->rps_mutex); if (req->spec.ether_type == htons(ETH_P_IP)) @@ -1139,38 +1141,44 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, return rc; } -bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) { bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); - unsigned int channel_idx, index, size; + struct efx_nic *efx = channel->efx; + unsigned int index, size, start; u32 flow_id; if (!mutex_trylock(&efx->rps_mutex)) return false; expire_one = efx->type->filter_rfs_expire_one; - channel_idx = efx->rps_expire_channel; - index = efx->rps_expire_index; + index = channel->rfs_expire_index; + start = index; size = efx->type->max_rx_ip_filters; - while (quota--) { - struct efx_channel *channel = efx_get_channel(efx, channel_idx); + while (quota) { flow_id = channel->rps_flow_id[index]; - if (flow_id != RPS_FLOW_ID_INVALID && - expire_one(efx, flow_id, index)) { - netif_info(efx, rx_status, efx->net_dev, - "expired filter %d [queue %u flow %u]\n", - index, channel_idx, flow_id); - channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + if (flow_id != RPS_FLOW_ID_INVALID) { + quota--; + if (expire_one(efx, flow_id, index)) { + netif_info(efx, rx_status, efx->net_dev, + "expired filter %d [channel %u flow %u]\n", + index, channel->channel, flow_id); + channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + channel->rfs_filter_count--; + } } - if (++index == size) { - if (++channel_idx == efx->n_channels) - channel_idx = 0; + if (++index == size) index = 0; - } + /* If we were called with a quota that exceeds the total number + * of filters in the table (which should never happen), ensure + * that we don't loop forever - stop when we've examined every + * row of the table. + */ + if (WARN_ON(index == start && quota)) + break; } - efx->rps_expire_channel = channel_idx; - efx->rps_expire_index = index; + channel->rfs_expire_index = index; mutex_unlock(&efx->rps_mutex); return true; } From 0aa6608daec5d19275809d30df6a02529eb66c7d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 22 Nov 2019 17:57:19 +0000 Subject: [PATCH 2/4] sfc: suppress MCDI errors from ARFS In high connection count usage, the NIC's filter table may be filled with sufficiently many ARFS filters that further insertions fail. As this does not represent a correctness issue, do not log the resulting MCDI errors. Add a debug-level message under the (by default disabled) rx_status category instead; and take the opportunity to do a little extra expiry work. Since there are now multiple workitems able to call __efx_filter_rfs_expire on a given channel, it is possible for them to race and thus pass quotas which, combined, exceed rfs_filter_count. Thus, don't WARN_ON if we loop all the way around the table with quota left over. Signed-off-by: Edward Cree Tested-by: David Ahern Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef10.c | 8 ++++++-- drivers/net/ethernet/sfc/rx.c | 28 ++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index ad68eb0cb8fd..4d9bbccc6f89 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4202,11 +4202,15 @@ static int efx_ef10_filter_push(struct efx_nic *efx, { MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN); + size_t outlen; int rc; efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing); - rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc && spec->priority != EFX_FILTER_PRI_HINT) + efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf), + outbuf, outlen, rc); if (rc == 0) *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); if (rc == -ENOSPC) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index bbf2393f7599..252a5f10596d 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -1032,6 +1032,26 @@ static void efx_filter_rfs_work(struct work_struct *data) req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), req->rxq_index, req->flow_id, rc, arfs_id); + } else { + if (req->spec.ether_type == htons(ETH_P_IP)) + netif_dbg(efx, rx_status, efx->net_dev, + "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + else + netif_dbg(efx, rx_status, efx->net_dev, + "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + /* We're overloading the NIC's filter tables, so let's do a + * chunk of extra expiry work. + */ + __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, + 100u)); } /* Release references */ @@ -1170,11 +1190,11 @@ bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) if (++index == size) index = 0; /* If we were called with a quota that exceeds the total number - * of filters in the table (which should never happen), ensure - * that we don't loop forever - stop when we've examined every - * row of the table. + * of filters in the table (which shouldn't happen, but could + * if two callers race), ensure that we don't loop forever - + * stop when we've examined every row of the table. */ - if (WARN_ON(index == start && quota)) + if (index == start) break; } From ca70bd423f10b004f1bf7b2424d34025a9408e54 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 22 Nov 2019 17:57:27 +0000 Subject: [PATCH 3/4] sfc: add statistics for ARFS Report the number of successful and failed insertions, and also the current count of filters, to aid in tuning e.g. rps_flow_cnt. Signed-off-by: Edward Cree Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ethtool.c | 6 ++++++ drivers/net/ethernet/sfc/net_driver.h | 4 ++++ drivers/net/ethernet/sfc/rx.c | 2 ++ 3 files changed, 12 insertions(+) diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 8db593fb9699..6a9347cd67f3 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -56,6 +56,9 @@ static u64 efx_get_atomic_stat(void *field) #define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ EFX_ETHTOOL_STAT(field, channel, n_##field, \ unsigned int, efx_get_uint_stat) +#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ + EFX_ETHTOOL_STAT(field, channel, field, \ + unsigned int, efx_get_uint_stat) #define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ @@ -87,6 +90,9 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), + EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), }; #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 5b1b882f6c67..ccd480e699d3 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -444,6 +444,8 @@ enum efx_sync_events_state { * @rfs_last_expiry: value of jiffies last time some accelerated RFS filters * were checked for expiry * @rfs_expire_index: next accelerated RFS filter ID to check for expiry + * @n_rfs_succeeded: number of successful accelerated RFS filter insertions + * @n_rfs_failed; number of failed accelerated RFS filter insertions * @filter_work: Work item for efx_filter_rfs_expire() * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID @@ -497,6 +499,8 @@ struct efx_channel { unsigned int rfs_filter_count; unsigned int rfs_last_expiry; unsigned int rfs_expire_index; + unsigned int n_rfs_succeeded; + unsigned int n_rfs_failed; struct work_struct filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 252a5f10596d..ef52b24ad9e7 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -1032,6 +1032,7 @@ static void efx_filter_rfs_work(struct work_struct *data) req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_succeeded++; } else { if (req->spec.ether_type == htons(ETH_P_IP)) netif_dbg(efx, rx_status, efx->net_dev, @@ -1047,6 +1048,7 @@ static void efx_filter_rfs_work(struct work_struct *data) req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_failed++; /* We're overloading the NIC's filter tables, so let's do a * chunk of extra expiry work. */ From 6fbc05e59163e66795a2bbdb4068abd7f7ae3510 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 22 Nov 2019 17:57:40 +0000 Subject: [PATCH 4/4] sfc: do ARFS expiry work occasionally even without NAPI poll If there's no traffic on a channel, its ARFS expiry work will never get scheduled by efx_poll() as that isn't being run. So make efx_filter_rfs_expire() reschedule itself to run after 30 seconds. Signed-off-by: Edward Cree Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx.c | 8 ++++---- drivers/net/ethernet/sfc/efx.h | 10 +++++++--- drivers/net/ethernet/sfc/net_driver.h | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 38d186b949be..992c773620ec 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -355,7 +355,7 @@ static int efx_poll(struct napi_struct *napi, int budget) #ifdef CONFIG_RFS_ACCEL /* Perhaps expire some ARFS filters */ - schedule_work(&channel->filter_work); + mod_delayed_work(system_wq, &channel->filter_work, 0); #endif /* There is no race here; although napi_disable() will @@ -487,7 +487,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) } #ifdef CONFIG_RFS_ACCEL - INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); #endif rx_queue = &channel->rx_queue; @@ -533,7 +533,7 @@ efx_copy_channel(const struct efx_channel *old_channel) memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); #ifdef CONFIG_RFS_ACCEL - INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); #endif return channel; @@ -1994,7 +1994,7 @@ static void efx_remove_filters(struct efx_nic *efx) struct efx_channel *channel; efx_for_each_channel(channel, efx) { - flush_work(&channel->filter_work); + cancel_delayed_work_sync(&channel->filter_work); kfree(channel->rps_flow_id); } #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index e58c2b6d64d9..2dd8d5002315 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -169,13 +169,17 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota); static inline void efx_filter_rfs_expire(struct work_struct *data) { - struct efx_channel *channel = container_of(data, struct efx_channel, - filter_work); - unsigned int time = jiffies - channel->rfs_last_expiry, quota; + struct delayed_work *dwork = to_delayed_work(data); + struct efx_channel *channel; + unsigned int time, quota; + channel = container_of(dwork, struct efx_channel, filter_work); + time = jiffies - channel->rfs_last_expiry; quota = channel->rfs_filter_count * time / (30 * HZ); if (quota > 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota))) channel->rfs_last_expiry += time; + /* Ensure we do more work eventually even if NAPI poll is not happening */ + schedule_delayed_work(dwork, 30 * HZ); } #define efx_filter_rfs_enabled() 1 #else diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index ccd480e699d3..1f88212be085 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -501,7 +501,7 @@ struct efx_channel { unsigned int rfs_expire_index; unsigned int n_rfs_succeeded; unsigned int n_rfs_failed; - struct work_struct filter_work; + struct delayed_work filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; #endif