From 0ab10314747364e621ab95b528c2bd874ff3f528 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:18 +0200 Subject: [PATCH 01/10] drivers: net: add missing interrupt.h include these drivers use tasklets or irq apis, but don't include interrupt.h. Once flow cache is removed the implicit interrupt.h inclusion goes away which will break the build. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- drivers/net/arcnet/arcdevice.h | 2 +- drivers/net/ethernet/amd/xgbe/xgbe.h | 1 + drivers/net/ethernet/synopsys/dwc-xlgmac-net.c | 1 + drivers/net/ieee802154/ca8210.c | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index cbb4f8566bbe..d09b2b46ab63 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -20,7 +20,7 @@ #include #ifdef __KERNEL__ -#include +#include /* * RECON_THRESHOLD is the maximum number of RECON messages to receive diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 0938294f640a..e9282c924621 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -129,6 +129,7 @@ #include #include #include +#include #define XGBE_DRV_NAME "amd-xgbe" #define XGBE_DRV_VERSION "1.0.3" diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index 3b91257683bc..e1b55b8fb8e0 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -17,6 +17,7 @@ #include #include +#include #include "dwc-xlgmac.h" #include "dwc-xlgmac-reg.h" diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index a626c539fb17..326243fae7e2 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include From 6b1c42e9726bdb00370342909d95efdc331d10ac Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:19 +0200 Subject: [PATCH 02/10] vti: revert flush x-netns xfrm cache when vti interface is removed flow cache is removed in next commit. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 31 ------------------------------- net/ipv6/ip6_vti.c | 31 ------------------------------- 2 files changed, 62 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 0192c255e508..5ed63d250950 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; -static bool is_vti_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti_netdev_ops; -} - -static int vti_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip_tunnel *tunnel = netdev_priv(dev); - - if (!is_vti_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(tunnel->net, dev_net(dev))) - xfrm_garbage_collect(tunnel->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti_notifier_block __read_mostly = { - .notifier_call = vti_device_event, -}; - static int __init vti_init(void) { const char *msg; @@ -618,8 +591,6 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); - register_netdevice_notifier(&vti_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -652,7 +623,6 @@ static int __init vti_init(void) xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -664,7 +634,6 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); - unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 486c2305f53c..79444a4bfd6d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; -static bool is_vti6_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti6_netdev_ops; -} - -static int vti6_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip6_tnl *t = netdev_priv(dev); - - if (!is_vti6_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(t->net, dev_net(dev))) - xfrm_garbage_collect(t->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti6_notifier_block __read_mostly = { - .notifier_call = vti6_device_event, -}; - /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; - register_netdevice_notifier(&vti6_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1216,7 +1187,6 @@ static int __init vti6_tunnel_init(void) xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); - unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); From 3c2a89ddc11896cf5498115c0380ab54b1c424b7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:20 +0200 Subject: [PATCH 03/10] net: xfrm: revert to lower xfrm dst gc limit revert c386578f1cdb4dac230395 ("xfrm: Let the flowcache handle its size by default."). Once we remove flow cache, we don't have a flow cache limit anymore. We must not allow (virtually) unlimited allocations of xfrm dst entries. Revert back to the old xfrm dst gc limits. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 6 ++---- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 974ab47ae53a..f485d553e65c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1291,8 +1291,7 @@ tag - INTEGER xfrm4_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv4 destination cache entries. At twice this value the system will - refuse new allocations. The value must be set below the flowcache - limit (4096 * number of online cpus) to take effect. + refuse new allocations. igmp_link_local_mcast_reports - BOOLEAN Enable IGMP reports for link local multicast groups in the @@ -1778,8 +1777,7 @@ ratelimit - INTEGER xfrm6_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will - refuse new allocations. The value must be set below the flowcache - limit (4096 * number of online cpus) to take effect. + refuse new allocations. IPv6 Update by: diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 71b4ecc195c7..19455a5fc328 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -266,7 +266,7 @@ static struct dst_ops xfrm4_dst_ops_template = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 79651bc71bf0..ae30dc4973e8 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -286,7 +286,7 @@ static struct dst_ops xfrm6_dst_ops_template = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { From 3ca28286ea809685d273d41674da34f45111482a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:21 +0200 Subject: [PATCH 04/10] xfrm_policy: bypass flow_cache_lookup Instead of consulting flow cache, call the xfrm bundle/policy lookup functions directly. This pretends the flow cache had no entry. This helps to gradually remove flow cache integration, followup commit will remove the dead code that this change adds. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ff61d8557929..1c7126ab752c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2052,13 +2052,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, } static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, - struct flow_cache_object *oldflo, void *ctx) +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) { - struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, i, err, pol_dead; + struct flow_cache_object *oldflo = NULL; /* Check if the policies from old bundle are usable */ xdst = NULL; @@ -2128,8 +2127,6 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, dst_release_immediate(&xdst->u.dst); } - /* We do need to return one reference for original caller */ - dst_hold(&new_xdst->u.dst); return &new_xdst->flo; make_dummy_bundle: @@ -2242,8 +2239,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, &xflo); + flo = xfrm_bundle_lookup(net, fl, family, dir, &xflo); if (flo == NULL) goto nopol; if (IS_ERR(flo)) { @@ -2489,8 +2485,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (!pol) { struct flow_cache_object *flo; - flo = flow_cache_lookup(net, &fl, family, fl_dir, - xfrm_policy_lookup, NULL); + flo = xfrm_policy_lookup(net, &fl, family, dir, NULL, NULL); + if (IS_ERR_OR_NULL(flo)) pol = ERR_CAST(flo); else From 855dad99c07434065c0f21be6185e9cd8a6daab0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:22 +0200 Subject: [PATCH 05/10] xfrm_policy: remove always true/false branches after previous change oldflo and xdst are always NULL. These branches were already removed by gcc, this doesn't change code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 74 ++++++++---------------------------------- 1 file changed, 14 insertions(+), 60 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1c7126ab752c..19d457db3a09 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2056,48 +2056,23 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst, *new_xdst; - int num_pols = 0, num_xfrms = 0, i, err, pol_dead; - struct flow_cache_object *oldflo = NULL; + int num_pols = 0, num_xfrms = 0, err; /* Check if the policies from old bundle are usable */ xdst = NULL; - if (oldflo) { - xdst = container_of(oldflo, struct xfrm_dst, flo); - num_pols = xdst->num_pols; - num_xfrms = xdst->num_xfrms; - pol_dead = 0; - for (i = 0; i < num_pols; i++) { - pols[i] = xdst->pols[i]; - pol_dead |= pols[i]->walk.dead; - } - if (pol_dead) { - /* Mark DST_OBSOLETE_DEAD to fail the next - * xfrm_dst_check() - */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - xdst = NULL; - num_pols = 0; - num_xfrms = 0; - oldflo = NULL; - } - } - /* Resolve policies to use if we couldn't get them from * previous cache entry */ - if (xdst == NULL) { - num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, - flow_to_policy_dir(dir)); - err = xfrm_expand_policies(fl, family, pols, + num_pols = 1; + pols[0] = __xfrm_policy_lookup(net, fl, family, + flow_to_policy_dir(dir)); + err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); - if (err < 0) - goto inc_error; - if (num_pols == 0) - return NULL; - if (num_xfrms <= 0) - goto make_dummy_bundle; - } + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); @@ -2105,26 +2080,10 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, err = PTR_ERR(new_xdst); if (err != -EAGAIN) goto error; - if (oldflo == NULL) - goto make_dummy_bundle; - dst_hold(&xdst->u.dst); - return oldflo; + goto make_dummy_bundle; } else if (new_xdst == NULL) { num_xfrms = 0; - if (oldflo == NULL) - goto make_dummy_bundle; - xdst->num_xfrms = 0; - dst_hold(&xdst->u.dst); - return oldflo; - } - - /* Kill the previous bundle */ - if (xdst) { - /* The policies were stolen for newly generated bundle */ - xdst->num_pols = 0; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); + goto make_dummy_bundle; } return &new_xdst->flo; @@ -2148,12 +2107,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - if (xdst != NULL) { - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - } else - xfrm_pols_put(pols, num_pols); + xfrm_pols_put(pols, num_pols); return ERR_PTR(err); } From aff669bc286eb3a459acb6e192ae7d2adc3967a3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:23 +0200 Subject: [PATCH 06/10] xfrm_policy: kill flow to policy dir conversion XFRM_POLICY_IN/OUT/FWD are identical to FLOW_DIR_*, so gcc already removed this function as its just returns the argument. Again, no code change. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 46 ++++-------------------------------------- 1 file changed, 4 insertions(+), 42 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 19d457db3a09..9f724a688475 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1187,24 +1187,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } -static int flow_to_policy_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - - switch (dir) { - default: - case FLOW_DIR_IN: - return XFRM_POLICY_IN; - case FLOW_DIR_OUT: - return XFRM_POLICY_OUT; - case FLOW_DIR_FWD: - return XFRM_POLICY_FWD; - } -} - static struct flow_cache_object * xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) @@ -1214,7 +1196,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, if (old_obj) xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); + pol = __xfrm_policy_lookup(net, fl, family, dir); if (IS_ERR_OR_NULL(pol)) return ERR_CAST(pol); @@ -1225,23 +1207,6 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, return &pol->flo; } -static inline int policy_to_flow_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - } -} - static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family) { @@ -1261,7 +1226,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, - policy_to_flow_dir(dir)); + dir); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; @@ -2063,8 +2028,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, /* Resolve policies to use if we couldn't get them from * previous cache entry */ num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, - flow_to_policy_dir(dir)); + pols[0] = __xfrm_policy_lookup(net, fl, family, dir); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2142,7 +2106,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0; dst = NULL; @@ -2399,12 +2363,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int pi; int reverse; struct flowi fl; - u8 fl_dir; int xerr_idx = -1; reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); From 86dc8ee0b2c524d47864ee1bdf2b36ea157405a8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:24 +0200 Subject: [PATCH 07/10] xfrm_policy: remove xfrm_policy_lookup This removes the wrapper and renames the __xfrm_policy_lookup variant to get rid of another place that used flow cache objects. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9f724a688475..339bb3ac2797 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1175,7 +1175,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@ -1187,26 +1187,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } -static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, - u8 dir, struct flow_cache_object *old_obj, void *ctx) -{ - struct xfrm_policy *pol; - - if (old_obj) - xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - - pol = __xfrm_policy_lookup(net, fl, family, dir); - if (IS_ERR_OR_NULL(pol)) - return ERR_CAST(pol); - - /* Resolver returns two references: - * one for cache and one for caller of flow_cache_lookup() */ - xfrm_pol_hold(pol); - - return &pol->flo; -} - static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family) { @@ -2028,7 +2008,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, /* Resolve policies to use if we couldn't get them from * previous cache entry */ num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, dir); + pols[0] = xfrm_policy_lookup(net, fl, family, dir); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2398,16 +2378,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } } - if (!pol) { - struct flow_cache_object *flo; - - flo = xfrm_policy_lookup(net, &fl, family, dir, NULL, NULL); - - if (IS_ERR_OR_NULL(flo)) - pol = ERR_CAST(flo); - else - pol = container_of(flo, struct xfrm_policy, flo); - } + if (!pol) + pol = xfrm_policy_lookup(net, &fl, family, dir); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); From bd45c539bf56650fb8fbab09c36f4b9afcbd4e1c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:25 +0200 Subject: [PATCH 08/10] xfrm_policy: make xfrm_bundle_lookup return xfrm dst object This allows to remove flow cache object embedded in struct xfrm_dst. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 339bb3ac2797..145d2395f3c0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1996,15 +1996,13 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, goto out; } -static struct flow_cache_object * +static struct xfrm_dst * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct xfrm_dst *xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, err; + struct xfrm_dst *xdst; - /* Check if the policies from old bundle are usable */ - xdst = NULL; /* Resolve policies to use if we couldn't get them from * previous cache entry */ num_pols = 1; @@ -2018,19 +2016,19 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, if (num_xfrms <= 0) goto make_dummy_bundle; - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - if (IS_ERR(new_xdst)) { - err = PTR_ERR(new_xdst); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); if (err != -EAGAIN) goto error; goto make_dummy_bundle; - } else if (new_xdst == NULL) { + } else if (xdst == NULL) { num_xfrms = 0; goto make_dummy_bundle; } - return &new_xdst->flo; + return xdst; make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: @@ -2046,7 +2044,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); dst_hold(&xdst->u.dst); - return &xdst->flo; + return xdst; inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); @@ -2082,7 +2080,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct flow_cache_object *flo; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; @@ -2137,14 +2134,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - flo = xfrm_bundle_lookup(net, fl, family, dir, &xflo); - if (flo == NULL) + xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo); + if (xdst == NULL) goto nopol; - if (IS_ERR(flo)) { - err = PTR_ERR(flo); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); goto dropdst; } - xdst = container_of(flo, struct xfrm_dst, flo); num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; From 09c7570480f7544ffbf8e6db365208b0b0c154c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:26 +0200 Subject: [PATCH 09/10] xfrm: remove flow cache After rcu conversions performance degradation in forward tests isn't that noticeable anymore. See next patch for some numbers. A followup patcg could then also remove genid from the policies as we do not cache bundles anymore. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/flow.h | 34 --- include/net/flowcache.h | 25 -- include/net/netns/xfrm.h | 11 - include/net/xfrm.h | 8 - net/core/Makefile | 1 - net/core/flow.c | 516 -------------------------------- net/ipv4/xfrm4_policy.c | 9 - net/ipv6/xfrm6_policy.c | 9 - net/key/af_key.c | 6 - net/xfrm/xfrm_device.c | 2 - net/xfrm/xfrm_policy.c | 108 +------ net/xfrm/xfrm_user.c | 3 - security/selinux/include/xfrm.h | 4 +- 13 files changed, 2 insertions(+), 734 deletions(-) delete mode 100644 include/net/flowcache.h delete mode 100644 net/core/flow.c diff --git a/include/net/flow.h b/include/net/flow.h index bae198b3039e..f3dc61b29bb5 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family) return 0; } -#define FLOW_DIR_IN 0 -#define FLOW_DIR_OUT 1 -#define FLOW_DIR_FWD 2 - -struct net; -struct sock; -struct flow_cache_ops; - -struct flow_cache_object { - const struct flow_cache_ops *ops; -}; - -struct flow_cache_ops { - struct flow_cache_object *(*get)(struct flow_cache_object *); - int (*check)(struct flow_cache_object *); - void (*delete)(struct flow_cache_object *); -}; - -typedef struct flow_cache_object *(*flow_resolve_t)( - struct net *net, const struct flowi *key, u16 family, - u8 dir, struct flow_cache_object *oldobj, void *ctx); - -struct flow_cache_object *flow_cache_lookup(struct net *net, - const struct flowi *key, u16 family, - u8 dir, flow_resolve_t resolver, - void *ctx); -int flow_cache_init(struct net *net); -void flow_cache_fini(struct net *net); -void flow_cache_hp_init(void); - -void flow_cache_flush(struct net *net); -void flow_cache_flush_deferred(struct net *net); -extern atomic_t flow_cache_genid; - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) diff --git a/include/net/flowcache.h b/include/net/flowcache.h deleted file mode 100644 index 51eb971e8973..000000000000 --- a/include/net/flowcache.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NET_FLOWCACHE_H -#define _NET_FLOWCACHE_H - -#include -#include -#include -#include - -struct flow_cache_percpu { - struct hlist_head *hash_table; - unsigned int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct hlist_node node; - unsigned int low_watermark; - unsigned int high_watermark; - struct timer_list rnd_timer; -}; -#endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 27bb9633c69d..611521646dd4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,7 +6,6 @@ #include #include #include -#include struct ctl_table_header; @@ -73,16 +72,6 @@ struct netns_xfrm { spinlock_t xfrm_state_lock; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; - - /* flow cache part */ - struct flow_cache flow_cache_global; - atomic_t flow_cache_genid; - struct list_head flow_cache_gc_list; - atomic_t flow_cache_gc_count; - spinlock_t flow_cache_gc_lock; - struct work_struct flow_cache_gc_work; - struct work_struct flow_cache_flush_work; - struct mutex flow_flush_sem; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c0916ab18d32..e0feba2ce76a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -563,7 +563,6 @@ struct xfrm_policy { refcount_t refcnt; struct timer_list timer; - struct flow_cache_object flo; atomic_t genid; u32 priority; u32 index; @@ -978,7 +977,6 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; - struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -1226,9 +1224,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } } -void xfrm_garbage_collect(struct net *net); -void xfrm_garbage_collect_deferred(struct net *net); - #else static inline void xfrm_sk_free_policy(struct sock *sk) {} @@ -1263,9 +1258,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, { return 1; } -static inline void xfrm_garbage_collect(struct net *net) -{ -} #endif static __inline__ diff --git a/net/core/Makefile b/net/core/Makefile index 79f9479e9658..d501c4278015 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,7 +11,6 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o -obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o diff --git a/net/core/flow.c b/net/core/flow.c deleted file mode 100644 index f7f5d1932a27..000000000000 --- a/net/core/flow.c +++ /dev/null @@ -1,516 +0,0 @@ -/* flow.c: Generic flow cache. - * - * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru) - * Copyright (C) 2003 David S. Miller (davem@redhat.com) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct flow_cache_entry { - union { - struct hlist_node hlist; - struct list_head gc_list; - } u; - struct net *net; - u16 family; - u8 dir; - u32 genid; - struct flowi key; - struct flow_cache_object *object; -}; - -struct flow_flush_info { - struct flow_cache *cache; - atomic_t cpuleft; - struct completion completion; -}; - -static struct kmem_cache *flow_cachep __read_mostly; - -#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift) -#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) - -static void flow_cache_new_hashrnd(unsigned long arg) -{ - struct flow_cache *fc = (void *) arg; - int i; - - for_each_possible_cpu(i) - per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; - - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); -} - -static int flow_entry_valid(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) - return 0; - if (fle->object && !fle->object->ops->check(fle->object)) - return 0; - return 1; -} - -static void flow_entry_kill(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (fle->object) - fle->object->ops->delete(fle->object); - kmem_cache_free(flow_cachep, fle); -} - -static void flow_cache_gc_task(struct work_struct *work) -{ - struct list_head gc_list; - struct flow_cache_entry *fce, *n; - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); - - INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { - flow_entry_kill(fce, xfrm); - atomic_dec(&xfrm->flow_cache_gc_count); - } -} - -static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - unsigned int deleted, - struct list_head *gc_list, - struct netns_xfrm *xfrm) -{ - if (deleted) { - atomic_add(deleted, &xfrm->flow_cache_gc_count); - fcp->hash_count -= deleted; - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - schedule_work(&xfrm->flow_cache_gc_work); - } -} - -static void __flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - unsigned int shrink_to) -{ - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - for (i = 0; i < flow_cache_hash_size(fc); i++) { - unsigned int saved = 0; - - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (saved < shrink_to && - flow_entry_valid(fle, xfrm)) { - saved++; - } else { - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); -} - -static void flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); - - __flow_cache_shrink(fc, fcp, shrink_to); -} - -static void flow_new_hash_rnd(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - get_random_bytes(&fcp->hash_rnd, sizeof(u32)); - fcp->hash_rnd_recalc = 0; - __flow_cache_shrink(fc, fcp, 0); -} - -static u32 flow_hash_code(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - const struct flowi *key, - unsigned int keysize) -{ - const u32 *k = (const u32 *) key; - const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - - return jhash2(k, length, fcp->hash_rnd) - & (flow_cache_hash_size(fc) - 1); -} - -/* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment. - */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - unsigned int keysize) -{ - const flow_compare_t *k1, *k1_lim, *k2; - - k1 = (const flow_compare_t *) key1; - k1_lim = k1 + keysize; - - k2 = (const flow_compare_t *) key2; - - do { - if (*k1++ != *k2++) - return 1; - } while (k1 < k1_lim); - - return 0; -} - -struct flow_cache_object * -flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver, void *ctx) -{ - struct flow_cache *fc = &net->xfrm.flow_cache_global; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle, *tfle; - struct flow_cache_object *flo; - unsigned int keysize; - unsigned int hash; - - local_bh_disable(); - fcp = this_cpu_ptr(fc->percpu); - - fle = NULL; - flo = NULL; - - keysize = flow_key_size(family); - if (!keysize) - goto nocache; - - /* Packet really early in init? Making flow_cache_init a - * pre-smp initcall would solve this. --RR */ - if (!fcp->hash_table) - goto nocache; - - if (fcp->hash_rnd_recalc) - flow_new_hash_rnd(fc, fcp); - - hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { - if (tfle->net == net && - tfle->family == family && - tfle->dir == dir && - flow_key_compare(key, &tfle->key, keysize) == 0) { - fle = tfle; - break; - } - } - - if (unlikely(!fle)) { - if (fcp->hash_count > fc->high_watermark) - flow_cache_shrink(fc, fcp); - - if (atomic_read(&net->xfrm.flow_cache_gc_count) > - 2 * num_online_cpus() * fc->high_watermark) { - flo = ERR_PTR(-ENOBUFS); - goto ret_object; - } - - fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); - if (fle) { - fle->net = net; - fle->family = family; - fle->dir = dir; - memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); - fle->object = NULL; - hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); - fcp->hash_count++; - } - } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { - flo = fle->object; - if (!flo) - goto ret_object; - flo = flo->ops->get(flo); - if (flo) - goto ret_object; - } else if (fle->object) { - flo = fle->object; - flo->ops->delete(flo); - fle->object = NULL; - } - -nocache: - flo = NULL; - if (fle) { - flo = fle->object; - fle->object = NULL; - } - flo = resolver(net, key, family, dir, flo, ctx); - if (fle) { - fle->genid = atomic_read(&net->xfrm.flow_cache_genid); - if (!IS_ERR(flo)) - fle->object = flo; - else - fle->genid--; - } else { - if (!IS_ERR_OR_NULL(flo)) - flo->ops->delete(flo); - } -ret_object: - local_bh_enable(); - return flo; -} -EXPORT_SYMBOL(flow_cache_lookup); - -static void flow_cache_flush_tasklet(unsigned long data) -{ - struct flow_flush_info *info = (void *)data; - struct flow_cache *fc = info->cache; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - fcp = this_cpu_ptr(fc->percpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle, xfrm)) - continue; - - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); - - if (atomic_dec_and_test(&info->cpuleft)) - complete(&info->completion); -} - -/* - * Return whether a cpu needs flushing. Conservatively, we assume - * the presence of any entries means the core may require flushing, - * since the flow_cache_ops.check() function may assume it's running - * on the same core as the per-cpu cache component. - */ -static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp; - unsigned int i; - - fcp = per_cpu_ptr(fc->percpu, cpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) - if (!hlist_empty(&fcp->hash_table[i])) - return 0; - return 1; -} - -static void flow_cache_flush_per_cpu(void *data) -{ - struct flow_flush_info *info = data; - struct tasklet_struct *tasklet; - - tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; - tasklet->data = (unsigned long)info; - tasklet_schedule(tasklet); -} - -void flow_cache_flush(struct net *net) -{ - struct flow_flush_info info; - cpumask_var_t mask; - int i, self; - - /* Track which cpus need flushing to avoid disturbing all cores. */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return; - cpumask_clear(mask); - - /* Don't want cpus going down or up during this. */ - get_online_cpus(); - mutex_lock(&net->xfrm.flow_flush_sem); - info.cache = &net->xfrm.flow_cache_global; - for_each_online_cpu(i) - if (!flow_cache_percpu_empty(info.cache, i)) - cpumask_set_cpu(i, mask); - atomic_set(&info.cpuleft, cpumask_weight(mask)); - if (atomic_read(&info.cpuleft) == 0) - goto done; - - init_completion(&info.completion); - - local_bh_disable(); - self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); - on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); - if (self) - flow_cache_flush_tasklet((unsigned long)&info); - local_bh_enable(); - - wait_for_completion(&info.completion); - -done: - mutex_unlock(&net->xfrm.flow_flush_sem); - put_online_cpus(); - free_cpumask_var(mask); -} - -static void flow_cache_flush_task(struct work_struct *work) -{ - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_flush_work); - struct net *net = container_of(xfrm, struct net, xfrm); - - flow_cache_flush(net); -} - -void flow_cache_flush_deferred(struct net *net) -{ - schedule_work(&net->xfrm.flow_cache_flush_work); -} - -static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); - - if (!fcp->hash_table) { - fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); - if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %u\n", sz); - return -ENOMEM; - } - fcp->hash_rnd_recalc = 1; - fcp->hash_count = 0; - tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); - } - return 0; -} - -static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - - return flow_cache_cpu_prepare(fc, cpu); -} - -static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - - __flow_cache_shrink(fc, fcp, 0); - return 0; -} - -int flow_cache_init(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - if (!flow_cachep) - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); - spin_lock_init(&net->xfrm.flow_cache_gc_lock); - INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); - INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); - INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); - mutex_init(&net->xfrm.flow_flush_sem); - atomic_set(&net->xfrm.flow_cache_gc_count, 0); - - fc->hash_shift = 10; - fc->low_watermark = 2 * flow_cache_hash_size(fc); - fc->high_watermark = 4 * flow_cache_hash_size(fc); - - fc->percpu = alloc_percpu(struct flow_cache_percpu); - if (!fc->percpu) - return -ENOMEM; - - if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node)) - goto err; - - setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, - (unsigned long) fc); - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); - - return 0; - -err: - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; - - return -ENOMEM; -} -EXPORT_SYMBOL(flow_cache_init); - -void flow_cache_fini(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - del_timer_sync(&fc->rnd_timer); - - cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node); - - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; -} -EXPORT_SYMBOL(flow_cache_fini); - -void __init flow_cache_hp_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, - "net/flow:prepare", - flow_cache_cpu_up_prep, - flow_cache_cpu_dead); - WARN_ON(ret < 0); -} diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 19455a5fc328..4aefb149fe0a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_tos = iph->tos; } -static inline int xfrm4_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - - xfrm_garbage_collect_deferred(net); - return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); -} - static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -259,7 +251,6 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, - .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ae30dc4973e8..f44b25a48478 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } } -static inline int xfrm6_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - - xfrm_garbage_collect_deferred(net); - return dst_entries_get_fast(ops) > ops->gc_thresh * 2; -} - static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -279,7 +271,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, - .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, diff --git a/net/key/af_key.c b/net/key/af_key.c index ca9d3ae665e7..10d7133e4fe9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); - if (err == 0) - xfrm_garbage_collect(net); return err; } @@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); - if (!err) - xfrm_garbage_collect(net); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f7e8bfa0c2d..1f9a079e08b0 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -175,8 +175,6 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); - xfrm_garbage_collect(dev_net(dev)); - return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 145d2395f3c0..0f1db4c18b22 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -246,36 +246,6 @@ static void xfrm_policy_timer(unsigned long data) xfrm_pol_put(xp); } -static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - if (unlikely(pol->walk.dead)) - flo = NULL; - else - xfrm_pol_hold(pol); - - return flo; -} - -static int xfrm_policy_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - return !pol->walk.dead; -} - -static void xfrm_policy_flo_delete(struct flow_cache_object *flo) -{ - xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); -} - -static const struct flow_cache_ops xfrm_policy_fc_ops = { - .get = xfrm_policy_flo_get, - .check = xfrm_policy_flo_check, - .delete = xfrm_policy_flo_delete, -}; - /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ @@ -298,7 +268,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) (unsigned long)policy); setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, (unsigned long)policy); - policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -798,7 +767,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); __xfrm_policy_link(policy, dir); - atomic_inc(&net->xfrm.flow_cache_genid); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@ -1490,58 +1458,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family) return tos; } -static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (xdst->route == NULL) { - /* Dummy bundle - if it has xfrms we were not - * able to build bundle as template resolution failed. - * It means we need to try again resolving. */ - if (xdst->num_xfrms > 0) - return NULL; - } else if (dst->flags & DST_XFRM_QUEUE) { - return NULL; - } else { - /* Real bundle */ - if (stale_bundle(dst)) - return NULL; - } - - dst_hold(dst); - return flo; -} - -static int xfrm_bundle_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (!xdst->route) - return 0; - if (stale_bundle(dst)) - return 0; - - return 1; -} - -static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - dst->obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(dst); -} - -static const struct flow_cache_ops xfrm_bundle_fc_ops = { - .get = xfrm_bundle_flo_get, - .check = xfrm_bundle_flo_check, - .delete = xfrm_bundle_flo_delete, -}; - static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1569,7 +1485,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) struct dst_entry *dst = &xdst->u.dst; memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); - xdst->flo.ops = &xfrm_bundle_fc_ops; } else xdst = ERR_PTR(-ENOBUFS); @@ -2521,11 +2436,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * notice. That's what we are validating here via the * stale_bundle() check. * - * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will - * be marked on it. * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. - * Both will force stable_bundle() to fail on any xdst bundle with + * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) @@ -2565,18 +2478,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -void xfrm_garbage_collect(struct net *net) -{ - flow_cache_flush(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect); - -void xfrm_garbage_collect_deferred(struct net *net) -{ - flow_cache_flush_deferred(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect_deferred); - static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -2914,14 +2815,9 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; - rv = flow_cache_init(net); - if (rv < 0) - goto out; return 0; -out: - xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -2934,7 +2830,6 @@ static int __net_init xfrm_net_init(struct net *net) static void __net_exit xfrm_net_exit(struct net *net) { - flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -2948,7 +2843,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { - flow_cache_hp_init(); register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2be4c6af008a..1b539b7dcfab 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; return err; } - xfrm_garbage_collect(net); c.data.type = type; c.event = nlh->nlmsg_type; diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 1450f85b946d..36a7ce9e11ff 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void) struct net *net; rtnl_lock(); - for_each_net(net) { - atomic_inc(&net->xfrm.flow_cache_genid); + for_each_net(net) rt_genid_bump_all(net); - } rtnl_unlock(); } #else From ec30d78c14a813db39a647b6a348b4286ba4abf5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:27 +0200 Subject: [PATCH 10/10] xfrm: add xdst pcpu cache retain last used xfrm_dst in a pcpu cache. On next request, reuse this dst if the policies are the same. The cache will not help with strict RR workloads as there is no hit. The cache packet-path part is reasonably small, the notifier part is needed so we do not add long hangs when a device is dismantled but some pcpu xdst still holds a reference, there are also calls to the flush operation when userspace deletes SAs so modules can be removed (there is no hit. We need to run the dst_release on the correct cpu to avoid races with packet path. This is done by adding a work_struct for each cpu and then doing the actual test/release on each affected cpu via schedule_work_on(). Test results using 4 network namespaces and null encryption: ns1 ns2 -> ns3 -> ns4 netperf -> xfrm/null enc -> xfrm/null dec -> netserver what TCP_STREAM UDP_STREAM UDP_RR Flow cache: 14644.61 294.35 327231.64 No flow cache: 14349.81 242.64 202301.72 Pcpu cache: 14629.70 292.21 205595.22 UDP tests used 64byte packets, tests ran for one minute each, value is average over ten iterations. 'Flow cache' is 'net-next', 'No flow cache' is net-next plus this series but without this patch. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + net/xfrm/xfrm_device.c | 2 + net/xfrm/xfrm_policy.c | 127 ++++++++++++++++++++++++++++++++++++++++- net/xfrm/xfrm_state.c | 5 +- 4 files changed, 132 insertions(+), 3 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e0feba2ce76a..afb4929d7232 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); +void xfrm_policy_cache_flush(void); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 1f9a079e08b0..5cd7a244e88d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev) static int xfrm_dev_unregister(struct net_device *dev) { + xfrm_policy_cache_flush(); return NOTIFY_DONE; } @@ -175,6 +176,7 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); + xfrm_policy_cache_flush(); return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0f1db4c18b22..06c3bf7ab86b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,8 @@ struct xfrm_flo { u8 flags; }; +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); +static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@ -972,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) } if (!cnt) err = -ESRCH; + else + xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@ -1700,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, } +static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) +{ + this_cpu_write(xfrm_last_dst, xdst); + if (old) + dst_release(&old->u.dst); +} + +static void __xfrm_pcpu_work_fn(void) +{ + struct xfrm_dst *old; + + old = this_cpu_read(xfrm_last_dst); + if (old && !xfrm_bundle_ok(old)) + xfrm_last_dst_update(NULL, old); +} + +static void xfrm_pcpu_work_fn(struct work_struct *work) +{ + local_bh_disable(); + rcu_read_lock(); + __xfrm_pcpu_work_fn(); + rcu_read_unlock(); + local_bh_enable(); +} + +void xfrm_policy_cache_flush(void) +{ + struct xfrm_dst *old; + bool found = 0; + int cpu; + + local_bh_disable(); + rcu_read_lock(); + for_each_possible_cpu(cpu) { + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + if (smp_processor_id() == cpu) { + __xfrm_pcpu_work_fn(); + continue; + } + found = true; + break; + } + } + + rcu_read_unlock(); + local_bh_enable(); + + if (!found) + return; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + bool bundle_release; + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + bundle_release = old && !xfrm_bundle_ok(old); + rcu_read_unlock(); + + if (!bundle_release) + continue; + + if (cpu_online(cpu)) { + schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); + continue; + } + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + per_cpu(xfrm_last_dst, cpu) = NULL; + dst_release(&old->u.dst); + } + rcu_read_unlock(); + } + + put_online_cpus(); +} + +static bool xfrm_pol_dead(struct xfrm_dst *xdst) +{ + unsigned int num_pols = xdst->num_pols; + unsigned int pol_dead = 0, i; + + for (i = 0; i < num_pols; i++) + pol_dead |= xdst->pols[i]->walk.dead; + + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + if (pol_dead) + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + + return pol_dead; +} + static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@ -1707,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *xdst, *old; struct dst_entry *dst; - struct xfrm_dst *xdst; int err; + xdst = this_cpu_read(xfrm_last_dst); + if (xdst && + xdst->u.dst.dev == dst_orig->dev && + xdst->num_pols == num_pols && + !xfrm_pol_dead(xdst) && + memcmp(xdst->pols, pols, + sizeof(struct xfrm_policy *) * num_pols) == 0) { + dst_hold(&xdst->u.dst); + return xdst; + } + + old = xdst; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { @@ -1731,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); + atomic_set(&xdst->u.dst.__refcnt, 2); + xfrm_last_dst_update(xdst, old); + return xdst; } @@ -2843,6 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { + int i; + + xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), + GFP_KERNEL); + BUG_ON(!xfrm_pcpu_work); + + for (i = 0; i < NR_CPUS; i++) + INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); + register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6c0956d10db6..82cbbce69b79 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -724,9 +724,10 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) } } } - if (cnt) + if (cnt) { err = 0; - + xfrm_policy_cache_flush(); + } out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err;