mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
net/sched: refine indirect call mitigation in tc_wrapper.h
Some modern CPUs disable the X86_FEATURE_RETPOLINE feature, even if a direct call can still be beneficial. Even when IBRS is present, an indirect call is more expensive than a direct one: Direct Calls: Compilers can perform powerful optimizations like inlining, where the function body is directly inserted at the call site, eliminating call overhead entirely. Indirect Calls: Inlining is much harder, if not impossible, because the compiler doesn't know the target function at compile time. Techniques like Indirect Call Promotion can help by using profile-guided optimization to turn frequently taken indirect calls into conditional direct calls, but they still add complexity and potential overhead compared to a truly direct call. In this patch, I split tc_skip_wrapper into two different static keys, one for tc_act() (tc_skip_wrapper_act) and one for tc_classify() (tc_skip_wrapper_cls). Then I enable tc_skip_wrapper_cls only if the count of builtin classifiers is above one. I enable tc_skip_wrapper_act only if the count of builtin actions is above one. In our production kernels, we only have CONFIG_NET_CLS_BPF=y and CONFIG_NET_ACT_BPF=y. Others are modules or are not compiled. Tested on AMD Turin CPUs, cls_bpf_classify() cost went from 1% down to 0.18%, and FDO will be able to inline it in tcf_classify() for further gains. Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Reviewed-by: Pedro Tammela <pctammela@mojatatu.com> Reviewed-by: Victor Nogueira <victor@mojatatu.com> Link: https://patch.msgid.link/20260307133601.3863071-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
e8eb33d650
commit
f2db7b80b0
|
|
@ -12,7 +12,8 @@
|
||||||
|
|
||||||
#define TC_INDIRECT_SCOPE
|
#define TC_INDIRECT_SCOPE
|
||||||
|
|
||||||
extern struct static_key_false tc_skip_wrapper;
|
extern struct static_key_false tc_skip_wrapper_act;
|
||||||
|
extern struct static_key_false tc_skip_wrapper_cls;
|
||||||
|
|
||||||
/* TC Actions */
|
/* TC Actions */
|
||||||
#ifdef CONFIG_NET_CLS_ACT
|
#ifdef CONFIG_NET_CLS_ACT
|
||||||
|
|
@ -46,7 +47,7 @@ TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
|
||||||
static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
|
static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
|
||||||
struct tcf_result *res)
|
struct tcf_result *res)
|
||||||
{
|
{
|
||||||
if (static_branch_likely(&tc_skip_wrapper))
|
if (static_branch_likely(&tc_skip_wrapper_act))
|
||||||
goto skip;
|
goto skip;
|
||||||
|
|
||||||
#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
|
#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
|
||||||
|
|
@ -153,7 +154,7 @@ TC_INDIRECT_FILTER_DECLARE(u32_classify);
|
||||||
static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
|
static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
|
||||||
struct tcf_result *res)
|
struct tcf_result *res)
|
||||||
{
|
{
|
||||||
if (static_branch_likely(&tc_skip_wrapper))
|
if (static_branch_likely(&tc_skip_wrapper_cls))
|
||||||
goto skip;
|
goto skip;
|
||||||
|
|
||||||
#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
|
#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
|
||||||
|
|
@ -202,8 +203,44 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
|
||||||
static inline void tc_wrapper_init(void)
|
static inline void tc_wrapper_init(void)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_X86
|
#ifdef CONFIG_X86
|
||||||
if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
|
int cnt_cls = IS_BUILTIN(CONFIG_NET_CLS_BPF) +
|
||||||
static_branch_enable(&tc_skip_wrapper);
|
IS_BUILTIN(CONFIG_NET_CLS_U32) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_CLS_FLOWER) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_CLS_FW) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_CLS_BASIC) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_CLS_CGROUP) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_CLS_FLOW) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_CLS_ROUTE4);
|
||||||
|
|
||||||
|
int cnt_act = IS_BUILTIN(CONFIG_NET_ACT_GACT) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_MIRRED) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_PEDIT) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_POLICE) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_BPF) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_CSUM) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_CT) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_CTINFO) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_GATE) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_MPLS) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_NAT) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_VLAN) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_IFE) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_SIMP) +
|
||||||
|
IS_BUILTIN(CONFIG_NET_ACT_SAMPLE);
|
||||||
|
|
||||||
|
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (cnt_cls > 1)
|
||||||
|
static_branch_enable(&tc_skip_wrapper_cls);
|
||||||
|
|
||||||
|
if (cnt_act > 1)
|
||||||
|
static_branch_enable(&tc_skip_wrapper_act);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2479,7 +2479,8 @@ static struct pernet_operations psched_net_ops = {
|
||||||
};
|
};
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
|
#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
|
||||||
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
|
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_act);
|
||||||
|
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_cls);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
|
static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user