From c951a29f6ba52b86223eb00bbcff43142d59a901 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:43 +0300 Subject: [PATCH 1/6] net: fib_rules: Add DSCP selector attribute The FIB rule TOS selector is implemented differently between IPv4 and IPv6. In IPv4 it is used to match on the three "Type of Services" bits specified in RFC 791, while in IPv6 is it is used to match on the six DSCP bits specified in RFC 2474. Add a new FIB rule attribute to allow matching on DSCP. The attribute will be used to implement a 'dscp' selector in ip-rule with a consistent behavior between IPv4 and IPv6. For now, set the type of the attribute to 'NLA_REJECT' so that user space will not be able to configure it. This restriction will be lifted once both IPv4 and IPv6 support the new attribute. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/fib_rules.h | 1 + net/core/fib_rules.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 232df14e1287..a6924dd3aff1 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -67,6 +67,7 @@ enum { FRA_IP_PROTO, /* ip proto */ FRA_SPORT_RANGE, /* sport */ FRA_DPORT_RANGE, /* dport */ + FRA_DSCP, /* dscp */ __FRA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 5a4eb744758c..df41c05f7234 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -766,7 +766,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_PROTOCOL] = { .type = NLA_U8 }, [FRA_IP_PROTO] = { .type = NLA_U8 }, [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, - [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } + [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, + [FRA_DSCP] = { .type = NLA_REJECT }, }; int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, From b9455fef8b1fc662369d982fe97dc66e6c332699 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:44 +0300 Subject: [PATCH 2/6] ipv4: fib_rules: Add DSCP selector support Implement support for the new DSCP selector that allows IPv4 FIB rules to match on the entire DSCP field, unlike the existing TOS selector that only matches on the three lower DSCP bits. Differentiate between both selectors by adding a new bit in the IPv4 FIB rule structure (in an existing one byte hole) that is only set when the 'FRA_DSCP' attribute is specified by user space. Reject rules that use both selectors. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_rules.c | 54 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index c26776b71e97..b07292d50ee7 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -37,6 +37,7 @@ struct fib4_rule { u8 dst_len; u8 src_len; dscp_t dscp; + u8 dscp_full:1; /* DSCP or TOS selector */ __be32 src; __be32 srcmask; __be32 dst; @@ -186,7 +187,15 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, ((daddr ^ r->dst) & r->dstmask)) return 0; - if (r->dscp && !fib_dscp_masked_match(r->dscp, fl4)) + /* When DSCP selector is used we need to match on the entire DSCP field + * in the flow information structure. When TOS selector is used we need + * to mask the upper three DSCP bits prior to matching to maintain + * legacy behavior. + */ + if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos)) + return 0; + else if (!r->dscp_full && r->dscp && + !fib_dscp_masked_match(r->dscp, fl4)) return 0; if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) @@ -217,6 +226,20 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4, + struct netlink_ext_ack *extack) +{ + if (rule4->dscp) { + NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP"); + return -EINVAL; + } + + rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); + rule4->dscp_full = true; + + return 0; +} + static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, @@ -238,6 +261,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } rule4->dscp = inet_dsfield_to_dscp(frh->tos); + if (tb[FRA_DSCP] && + fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0) + goto errout; + /* split local/main if they are not already split */ err = fib_unmerge(net); if (err) @@ -320,9 +347,19 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (rule4->dst_len != frh->dst_len)) return 0; - if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos) + if (frh->tos && + (rule4->dscp_full || + inet_dscp_to_dsfield(rule4->dscp) != frh->tos)) return 0; + if (tb[FRA_DSCP]) { + dscp_t dscp; + + dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2); + if (!rule4->dscp_full || rule4->dscp != dscp) + return 0; + } + #ifdef CONFIG_IP_ROUTE_CLASSID if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) return 0; @@ -344,7 +381,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->dst_len = rule4->dst_len; frh->src_len = rule4->src_len; - frh->tos = inet_dscp_to_dsfield(rule4->dscp); + + if (rule4->dscp_full) { + frh->tos = 0; + if (nla_put_u8(skb, FRA_DSCP, + inet_dscp_to_dsfield(rule4->dscp) >> 2)) + goto nla_put_failure; + } else { + frh->tos = inet_dscp_to_dsfield(rule4->dscp); + } if ((rule4->dst_len && nla_put_in_addr(skb, FRA_DST, rule4->dst)) || @@ -366,7 +411,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) { return nla_total_size(4) /* dst */ + nla_total_size(4) /* src */ - + nla_total_size(4); /* flow */ + + nla_total_size(4) /* flow */ + + nla_total_size(1); /* dscp */ } static void fib4_rule_flush_cache(struct fib_rules_ops *ops) From 2cf630034e4ebcc52e0b69b776cafd90dc4f3919 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:45 +0300 Subject: [PATCH 3/6] ipv6: fib_rules: Add DSCP selector support Implement support for the new DSCP selector that allows IPv6 FIB rules to match on the entire DSCP field. This is done despite the fact that the above can be achieved using the existing TOS selector, so that user space program will be able to work with IPv4 and IPv6 rules in the same way. Differentiate between both selectors by adding a new bit in the IPv6 FIB rule structure that is only set when the 'FRA_DSCP' attribute is specified by user space. Reject rules that use both selectors. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/fib6_rules.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 9e254de7462f..04a9ed5e8310 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -27,6 +27,7 @@ struct fib6_rule { struct rt6key src; struct rt6key dst; dscp_t dscp; + u8 dscp_full:1; /* DSCP or TOS selector */ }; static bool fib6_rule_matchall(const struct fib_rule *rule) @@ -345,6 +346,20 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, return 1; } +static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6, + struct netlink_ext_ack *extack) +{ + if (rule6->dscp) { + NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP"); + return -EINVAL; + } + + rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); + rule6->dscp_full = true; + + return 0; +} + static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, @@ -361,6 +376,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } rule6->dscp = inet_dsfield_to_dscp(frh->tos); + if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0) + goto errout; + if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid table"); @@ -413,9 +431,19 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) return 0; - if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos) + if (frh->tos && + (rule6->dscp_full || + inet_dscp_to_dsfield(rule6->dscp) != frh->tos)) return 0; + if (tb[FRA_DSCP]) { + dscp_t dscp; + + dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2); + if (!rule6->dscp_full || rule6->dscp != dscp) + return 0; + } + if (frh->src_len && nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) return 0; @@ -434,7 +462,15 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; - frh->tos = inet_dscp_to_dsfield(rule6->dscp); + + if (rule6->dscp_full) { + frh->tos = 0; + if (nla_put_u8(skb, FRA_DSCP, + inet_dscp_to_dsfield(rule6->dscp) >> 2)) + goto nla_put_failure; + } else { + frh->tos = inet_dscp_to_dsfield(rule6->dscp); + } if ((rule6->dst.plen && nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || @@ -450,7 +486,8 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) { return nla_total_size(16) /* dst */ - + nla_total_size(16); /* src */ + + nla_total_size(16) /* src */ + + nla_total_size(1); /* dscp */ } static void fib6_rule_flush_cache(struct fib_rules_ops *ops) From 4b041d286e918340a21d778e83c00649da2b58cb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:46 +0300 Subject: [PATCH 4/6] net: fib_rules: Enable DSCP selector usage Now that both IPv4 and IPv6 support the new DSCP selector, enable user space to configure FIB rules that make use of it by changing the policy of the new DSCP attribute so that it accepts values in the range of [0, 63]. Use NLA_U8 rather than NLA_UINT as the field is of fixed size. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-5-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/fib_rules.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index df41c05f7234..154a2681f55c 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -767,7 +768,7 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_IP_PROTO] = { .type = NLA_U8 }, [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, - [FRA_DSCP] = { .type = NLA_REJECT }, + [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), }; int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, From ac6ad3f3b5b11731312d9a4d3cfda74e22421b14 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:47 +0300 Subject: [PATCH 5/6] selftests: fib_rule_tests: Add DSCP selector match tests Add tests for the new FIB rule DSCP selector. Test with both IPv4 and IPv6 and with both input and output routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-6-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 53c5c1ad437e..21d11d23fab7 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -274,6 +274,23 @@ fib_rule6_test() "$getnomatch" "ipproto ipv6-icmp match" \ "ipproto ipv6-tcp no match" fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP6 iif $DEV tos 0xfc" + getnomatch="from $SRC_IP6 iif $DEV tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" + fi } fib_rule6_vrf_test() @@ -468,6 +485,23 @@ fib_rule4_test() "$getnomatch" "ipproto icmp match" \ "ipproto tcp no match" fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP iif $DEV tos 0xfc" + getnomatch="from $SRC_IP iif $DEV tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" + fi } fib_rule4_vrf_test() From 2bf1259a6ea1104c55d1a5318eec1ef29e85cf76 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 11 Sep 2024 12:37:48 +0300 Subject: [PATCH 6/6] selftests: fib_rule_tests: Add DSCP selector connect tests Test that locally generated traffic from a socket that specifies a DS Field using the IP_TOS / IPV6_TCLASS socket options is correctly redirected using a FIB rule that matches on DSCP. Add negative tests to verify that the rule is not it when it should not. Test with both IPv4 and IPv6 and with both TCP and UDP sockets. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/20240911093748.3662015-7-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 21d11d23fab7..1d58b3b87465 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -336,6 +336,34 @@ fib_rule6_connect_test() log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)" $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -6 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp udp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp tcp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp udp no connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp tcp no connect" + + $IP -6 rule del dscp 0x3f table $RTABLE_PEER + cleanup_peer } @@ -547,6 +575,34 @@ fib_rule4_connect_test() log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)" $IP -4 rule del dsfield 0x04 table $RTABLE_PEER + + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -4 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp udp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp tcp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp udp no connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp tcp no connect" + + $IP -4 rule del dscp 0x3f table $RTABLE_PEER + cleanup_peer } ################################################################################