netfilter: flowtable: consolidate xmit path

Use dev_queue_xmit() for the XMIT_NEIGH case. Store the interface index
of the real device behind the vlan/pppoe device; this introduces an
extra lookup for the real device in the xmit path because rt->dst.dev
provides the vlan/pppoe device.

XMIT_NEIGH now looks more similar to XMIT_DIRECT, but the check for stale
dst and the neighbour lookup still remain in place, which is convenient
for dealing with network topology changes.

Note that nft_flow_route() needs to relax the check for _XMIT_NEIGH so
the existing basic xfrm offload (which only works in one direction) does
not break.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
Pablo Neira Ayuso 2025-10-10 12:32:35 +02:00
parent 93d7a7ed07
commit b5964aac51
4 changed files with 57 additions and 39 deletions

View File

@ -141,6 +141,7 @@ struct flow_offload_tuple {
union { union {
struct { struct {
struct dst_entry *dst_cache; struct dst_entry *dst_cache;
u32 ifidx;
u32 dst_cookie; u32 dst_cookie;
}; };
struct { struct {

View File

@ -132,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
break; break;
case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH: case FLOW_OFFLOAD_XMIT_NEIGH:
flow_tuple->ifidx = route->tuple[dir].out.ifindex;
flow_tuple->dst_cache = dst; flow_tuple->dst_cache = dst;
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
break; break;

View File

@ -333,19 +333,18 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
} }
} }
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, struct nf_flow_xmit {
const struct flow_offload_tuple_rhash *tuplehash, const void *dest;
unsigned short type) const void *source;
{
struct net_device *outdev; struct net_device *outdev;
};
outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx); static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
if (!outdev) struct nf_flow_xmit *xmit)
return NF_DROP; {
skb->dev = xmit->outdev;
skb->dev = outdev; dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest, xmit->dest, xmit->source, skb->len);
tuplehash->tuple.out.h_source, skb->len);
dev_queue_xmit(skb); dev_queue_xmit(skb);
return NF_STOLEN; return NF_STOLEN;
@ -424,10 +423,10 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
struct nf_flowtable_ctx ctx = { struct nf_flowtable_ctx ctx = {
.in = state->in, .in = state->in,
}; };
struct nf_flow_xmit xmit = {};
struct flow_offload *flow; struct flow_offload *flow;
struct net_device *outdev; struct neighbour *neigh;
struct rtable *rt; struct rtable *rt;
__be32 nexthop;
int ret; int ret;
tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb); tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
@ -454,25 +453,34 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) { switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH: case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rtable(tuplehash->tuple.dst_cache); rt = dst_rtable(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev; xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
skb->dev = outdev; if (!xmit.outdev) {
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); flow_offload_teardown(flow);
return NF_DROP;
}
neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr));
if (IS_ERR(neigh)) {
flow_offload_teardown(flow);
return NF_DROP;
}
xmit.dest = neigh->ha;
skb_dst_set_noref(skb, &rt->dst); skb_dst_set_noref(skb, &rt->dst);
neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
ret = NF_STOLEN;
break; break;
case FLOW_OFFLOAD_XMIT_DIRECT: case FLOW_OFFLOAD_XMIT_DIRECT:
ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP); xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
if (ret == NF_DROP) if (!xmit.outdev) {
flow_offload_teardown(flow); flow_offload_teardown(flow);
return NF_DROP;
}
xmit.dest = tuplehash->tuple.out.h_dest;
xmit.source = tuplehash->tuple.out.h_source;
break; break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
ret = NF_DROP; return NF_DROP;
break;
} }
return ret; return nf_flow_queue_xmit(state->net, skb, &xmit);
} }
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
@ -719,9 +727,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
struct nf_flowtable_ctx ctx = { struct nf_flowtable_ctx ctx = {
.in = state->in, .in = state->in,
}; };
const struct in6_addr *nexthop; struct nf_flow_xmit xmit = {};
struct flow_offload *flow; struct flow_offload *flow;
struct net_device *outdev; struct neighbour *neigh;
struct rt6_info *rt; struct rt6_info *rt;
int ret; int ret;
@ -749,24 +757,33 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) { switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH: case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rt6_info(tuplehash->tuple.dst_cache); rt = dst_rt6_info(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev; xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
skb->dev = outdev; if (!xmit.outdev) {
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); flow_offload_teardown(flow);
return NF_DROP;
}
neigh = ip_neigh_gw6(rt->dst.dev, rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6));
if (IS_ERR(neigh)) {
flow_offload_teardown(flow);
return NF_DROP;
}
xmit.dest = neigh->ha;
skb_dst_set_noref(skb, &rt->dst); skb_dst_set_noref(skb, &rt->dst);
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
ret = NF_STOLEN;
break; break;
case FLOW_OFFLOAD_XMIT_DIRECT: case FLOW_OFFLOAD_XMIT_DIRECT:
ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6); xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
if (ret == NF_DROP) if (!xmit.outdev) {
flow_offload_teardown(flow); flow_offload_teardown(flow);
return NF_DROP;
}
xmit.dest = tuplehash->tuple.out.h_dest;
xmit.source = tuplehash->tuple.out.h_source;
break; break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
ret = NF_DROP; return NF_DROP;
break;
} }
return ret; return nf_flow_queue_xmit(state->net, skb, &xmit);
} }
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);

View File

@ -211,11 +211,11 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
} }
route->tuple[!dir].in.num_encaps = info.num_encaps; route->tuple[!dir].in.num_encaps = info.num_encaps;
route->tuple[!dir].in.ingress_vlans = info.ingress_vlans; route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
route->tuple[dir].out.ifindex = info.outdev->ifindex;
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) { if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN); memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
route->tuple[dir].out.ifindex = info.outdev->ifindex;
route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex; route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
route->tuple[dir].xmit_type = info.xmit_type; route->tuple[dir].xmit_type = info.xmit_type;
} }
@ -263,11 +263,10 @@ int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
nft_default_forward_path(route, this_dst, dir); nft_default_forward_path(route, this_dst, dir);
nft_default_forward_path(route, other_dst, !dir); nft_default_forward_path(route, other_dst, !dir);
if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH && if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
nft_dev_forward_path(route, ct, dir, ft); nft_dev_forward_path(route, ct, dir, ft);
if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
nft_dev_forward_path(route, ct, !dir, ft); nft_dev_forward_path(route, ct, !dir, ft);
}
return 0; return 0;
} }