net: pull headers in qdisc_pkt_len_segs_init()

Most ndo_start_xmit() methods expect the headers of GSO packets
to already be in skb->head.

net/core/tso.c users are particularly at risk, because tso_build_hdr()
does a memcpy(hdr, skb->data, hdr_len), which assumes that hdr_len bytes
of headers are present in the linear area starting at skb->data.

qdisc_pkt_len_segs_init() already does a dissection of gso packets.

Use pskb_may_pull() instead of skb_header_pointer() to make
sure drivers do not have to reimplement this.

Malicious packets could be fed to this path; detect them so that we can
drop them sooner, using a new SKB_DROP_REASON_SKB_BAD_GSO drop reason.

Fixes: e876f208af ("net: Add a software TSO helper API")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Joe Damato <joe@dama.to>
Link: https://patch.msgid.link/20260403221540.3297753-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2026-04-03 22:15:40 +00:00 committed by Jakub Kicinski
parent 30e02ec3b4
commit 7fb4c19670
2 changed files with 34 additions and 20 deletions

View File

@ -74,6 +74,7 @@
FN(UNHANDLED_PROTO) \
FN(SKB_CSUM) \
FN(SKB_GSO_SEG) \
FN(SKB_BAD_GSO) \
FN(SKB_UCOPY_FAULT) \
FN(DEV_HDR) \
FN(DEV_READY) \
@ -392,6 +393,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_SKB_CSUM,
/** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */
SKB_DROP_REASON_SKB_GSO_SEG,
/** @SKB_DROP_REASON_SKB_BAD_GSO: malicious gso packet. */
SKB_DROP_REASON_SKB_BAD_GSO,
/**
* @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space,
* e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx()

View File

@ -4101,16 +4101,16 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
}
EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
static void qdisc_pkt_len_segs_init(struct sk_buff *skb)
static enum skb_drop_reason qdisc_pkt_len_segs_init(struct sk_buff *skb)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
unsigned int hdr_len;
unsigned int hdr_len, tlen;
u16 gso_segs;
qdisc_skb_cb(skb)->pkt_len = skb->len;
if (!shinfo->gso_size) {
qdisc_skb_cb(skb)->pkt_segs = 1;
return;
return SKB_NOT_DROPPED_YET;
}
qdisc_skb_cb(skb)->pkt_segs = gso_segs = shinfo->gso_segs;
@ -4118,43 +4118,49 @@ static void qdisc_pkt_len_segs_init(struct sk_buff *skb)
/* To get more precise estimation of bytes sent on wire,
* we add to pkt_len the headers size of all segments
*/
if (unlikely(!skb_transport_header_was_set(skb)))
return;
/* mac layer + network layer */
if (!skb->encapsulation)
if (!skb->encapsulation) {
if (unlikely(!skb_transport_header_was_set(skb)))
return SKB_NOT_DROPPED_YET;
hdr_len = skb_transport_offset(skb);
else
} else {
hdr_len = skb_inner_transport_offset(skb);
}
/* + transport layer */
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
const struct tcphdr *th;
struct tcphdr _tcphdr;
th = skb_header_pointer(skb, hdr_len,
sizeof(_tcphdr), &_tcphdr);
if (likely(th))
hdr_len += __tcp_hdrlen(th);
if (!pskb_may_pull(skb, hdr_len + sizeof(struct tcphdr)))
return SKB_DROP_REASON_SKB_BAD_GSO;
th = (const struct tcphdr *)(skb->data + hdr_len);
tlen = __tcp_hdrlen(th);
if (tlen < sizeof(*th))
return SKB_DROP_REASON_SKB_BAD_GSO;
hdr_len += tlen;
if (!pskb_may_pull(skb, hdr_len))
return SKB_DROP_REASON_SKB_BAD_GSO;
} else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
struct udphdr _udphdr;
if (skb_header_pointer(skb, hdr_len,
sizeof(_udphdr), &_udphdr))
hdr_len += sizeof(struct udphdr);
if (!pskb_may_pull(skb, hdr_len + sizeof(struct udphdr)))
return SKB_DROP_REASON_SKB_BAD_GSO;
hdr_len += sizeof(struct udphdr);
}
/* prior pskb_may_pull() might have changed skb->head. */
shinfo = skb_shinfo(skb);
if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) {
int payload = skb->len - hdr_len;
/* Malicious packet. */
if (payload <= 0)
return;
return SKB_DROP_REASON_SKB_BAD_GSO;
gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size);
shinfo->gso_segs = gso_segs;
qdisc_skb_cb(skb)->pkt_segs = gso_segs;
}
qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
return SKB_NOT_DROPPED_YET;
}
static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
@ -4771,6 +4777,12 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
(SKBTX_SCHED_TSTAMP | SKBTX_BPF)))
__skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
reason = qdisc_pkt_len_segs_init(skb);
if (unlikely(reason)) {
dev_core_stats_tx_dropped_inc(dev);
kfree_skb_reason(skb, reason);
return -EINVAL;
}
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
@ -4778,7 +4790,6 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
skb_update_prio(skb);
qdisc_pkt_len_segs_init(skb);
tcx_set_ingress(skb, false);
#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {