From 732b463449fd0ef90acd13cda68eab1c91adb00c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:39 -0700 Subject: [PATCH 1/6] net/sched: netem: fix probability gaps in 4-state loss model The 4-state Markov chain in loss_4state() has gaps at the boundaries between transition probability ranges. The comparisons use: if (rnd < a4) else if (a4 < rnd && rnd < a1 + a4) When rnd equals a boundary value exactly, neither branch matches and no state transition occurs. The redundant lower-bound check (a4 < rnd) is already implied by being in the else branch. Remove the unnecessary lower-bound comparisons so the ranges are contiguous and every random value produces a transition, matching the GI (General and Intuitive) loss model specification. This bug goes back to original implementation of this model. Fixes: 661b79725fea ("netem: revised correlated loss generator") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-2-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 20df1c08b1e9..8ee72cac1faf 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -227,10 +227,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = LOST_IN_GAP_PERIOD; return true; - } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { + } else if (rnd < clg->a1 + clg->a4) { clg->state = LOST_IN_BURST_PERIOD; return true; - } else if (clg->a1 + clg->a4 < rnd) { + } else { clg->state = TX_IN_GAP_PERIOD; } @@ -247,9 +247,9 @@ static bool loss_4state(struct netem_sched_data *q) case LOST_IN_BURST_PERIOD: if (rnd < clg->a3) clg->state = TX_IN_BURST_PERIOD; - else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { + else if (rnd < clg->a2 + clg->a3) { clg->state = TX_IN_GAP_PERIOD; - } else if (clg->a2 + clg->a3 < rnd) { + } else { clg->state = LOST_IN_BURST_PERIOD; return true; } From 4185701fcce6b426b6c3630b25330dddd9c47b0d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:40 -0700 Subject: [PATCH 2/6] net/sched: netem: fix queue limit check to include reordered packets The queue limit check in netem_enqueue() uses q->t_len which only counts packets in the internal tfifo. Packets placed in sch->q by the reorder path (__qdisc_enqueue_head) are not counted, allowing the total queue occupancy to exceed sch->limit under reordering. Include sch->q.qlen in the limit check. Fixes: f8d4bc455047 ("net/sched: netem: account for backlog updates from child qdisc") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-3-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 8ee72cac1faf..d400a730eadd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -524,7 +524,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1 << get_random_u32_below(8); } - if (unlikely(q->t_len >= sch->limit)) { + if (unlikely(sch->q.qlen >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); From 986afaf809940577224a99c3a08d97a15eb37e93 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:41 -0700 Subject: [PATCH 3/6] net/sched: netem: only reseed PRNG when seed is explicitly provided netem_change() unconditionally reseeds the PRNG on every tc change command. If TCA_NETEM_PRNG_SEED is not specified, a new random seed is generated, destroying reproducibility for users who set a deterministic seed on a previous change. Move the initial random seed generation to netem_init() and only reseed in netem_change() when TCA_NETEM_PRNG_SEED is explicitly provided by the user. Fixes: 4072d97ddc44 ("netem: add prng attribute to netem_sched_data") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-4-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d400a730eadd..556f9747f0e7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1112,11 +1112,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); - if (tb[TCA_NETEM_PRNG_SEED]) + if (tb[TCA_NETEM_PRNG_SEED]) { q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); - else - q->prng.seed = get_random_u64(); - prandom_seed_state(&q->prng.prng_state, q->prng.seed); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + } unlock: sch_tree_unlock(sch); @@ -1139,6 +1138,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; q->loss_model = CLG_RANDOM; + q->prng.seed = get_random_u64(); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + ret = netem_change(sch, opt, extack); if (ret) pr_info("netem: change failed\n"); From 01801c359a74737b9b1aa28568b60374d857241a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:42 -0700 Subject: [PATCH 4/6] net/sched: netem: validate slot configuration Reject slot configurations that have no defensible meaning: - negative min_delay or max_delay - min_delay greater than max_delay - negative dist_delay or dist_jitter - negative max_packets or max_bytes Negative or out-of-order delays underflow in get_slot_next(), producing garbage intervals. Negative limits trip the per-slot accounting (packets_left/bytes_left <= 0) on the first packet of every slot, defeating the rate-limiting half of the slot feature. Note that dist_jitter has been silently coerced to its absolute value by get_slot() since the feature was introduced; rejecting negatives here converts that silent coercion into -EINVAL. The abs() can be removed in a follow-up. Fixes: 836af83b54e3 ("netem: support delivering packets in delayed time slots") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-5-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 556f9747f0e7..640b51be807a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -827,6 +827,29 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) return 0; } +static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + const struct tc_netem_slot *c = nla_data(attr); + + if (c->min_delay < 0 || c->max_delay < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot delay"); + return -EINVAL; + } + if (c->min_delay > c->max_delay) { + NL_SET_ERR_MSG_ATTR(extack, attr, "slot min delay greater than max delay"); + return -EINVAL; + } + if (c->dist_delay < 0 || c->dist_jitter < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative dist delay"); + return -EINVAL; + } + if (c->max_packets < 0 || c->max_bytes < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot limit"); + return -EINVAL; + } + return 0; +} + static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_slot *c = nla_data(attr); @@ -1040,6 +1063,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, goto table_free; } + if (tb[TCA_NETEM_SLOT]) { + ret = validate_slot(tb[TCA_NETEM_SLOT], extack); + if (ret) + goto table_free; + } + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; From 51e94e1e2fef351c74d69eb53666df808d26af95 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:43 -0700 Subject: [PATCH 5/6] net/sched: netem: fix slot delay calculation overflow get_slot_next() computes a random delay between min_delay and max_delay using: get_random_u32() * (max_delay - min_delay) >> 32 This overflows signed 64-bit arithmetic when the delay range exceeds approximately 2.1 seconds (2^31 nanoseconds), producing a negative result that effectively disables slot-based pacing. This is a realistic configuration for WAN emulation (e.g., slot 1s 5s). Use mul_u64_u32_shr() which handles the widening multiply without overflow. Fixes: 0a9fe5c375b5 ("netem: slotting with non-uniform distribution") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-6-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 640b51be807a..475c14b3dbdb 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -659,9 +659,8 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) if (!q->slot_dist) next_delay = q->slot_config.min_delay + - (get_random_u32() * - (q->slot_config.max_delay - - q->slot_config.min_delay) >> 32); + mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay, + get_random_u32(), 32); else next_delay = tabledist(q->slot_config.dist_delay, (s32)(q->slot_config.dist_jitter), From 90be9fedb218ee95a1cf59050d1306fbfb0e8b87 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:44 -0700 Subject: [PATCH 6/6] net/sched: netem: check for negative latency and jitter Reject requests with negative latency or jitter. A negative value added to current timestamp (u64) wraps to an enormous time_to_send, disabling dequeue. The original UAPI used u32 for these values; the conversion to 64-bit time values via TCA_NETEM_LATENCY64 and TCA_NETEM_JITTER64 allowed signed values to reach the kernel without validation. Jitter is already silently clamped by an abs() in netem_change(); that abs() can be removed in a follow-up once this rejection is in place. Fixes: 99803171ef04 ("netem: add uapi to express delay and jitter in nanoseconds") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-7-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 475c14b3dbdb..bc18e1976b6e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -826,6 +826,16 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) return 0; } +static int validate_time(const struct nlattr *attr, const char *name, + struct netlink_ext_ack *extack) +{ + if (nla_get_s64(attr) < 0) { + NL_SET_ERR_MSG_ATTR_FMT(extack, attr, "negative %s", name); + return -EINVAL; + } + return 0; +} + static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) { const struct tc_netem_slot *c = nla_data(attr); @@ -1068,6 +1078,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, goto table_free; } + if (tb[TCA_NETEM_LATENCY64]) { + ret = validate_time(tb[TCA_NETEM_LATENCY64], "latency", extack); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_JITTER64]) { + ret = validate_time(tb[TCA_NETEM_JITTER64], "jitter", extack); + if (ret) + goto table_free; + } + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg;