From 034bbd806298e9ba4197dd1587b0348ee30996ea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Feb 2026 14:28:28 +0000 Subject: [PATCH 1/5] icmp: prevent possible overflow in icmp_global_allow() Following expression can overflow if sysctl_icmp_msgs_per_sec is big enough. sysctl_icmp_msgs_per_sec * delta / HZ; Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260216142832.3834174-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e216b6df6331..eff8487c0aba 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -250,7 +250,8 @@ bool icmp_global_allow(struct net *net) if (delta < HZ / 50) return false; - incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ; + incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec); + incr = div_u64((u64)incr * delta, HZ); if (!incr) return false; From 87b08913a9ae82082e276d237ece08fc8ee24380 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Feb 2026 14:28:29 +0000 Subject: [PATCH 2/5] inet: move icmp_global_{credit,stamp} to a separate cache line icmp_global_credit was meant to be changed ~1000 times per second, but if an admin sets net.ipv4.icmp_msgs_per_sec to a very high value, icmp_global_credit changes can inflict false sharing to surrounding fields that are read mostly. Move icmp_global_credit and icmp_global_stamp to a separate cacheline aligned group. Fixes: b056b4cd9178 ("icmp: move icmp_global.credit and icmp_global.stamp to per netns storage") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260216142832.3834174-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2dbd46fc4734..8e971c7bf164 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -88,6 +88,12 @@ struct netns_ipv4 { int sysctl_tcp_rcvbuf_low_rtt; __cacheline_group_end(netns_ipv4_read_rx); + /* ICMP rate limiter hot cache line. */ + __cacheline_group_begin_aligned(icmp); + atomic_t icmp_global_credit; + u32 icmp_global_stamp; + __cacheline_group_end_aligned(icmp); + struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; @@ -141,8 +147,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; int sysctl_icmp_msgs_per_sec; int sysctl_icmp_msgs_burst; - atomic_t icmp_global_credit; - u32 icmp_global_stamp; + u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; int ip_rt_min_advmss; From 0201eedb69b24a6be9b7c1716287a89c4dde2320 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Feb 2026 14:28:30 +0000 Subject: [PATCH 3/5] ipv6: icmp: remove obsolete code in icmpv6_xrlim_allow() Following part was needed before the blamed commit, because inet_getpeer_v6() second argument was the prefix. /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); Now inet_getpeer_v6() retrieves hosts, we need to remove @tmo adjustement or wider prefixes likes /24 allow 8x more ICMP to be sent for a given ratelimit. As we had this issue for a while, this patch changes net.ipv6.icmp.ratelimit default value from 1000ms to 100ms to avoid potential regressions. Also add a READ_ONCE() when reading net->ipv6.sysctl.icmpv6_time. Fixes: fd0273d7939f ("ipv6: Remove external dependency on rt6i_dst and rt6i_src") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Cc: Martin KaFai Lau Link: https://patch.msgid.link/20260216142832.3834174-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 7 ++++--- net/ipv6/af_inet6.c | 2 +- net/ipv6/icmp.c | 7 +------ 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 28c7e4f5ecf9..6921d8594b84 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -3234,12 +3234,13 @@ enhanced_dad - BOOLEAN =========== ratelimit - INTEGER - Limit the maximal rates for sending ICMPv6 messages. + Limit the maximal rates for sending ICMPv6 messages to a particular + peer. 0 to disable any limiting, - otherwise the minimal space between responses in milliseconds. + otherwise the space between responses in milliseconds. - Default: 1000 + Default: 100 ratemask - list of comma separated ranges For ICMPv6 message types matching the ranges in the ratemask, limit diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 31ba677d0442..69be0a67a140 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -952,7 +952,7 @@ static int __net_init inet6_net_init(struct net *net) int err = 0; net->ipv6.sysctl.bindv6only = 0; - net->ipv6.sysctl.icmpv6_time = 1*HZ; + net->ipv6.sysctl.icmpv6_time = HZ / 10; net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 375ecd779fda..0f41ca6f3d83 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -217,14 +217,9 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else if (dev && (dev->flags & IFF_LOOPBACK)) { res = true; } else { - struct rt6_info *rt = dst_rt6_info(dst); - int tmo = net->ipv6.sysctl.icmpv6_time; + int tmo = READ_ONCE(net->ipv6.sysctl.icmpv6_time); struct inet_peer *peer; - /* Give more bandwidth to wider prefixes. */ - if (rt->rt6i_dst.plen < 128) - tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); } From d8d9ef29886733428470655f2f99bc7493589fcb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Feb 2026 14:28:31 +0000 Subject: [PATCH 4/5] ipv4: icmp: icmpv4_xrlim_allow() optimization if net.ipv4.icmp_ratelimit is zero If net.ipv4.icmp_ratelimit is zero, we do not have to call inet_getpeer_v4() and inet_peer_xrlim_allow(). Both can be very expensive under DDOS. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260216142832.3834174-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index eff8487c0aba..a62b4c4033cc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -316,23 +316,29 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, struct dst_entry *dst = &rt->dst; struct inet_peer *peer; struct net_device *dev; + int peer_timeout; bool rc = true; if (!apply_ratelimit) return true; + peer_timeout = READ_ONCE(net->ipv4.sysctl_icmp_ratelimit); + if (!peer_timeout) + goto out; + /* No rate limit on loopback */ rcu_read_lock(); dev = dst_dev_rcu(dst); if (dev && (dev->flags & IFF_LOOPBACK)) - goto out; + goto out_unlock; peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, l3mdev_master_ifindex_rcu(dev)); - rc = inet_peer_xrlim_allow(peer, - READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); -out: + rc = inet_peer_xrlim_allow(peer, peer_timeout); + +out_unlock: rcu_read_unlock(); +out: if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); else From 9395b1bb1f14ae3fa1e4e2f7988f029cb1c009ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Feb 2026 14:28:32 +0000 Subject: [PATCH 5/5] ipv6: icmp: icmpv6_xrlim_allow() optimization if net.ipv6.icmp.ratelimit is zero If net.ipv6.icmp.ratelimit is zero we do not have to call inet_getpeer_v6() and inet_peer_xrlim_allow(). Both can be very expensive under DDOS. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260216142832.3834174-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/icmp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0f41ca6f3d83..813d2e9edb8b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -220,8 +220,12 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, int tmo = READ_ONCE(net->ipv6.sysctl.icmpv6_time); struct inet_peer *peer; - peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); - res = inet_peer_xrlim_allow(peer, tmo); + if (!tmo) { + res = true; + } else { + peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); + res = inet_peer_xrlim_allow(peer, tmo); + } } rcu_read_unlock(); if (!res)