Merge branch 'udp-4tuple-hash'

Philo Lu says:

====================
udp: Add 4-tuple hash for connected sockets

This patchset introduces 4-tuple hash for connected udp sockets, to make
connected udp lookup faster.

Stress test results (with 1 cpu fully used) are shown below, in pps:
(1) _un-connected_ socket as server
    [a] w/o hash4: 1,825,176
    [b] w/  hash4: 1,831,750 (+0.36%)

(2) 500 _connected_ sockets as server
    [c] w/o hash4:   290,860 (only 16% of [a])
    [d] w/  hash4: 1,889,658 (+3.1% compared with [b])
With hash4, compute_score is skipped during lookup, so [d] is slightly
better than [b].

Patch1: Add a new counter for hslot2 named hash4_cnt, to avoid a cache line
        miss during lookup.
Patch2: Add hslot/hlist_nulls for 4-tuple hash.
Patch3 and 4: Implement 4-tuple hash for ipv4 and ipv6.

The detailed motivation is described in Patch 3.

The 4-tuple hash increases the size of udp_sock and udp_hslot. It is therefore
compiled out with CONFIG_BASE_SMALL, i.e., it's a no-op with CONFIG_BASE_SMALL.

Intentionally, the feature is not available for udplite. Though udplite
shares some structs and functions with udp, its connect() remains unchanged,
so all udplite sockets behave the same as un-connected udp sockets.
Note that udplite nevertheless incurs the additional memory consumption in
udp_sock and udptable.

changelogs:
v8 -> v9 (Paolo Abeni):
- Add explanation about udplite in cover letter
- Update tags for co-developers
- Add acked-by tags of Paolo and Willem

v7 -> v8:
- add EXPORT_SYMBOL for ipv6.ko build

v6 -> v7 (Kuniyuki Iwashima):
- export udp_ehashfn to be used by udpv6 rehash

v5 -> v6 (Paolo Abeni):
- move udp_table_hash4_init from patch2 to patch1
- use hlist_nulls for lookup-rehash race
- add test results in commit log
- add more comment, e.g., for rehash4 used in hash4
- add ipv6 support (Patch4), and refactor some functions for better
  sharing, without functionality change

v4 -> v5 (Paolo Abeni):
- add CONFIG_BASE_SMALL with which udp hash4 does nothing

v3 -> v4 (Willem de Bruijn):
- fix mistakes in udp_pernet_table_alloc()

RFCv2 -> v3 (Gur Stavi):
- minor fix in udp_hashslot2() and udp_table_init()
- add rcu sync in rehash4()

RFCv1 -> RFCv2:
- add a new struct for hslot2
- remove the sockopt UDP_HASH4 because it has little side effect for
  unconnected sockets
- add rehash in connect()
- re-organize the patch into 3 smaller ones
- other minor fix

v8:
https://lore.kernel.org/all/20241108054836.123484-1-lulie@linux.alibaba.com/
v7:
https://lore.kernel.org/all/20241105121225.12513-1-lulie@linux.alibaba.com/
v6:
https://lore.kernel.org/all/20241031124550.20227-1-lulie@linux.alibaba.com/
v5:
https://lore.kernel.org/all/20241018114535.35712-1-lulie@linux.alibaba.com/
v4:
https://lore.kernel.org/all/20241012012918.70888-1-lulie@linux.alibaba.com/
v3:
https://lore.kernel.org/all/20241010090351.79698-1-lulie@linux.alibaba.com/
RFCv2:
https://lore.kernel.org/all/20240924110414.52618-1-lulie@linux.alibaba.com/
RFCv1:
https://lore.kernel.org/all/20240913100941.8565-1-lulie@linux.alibaba.com/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2024-11-18 11:56:21 +00:00
commit ac60031f79
4 changed files with 468 additions and 42 deletions

View File

@ -56,6 +56,12 @@ struct udp_sock {
int pending; /* Any pending frames ? */
__u8 encap_type; /* Is this an Encapsulation socket? */
#if !IS_ENABLED(CONFIG_BASE_SMALL)
/* For UDP 4-tuple hash */
__u16 udp_lrpa_hash;
struct hlist_nulls_node udp_lrpa_node;
#endif
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
@ -206,6 +212,11 @@ static inline void udp_allow_gso(struct sock *sk)
/* Iterate over the sockets (@__sk) on hlist @list, following the
 * __sk_common.skc_portaddr_node linkage, via hlist_for_each_entry_rcu().
 */
#define udp_portaddr_for_each_entry_rcu(__sk, list) \
hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
#if !IS_ENABLED(CONFIG_BASE_SMALL)
/* Iterate over the entries (@__up) on hlist_nulls @list, following the
 * udp_lrpa_node linkage; @node is the hlist_nulls_node cursor, which callers
 * can inspect after the loop (e.g. with get_nulls_value()).
 */
#define udp_lrpa_for_each_entry_rcu(__up, node, list) \
hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node)
#endif
/* True if @__sk's sk_protocol is IPPROTO_UDPLITE. The argument is
 * parenthesized so that any pointer-valued expression (e.g. "p + 1" or
 * "&obj") can be passed without operator-precedence surprises.
 */
#define IS_UDPLITE(__sk) ((__sk)->sk_protocol == IPPROTO_UDPLITE)
#endif /* _LINUX_UDP_H */

View File

@ -50,29 +50,56 @@ struct udp_skb_cb {
/* View the ->cb area of @__skb as a struct udp_skb_cb. */
#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb))
/**
* struct udp_hslot - UDP hash slot
* struct udp_hslot - UDP hash slot used by udp_table.hash/hash4
*
* @head: head of list of sockets
* @nulls_head: head of list of sockets, only used by hash4
* @count: number of sockets in 'head' list
* @lock: spinlock protecting changes to head/count
*/
struct udp_hslot {
struct hlist_head head;
union {
struct hlist_head head;
/* hash4 uses hlist_nulls to avoid moving wrongly onto another
* hlist, because rehash() can happen with lookup().
*/
struct hlist_nulls_head nulls_head;
};
int count;
spinlock_t lock;
} __attribute__((aligned(2 * sizeof(long))));
} __aligned(2 * sizeof(long));
/**
 * struct udp_hslot_main - UDP hash slot used by udp_table.hash2
 *
 * @hslot: basic hash slot; kept as the first member so that a pointer to it
 *         can be cast back to the enclosing struct udp_hslot_main
 * @hash4_cnt: number of sockets in hslot4 of the same
 *             (local port, local address); only present when
 *             CONFIG_BASE_SMALL is not enabled
 */
struct udp_hslot_main {
struct udp_hslot hslot; /* must be the first member */
#if !IS_ENABLED(CONFIG_BASE_SMALL)
u32 hash4_cnt;
#endif
} __aligned(2 * sizeof(long));
/* Cast a slot pointer to struct udp_hslot_main.
 * NOTE(review): only meaningful when @__hslot is the ->hslot member of a
 * hash2 entry; confirm at call sites.
 */
#define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot))
/**
* struct udp_table - UDP table
*
* @hash: hash table, sockets are hashed on (local port)
* @hash2: hash table, sockets are hashed on (local port, local address)
* @hash4: hash table, connected sockets are hashed on
* (local port, local address, remote port, remote address)
* @mask: number of slots in hash tables, minus 1
* @log: log2(number of slots in hash table)
*/
struct udp_table {
struct udp_hslot *hash;
struct udp_hslot *hash2;
struct udp_hslot_main *hash2;
#if !IS_ENABLED(CONFIG_BASE_SMALL)
struct udp_hslot *hash4;
#endif
unsigned int mask;
unsigned int log;
};
@ -84,6 +111,7 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
{
return &table->hash[udp_hashfn(net, num, table->mask)];
}
/*
* For secondary hash, net_hash_mix() is performed before calling
* udp_hashslot2(), this explains difference with udp_hashslot()
@ -91,9 +119,90 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
unsigned int hash)
{
return &table->hash2[hash & table->mask];
return &table->hash2[hash & table->mask].hslot;
}
#if IS_ENABLED(CONFIG_BASE_SMALL)
/* CONFIG_BASE_SMALL stub: no hash4 table exists, so there is nothing to set up. */
static inline void udp_table_hash4_init(struct udp_table *table)
{
}
/* CONFIG_BASE_SMALL stub: there are no hash4 slots in this configuration.
 * BUILD_BUG() is placed here so that a surviving call is caught at build
 * time (presumably; confirm against the BUILD_BUG() definition).
 */
static inline struct udp_hslot *udp_hashslot4(struct udp_table *table,
unsigned int hash)
{
BUILD_BUG();
return NULL;
}
/* CONFIG_BASE_SMALL stub: a socket is never hashed in hash4. */
static inline bool udp_hashed4(const struct sock *sk)
{
return false;
}
/* CONFIG_BASE_SMALL stub: hash4 contributes no bytes to the per-entry slot size. */
static inline unsigned int udp_hash4_slot_size(void)
{
return 0;
}
/* CONFIG_BASE_SMALL stub: a hash2 slot never has hash4 sockets. */
static inline bool udp_has_hash4(const struct udp_hslot *hslot2)
{
return false;
}
/* CONFIG_BASE_SMALL stub: no hash4 counter to increment. */
static inline void udp_hash4_inc(struct udp_hslot *hslot2)
{
}
/* CONFIG_BASE_SMALL stub: no hash4 counter to decrement. */
static inline void udp_hash4_dec(struct udp_hslot *hslot2)
{
}
#else /* !CONFIG_BASE_SMALL */
/*
 * Set up the hash4 part of @table.
 *
 * Must be called with table->hash2 and table->mask initialized: hash4 is
 * placed directly behind the (mask + 1) hash2 slots. Every hash2 slot gets
 * its hash4_cnt cleared and every hash4 slot gets an empty nulls list (whose
 * nulls value is the slot index), a zero count and an initialized lock.
 */
static inline void udp_table_hash4_init(struct udp_table *table)
{
	unsigned int i;	/* unsigned, to match the type of table->mask */

	table->hash4 = (void *)(table->hash2 + (table->mask + 1));
	for (i = 0; i <= table->mask; i++) {
		table->hash2[i].hash4_cnt = 0;

		INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i);
		table->hash4[i].count = 0;
		spin_lock_init(&table->hash4[i].lock);
	}
}
/* Return the hash4 slot of @table selected by @hash (masked with table->mask). */
static inline struct udp_hslot *udp_hashslot4(struct udp_table *table,
					      unsigned int hash)
{
	const unsigned int idx = hash & table->mask;

	return table->hash4 + idx;
}
/* True if @sk is currently linked into a hash4 chain through udp_lrpa_node. */
static inline bool udp_hashed4(const struct sock *sk)
{
	if (hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node))
		return false;

	return true;
}
/* Size in bytes that one hash4 slot adds to each table entry. */
static inline unsigned int udp_hash4_slot_size(void)
{
return sizeof(struct udp_hslot);
}
/* True if the hash2 slot @hslot2 has a non-zero hash4_cnt. */
static inline bool udp_has_hash4(const struct udp_hslot *hslot2)
{
	return UDP_HSLOT_MAIN(hslot2)->hash4_cnt != 0;
}
/* Account one more hash4 socket on the hash2 slot @hslot2. */
static inline void udp_hash4_inc(struct udp_hslot *hslot2)
{
	UDP_HSLOT_MAIN(hslot2)->hash4_cnt += 1;
}
/* Account one hash4 socket less on the hash2 slot @hslot2. */
static inline void udp_hash4_dec(struct udp_hslot *hslot2)
{
	UDP_HSLOT_MAIN(hslot2)->hash4_cnt -= 1;
}
#endif /* CONFIG_BASE_SMALL */
extern struct proto udp_prot;
extern atomic_long_t udp_memory_allocated;
@ -193,13 +302,29 @@ static inline int udp_lib_hash(struct sock *sk)
}
void udp_lib_unhash(struct sock *sk);
void udp_lib_rehash(struct sock *sk, u16 new_hash);
void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4);
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport);
/* Close handler: hands @sk to sk_common_release(); @timeout is not used here. */
static inline void udp_lib_close(struct sock *sk, long timeout)
{
sk_common_release(sk);
}
/* hash4 routines shared between UDPv4/6 */
#if IS_ENABLED(CONFIG_BASE_SMALL)
/* CONFIG_BASE_SMALL stub: hash4 is compiled out, inserting is a no-op. */
static inline void udp_lib_hash4(struct sock *sk, u16 hash)
{
}
/* CONFIG_BASE_SMALL stub: hash4 is compiled out, nothing to do for @sk. */
static inline void udp4_hash4(struct sock *sk)
{
}
#else /* !CONFIG_BASE_SMALL */
/* Insert @sk into the hash4 slot selected by @hash (or move it there if it is already hashed). */
void udp_lib_hash4(struct sock *sk, u16 hash);
/* Compute the IPv4 4-tuple hash of @sk and add it to hash4; call with the sock lock held. */
void udp4_hash4(struct sock *sk);
#endif /* CONFIG_BASE_SMALL */
int udp_lib_get_port(struct sock *sk, unsigned short snum,
unsigned int hash2_nulladdr);

View File

@ -410,7 +410,6 @@ static int compute_score(struct sock *sk, const struct net *net,
return score;
}
INDIRECT_CALLABLE_SCOPE
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport)
{
@ -419,6 +418,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
return __inet_ehashfn(laddr, lport, faddr, fport,
udp_ehash_secret + net_hash_mix(net));
}
EXPORT_SYMBOL(udp_ehashfn);
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(const struct net *net,
@ -478,6 +478,159 @@ static struct sock *udp4_lib_lookup2(const struct net *net,
return result;
}
#if IS_ENABLED(CONFIG_BASE_SMALL)
/* CONFIG_BASE_SMALL stub: there is no hash4 table, so the lookup never finds a socket. */
static struct sock *udp4_lib_lookup4(const struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum,
int dif, int sdif,
struct udp_table *udptable)
{
return NULL;
}
/* CONFIG_BASE_SMALL stub: no hash4 table, rehashing is a no-op. */
static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
u16 newhash4)
{
}
/* CONFIG_BASE_SMALL stub: no hash4 table, unhashing is a no-op. */
static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
{
}
#else /* !CONFIG_BASE_SMALL */
/*
 * Look up a socket in the 4-tuple hash of @udptable.
 *
 * The chain is selected by udp_ehashfn(net, daddr, hnum, saddr, sport) and
 * walked with the RCU nulls iterator. The first socket accepted by
 * inet_match() is returned; NULL is returned when the walk ends on the
 * expected chain without a match.
 */
static struct sock *udp4_lib_lookup4(const struct net *net,
				     __be32 saddr, __be16 sport,
				     __be32 daddr, unsigned int hnum,
				     int dif, int sdif,
				     struct udp_table *udptable)
{
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	const struct hlist_nulls_node *pos;
	struct udp_hslot *chain;
	unsigned int bucket;
	struct udp_sock *up;
	struct sock *sk;

	bucket = udp_ehashfn(net, daddr, hnum, saddr, sport) & udptable->mask;
	chain = udptable->hash4 + bucket;
	INET_ADDR_COOKIE(acookie, saddr, daddr);

	for (;;) {
		/* SLAB_TYPESAFE_BY_RCU not used, so sk_refcnt need not be touched */
		udp_lrpa_for_each_entry_rcu(up, pos, &chain->nulls_head) {
			sk = (struct sock *)up;
			if (inet_match(net, sk, acookie, ports, dif, sdif))
				return sk;
		}

		/* The nulls value at the end of the walk names the chain we
		 * finished on. If it is the one we started from, the lookup
		 * is complete; otherwise we probably followed an item that a
		 * rehash moved to another chain, so walk again.
		 */
		if (get_nulls_value(pos) == bucket)
			return NULL;
	}
}
/*
 * Move @sk from its current hash4 slot to the one selected by @newhash4 and
 * record @newhash4 in udp_lrpa_hash. hash4_cnt is left alone here: in hash4,
 * rehash can happen in connect(), where hash4_cnt stays unchanged.
 */
static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
			u16 newhash4)
{
	struct udp_hslot *from, *to;

	from = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
	to = udp_hashslot4(udptable, newhash4);
	udp_sk(sk)->udp_lrpa_hash = newhash4;

	/* Same slot: the stored hash is updated, the list linkage stays. */
	if (from == to)
		return;

	spin_lock_bh(&from->lock);
	hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
	from->count--;
	spin_unlock_bh(&from->lock);

	spin_lock_bh(&to->lock);
	hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
				 &to->nulls_head);
	to->count++;
	spin_unlock_bh(&to->lock);
}
/*
 * Remove @sk from hash4, if it is there: unlink it from its hash4 slot and
 * then drop the hash4 count of its hash2 slot. Uses plain spin_lock() on the
 * slot locks.
 */
static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
{
	struct udp_hslot *slot2, *slot4;

	if (!udp_hashed4(sk))
		return;

	slot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
	slot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);

	spin_lock(&slot4->lock);
	hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
	slot4->count--;
	spin_unlock(&slot4->lock);

	spin_lock(&slot2->lock);
	udp_hash4_dec(slot2);
	spin_unlock(&slot2->lock);
}
/*
 * Add @sk to the hash4 slot selected by @hash in net->ipv4.udp_table.
 *
 * If @sk is already in hash4 it is only moved via udp_rehash4(). Otherwise
 * @hash is stored in udp_lrpa_hash and, under hslot->lock (BH disabled), the
 * socket is detached from its reuseport group if it has one, linked at the
 * head of the hash4 chain and accounted in the hash4 count of its hash2 slot.
 */
void udp_lib_hash4(struct sock *sk, u16 hash)
{
struct udp_hslot *hslot, *hslot2, *hslot4;
struct net *net = sock_net(sk);
struct udp_table *udptable;
/* Connected udp socket can re-connect to another remote address,
 * so rehash4 is needed.
 */
udptable = net->ipv4.udp_table;
if (udp_hashed4(sk)) {
udp_rehash4(udptable, sk, hash);
return;
}
/* Slots are resolved from the hashes currently stored in the socket. */
hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
hslot4 = udp_hashslot4(udptable, hash);
udp_sk(sk)->udp_lrpa_hash = hash;
/* hslot->lock is the outer lock; hslot4 and hslot2 locks nest inside it. */
spin_lock_bh(&hslot->lock);
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
spin_lock(&hslot4->lock);
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
&hslot4->nulls_head);
hslot4->count++;
spin_unlock(&hslot4->lock);
spin_lock(&hslot2->lock);
udp_hash4_inc(hslot2);
spin_unlock(&hslot2->lock);
spin_unlock_bh(&hslot->lock);
}
EXPORT_SYMBOL(udp_lib_hash4);
/*
 * Hash @sk into hash4 using its IPv4 4-tuple. Call with the sock lock held.
 * Nothing is done for sockets that are unhashed or whose local address is
 * INADDR_ANY.
 */
void udp4_hash4(struct sock *sk)
{
	struct net *net = sock_net(sk);
	unsigned int hash4;

	if (sk_unhashed(sk))
		return;
	if (sk->sk_rcv_saddr == htonl(INADDR_ANY))
		return;

	hash4 = udp_ehashfn(net, sk->sk_rcv_saddr, sk->sk_num,
			    sk->sk_daddr, sk->sk_dport);
	udp_lib_hash4(sk, hash4);
}
EXPORT_SYMBOL(udp4_hash4);
#endif /* CONFIG_BASE_SMALL */
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@ -486,13 +639,19 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
unsigned int hash2;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
hslot2 = udp_hashslot2(udptable, hash2);
if (udp_has_hash4(hslot2)) {
result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum,
dif, sdif, udptable);
if (result) /* udp4_lib_lookup4 return sk or NULL */
return result;
}
/* Lookup connected or non-wildcard socket */
result = udp4_lib_lookup2(net, saddr, sport,
@ -519,8 +678,7 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
/* Lookup wildcard sockets */
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
hslot2 = udp_hashslot2(udptable, hash2);
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif, sdif,
@ -1935,6 +2093,18 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
EXPORT_SYMBOL(udp_pre_connect);
/*
 * connect() handler for UDP over IPv4: runs __ip4_datagram_connect() under
 * the sock lock and, when it succeeds, adds the socket to hash4.
 * Returns the result of __ip4_datagram_connect().
 */
static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int err;

	lock_sock(sk);
	err = __ip4_datagram_connect(sk, uaddr, addr_len);
	if (err == 0)
		udp4_hash4(sk);
	release_sock(sk);

	return err;
}
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@ -1994,6 +2164,8 @@ void udp_lib_unhash(struct sock *sk)
hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
udp_unhash4(udptable, sk);
}
spin_unlock_bh(&hslot->lock);
}
@ -2003,7 +2175,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
/*
* inet_rcv_saddr was changed, we must rehash secondary hash
*/
void udp_lib_rehash(struct sock *sk, u16 newhash)
void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
{
if (sk_hashed(sk)) {
struct udp_table *udptable = udp_get_table_prot(sk);
@ -2035,6 +2207,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
spin_unlock(&nhslot2->lock);
}
if (udp_hashed4(sk)) {
udp_rehash4(udptable, sk, newhash4);
if (hslot2 != nhslot2) {
spin_lock(&hslot2->lock);
udp_hash4_dec(hslot2);
spin_unlock(&hslot2->lock);
spin_lock(&nhslot2->lock);
udp_hash4_inc(nhslot2);
spin_unlock(&nhslot2->lock);
}
}
spin_unlock_bh(&hslot->lock);
}
}
@ -2046,7 +2231,11 @@ void udp_v4_rehash(struct sock *sk)
u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
inet_sk(sk)->inet_rcv_saddr,
inet_sk(sk)->inet_num);
udp_lib_rehash(sk, new_hash);
u16 new_hash4 = udp_ehashfn(sock_net(sk),
sk->sk_rcv_saddr, sk->sk_num,
sk->sk_daddr, sk->sk_dport);
udp_lib_rehash(sk, new_hash, new_hash4);
}
static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@ -2268,7 +2457,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
hslot = &udptable->hash2[hash2];
hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@ -2539,14 +2728,13 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
struct udp_table *udptable = net->ipv4.udp_table;
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
unsigned short hnum = ntohs(loc_port);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@ -2940,7 +3128,7 @@ struct proto udp_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udp_pre_connect,
.connect = ip4_datagram_connect,
.connect = udp_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udp_init_sock,
@ -3187,7 +3375,7 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
batch_sks = 0;
for (; state->bucket <= udptable->mask; state->bucket++) {
struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot;
if (hlist_empty(&hslot2->head))
continue;
@ -3428,10 +3616,12 @@ __setup("uhash_entries=", set_uhash_entries);
void __init udp_table_init(struct udp_table *table, const char *name)
{
unsigned int i;
unsigned int i, slot_size;
slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
udp_hash4_slot_size();
table->hash = alloc_large_system_hash(name,
2 * sizeof(struct udp_hslot),
slot_size,
uhash_entries,
21, /* one slot per 2 MB */
0,
@ -3440,17 +3630,18 @@ void __init udp_table_init(struct udp_table *table, const char *name)
UDP_HTABLE_SIZE_MIN,
UDP_HTABLE_SIZE_MAX);
table->hash2 = table->hash + (table->mask + 1);
table->hash2 = (void *)(table->hash + (table->mask + 1));
for (i = 0; i <= table->mask; i++) {
INIT_HLIST_HEAD(&table->hash[i].head);
table->hash[i].count = 0;
spin_lock_init(&table->hash[i].lock);
}
for (i = 0; i <= table->mask; i++) {
INIT_HLIST_HEAD(&table->hash2[i].head);
table->hash2[i].count = 0;
spin_lock_init(&table->hash2[i].lock);
INIT_HLIST_HEAD(&table->hash2[i].hslot.head);
table->hash2[i].hslot.count = 0;
spin_lock_init(&table->hash2[i].hslot.lock);
}
udp_table_hash4_init(table);
}
u32 udp_flow_hashrnd(void)
@ -3476,18 +3667,21 @@ static void __net_init udp_sysctl_init(struct net *net)
static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries)
{
struct udp_table *udptable;
unsigned int slot_size;
int i;
udptable = kmalloc(sizeof(*udptable), GFP_KERNEL);
if (!udptable)
goto out;
udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot),
slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
udp_hash4_slot_size();
udptable->hash = vmalloc_huge(hash_entries * slot_size,
GFP_KERNEL_ACCOUNT);
if (!udptable->hash)
goto free_table;
udptable->hash2 = udptable->hash + hash_entries;
udptable->hash2 = (void *)(udptable->hash + hash_entries);
udptable->mask = hash_entries - 1;
udptable->log = ilog2(hash_entries);
@ -3496,10 +3690,11 @@ static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_ent
udptable->hash[i].count = 0;
spin_lock_init(&udptable->hash[i].lock);
INIT_HLIST_HEAD(&udptable->hash2[i].head);
udptable->hash2[i].count = 0;
spin_lock_init(&udptable->hash2[i].lock);
INIT_HLIST_HEAD(&udptable->hash2[i].hslot.head);
udptable->hash2[i].hslot.count = 0;
spin_lock_init(&udptable->hash2[i].hslot.lock);
}
udp_table_hash4_init(udptable);
return udptable;

View File

@ -110,8 +110,19 @@ void udp_v6_rehash(struct sock *sk)
u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
u16 new_hash4;
udp_lib_rehash(sk, new_hash);
if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
new_hash4 = udp_ehashfn(sock_net(sk),
sk->sk_rcv_saddr, sk->sk_num,
sk->sk_daddr, sk->sk_dport);
} else {
new_hash4 = udp6_ehashfn(sock_net(sk),
&sk->sk_v6_rcv_saddr, sk->sk_num,
&sk->sk_v6_daddr, sk->sk_dport);
}
udp_lib_rehash(sk, new_hash, new_hash4);
}
static int compute_score(struct sock *sk, const struct net *net,
@ -216,6 +227,74 @@ static struct sock *udp6_lib_lookup2(const struct net *net,
return result;
}
#if IS_ENABLED(CONFIG_BASE_SMALL)
/* CONFIG_BASE_SMALL stub: there is no hash4 table, so the lookup never finds a socket. */
static struct sock *udp6_lib_lookup4(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr,
unsigned int hnum, int dif, int sdif,
struct udp_table *udptable)
{
return NULL;
}
/* CONFIG_BASE_SMALL stub: hash4 is compiled out, nothing to do for @sk. */
static void udp6_hash4(struct sock *sk)
{
}
#else /* !CONFIG_BASE_SMALL */
/*
 * Look up a socket in the 4-tuple hash of @udptable (IPv6).
 *
 * The chain is selected by udp6_ehashfn(net, daddr, hnum, saddr, sport) and
 * walked with the RCU nulls iterator. The first socket accepted by
 * inet6_match() is returned; NULL is returned when the walk ends on the
 * expected chain without a match.
 */
static struct sock *udp6_lib_lookup4(const struct net *net,
				     const struct in6_addr *saddr, __be16 sport,
				     const struct in6_addr *daddr,
				     unsigned int hnum, int dif, int sdif,
				     struct udp_table *udptable)
{
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	const struct hlist_nulls_node *pos;
	struct udp_hslot *chain;
	unsigned int bucket;
	struct udp_sock *up;
	struct sock *sk;

	bucket = udp6_ehashfn(net, daddr, hnum, saddr, sport) & udptable->mask;
	chain = udptable->hash4 + bucket;

	for (;;) {
		udp_lrpa_for_each_entry_rcu(up, pos, &chain->nulls_head) {
			sk = (struct sock *)up;
			if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
				return sk;
		}

		/* The nulls value at the end of the walk names the chain we
		 * finished on. If it is the one we started from, the lookup
		 * is complete; otherwise we probably followed an item that a
		 * rehash moved to another chain, so walk again.
		 */
		if (get_nulls_value(pos) == bucket)
			return NULL;
	}
}
/*
 * Hash @sk into hash4 after a successful connect. Sockets whose local address
 * is v4-mapped are handed to udp4_hash4(); otherwise the IPv6 4-tuple hash is
 * used. Nothing is done for sockets that are unhashed or whose local address
 * is the unspecified address.
 */
static void udp6_hash4(struct sock *sk)
{
	struct net *net = sock_net(sk);
	unsigned int hash4;

	if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
		udp4_hash4(sk);
		return;
	}

	if (sk_unhashed(sk))
		return;
	if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		return;

	hash4 = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
			     &sk->sk_v6_daddr, sk->sk_dport);
	udp_lib_hash4(sk, hash4);
}
#endif /* CONFIG_BASE_SMALL */
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
@ -224,13 +303,19 @@ struct sock *__udp6_lib_lookup(const struct net *net,
struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
unsigned int hash2;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
hslot2 = udp_hashslot2(udptable, hash2);
if (udp_has_hash4(hslot2)) {
result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
dif, sdif, udptable);
if (result) /* udp6_lib_lookup4 return sk or NULL */
return result;
}
/* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
@ -257,8 +342,7 @@ struct sock *__udp6_lib_lookup(const struct net *net,
/* Lookup wildcard sockets */
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
hslot2 = udp_hashslot2(udptable, hash2);
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
@ -859,7 +943,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
hslot = &udptable->hash2[hash2];
hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@ -1065,14 +1149,13 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
{
struct udp_table *udptable = net->ipv4.udp_table;
unsigned short hnum = ntohs(loc_port);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@ -1169,6 +1252,18 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
/*
 * connect() handler for UDP over IPv6: runs __ip6_datagram_connect() under
 * the sock lock and, when it succeeds, adds the socket to hash4.
 * Returns the result of __ip6_datagram_connect().
 */
static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int err;

	lock_sock(sk);
	err = __ip6_datagram_connect(sk, uaddr, addr_len);
	if (err == 0)
		udp6_hash4(sk);
	release_sock(sk);

	return err;
}
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@ -1764,7 +1859,7 @@ struct proto udpv6_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udpv6_pre_connect,
.connect = ip6_datagram_connect,
.connect = udpv6_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udpv6_init_sock,