mirror of
https://github.com/torvalds/linux.git
synced 2026-05-26 16:12:59 +02:00
Merge branch 'udp-4tuple-hash'
Philo Lu says:
====================
udp: Add 4-tuple hash for connected sockets
This patchset introduces a 4-tuple hash for connected UDP sockets, to make
connected UDP lookup faster.
Stress test results (with 1 cpu fully used) are shown below, in pps:
(1) _un-connected_ socket as server
[a] w/o hash4: 1,825,176
[b] w/ hash4: 1,831,750 (+0.36%)
(2) 500 _connected_ sockets as server
[c] w/o hash4: 290,860 (only 16% of [a])
[d] w/ hash4: 1,889,658 (+3.1% compared with [b])
With hash4, compute_score() is skipped during lookup, so [d] is slightly
better than [b].
Patch1: Add a new counter for hslot2 named hash4_cnt, to avoid a cache line
miss during lookup.
Patch2: Add hslot/hlist_nulls for 4-tuple hash.
Patch3 and 4: Implement 4-tuple hash for ipv4 and ipv6.
The detailed motivation is described in Patch 3.
The 4-tuple hash increases the size of udp_sock and udp_hslot. Thus it is
guarded by CONFIG_BASE_SMALL, i.e., it is a no-op when CONFIG_BASE_SMALL is enabled.
Intentionally, the feature is not available for udplite. Though udplite
shares some structs and functions with udp, its connect() remains unchanged.
So all udplite sockets perform the same as un-connected udp sockets.
Note, however, that udplite also incurs the additional memory consumption in
udp_sock and udptable.
changelogs:
v8 -> v9 (Paolo Abeni):
- Add explanation about udplite in cover letter
- Update tags for co-developers
- Add acked-by tags of Paolo and Willem
v7 -> v8:
- add EXPORT_SYMBOL for ipv6.ko build
v6 -> v7 (Kuniyuki Iwashima):
- export udp_ehashfn to be used by udpv6 rehash
v5 -> v6 (Paolo Abeni):
- move udp_table_hash4_init from patch2 to patch1
- use hlist_nulls for lookup-rehash race
- add test results in commit log
- add more comments, e.g., for rehash4 used in hash4
- add ipv6 support (Patch4), and refactor some functions for better
sharing, without functionality change
v4 -> v5 (Paolo Abeni):
- add CONFIG_BASE_SMALL with which udp hash4 does nothing
v3 -> v4 (Willem de Bruijn):
- fix mistakes in udp_pernet_table_alloc()
RFCv2 -> v3 (Gur Stavi):
- minor fix in udp_hashslot2() and udp_table_init()
- add rcu sync in rehash4()
RFCv1 -> RFCv2:
- add a new struct for hslot2
- remove the sockopt UDP_HASH4 because it has little side effect for
unconnected sockets
- add rehash in connect()
- re-organize the patch into 3 smaller ones
- other minor fixes
v8:
https://lore.kernel.org/all/20241108054836.123484-1-lulie@linux.alibaba.com/
v7:
https://lore.kernel.org/all/20241105121225.12513-1-lulie@linux.alibaba.com/
v6:
https://lore.kernel.org/all/20241031124550.20227-1-lulie@linux.alibaba.com/
v5:
https://lore.kernel.org/all/20241018114535.35712-1-lulie@linux.alibaba.com/
v4:
https://lore.kernel.org/all/20241012012918.70888-1-lulie@linux.alibaba.com/
v3:
https://lore.kernel.org/all/20241010090351.79698-1-lulie@linux.alibaba.com/
RFCv2:
https://lore.kernel.org/all/20240924110414.52618-1-lulie@linux.alibaba.com/
RFCv1:
https://lore.kernel.org/all/20240913100941.8565-1-lulie@linux.alibaba.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
ac60031f79
|
|
@ -56,6 +56,12 @@ struct udp_sock {
|
|||
int pending; /* Any pending frames ? */
|
||||
__u8 encap_type; /* Is this an Encapsulation socket? */
|
||||
|
||||
#if !IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
/* For UDP 4-tuple hash */
|
||||
__u16 udp_lrpa_hash;
|
||||
struct hlist_nulls_node udp_lrpa_node;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Following member retains the information to create a UDP header
|
||||
* when the socket is uncorked.
|
||||
|
|
@ -206,6 +212,11 @@ static inline void udp_allow_gso(struct sock *sk)
|
|||
#define udp_portaddr_for_each_entry_rcu(__sk, list) \
|
||||
hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
|
||||
|
||||
#if !IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
#define udp_lrpa_for_each_entry_rcu(__up, node, list) \
|
||||
hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node)
|
||||
#endif
|
||||
|
||||
#define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
|
||||
|
||||
#endif /* _LINUX_UDP_H */
|
||||
|
|
|
|||
|
|
@ -50,29 +50,56 @@ struct udp_skb_cb {
|
|||
#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb))
|
||||
|
||||
/**
|
||||
* struct udp_hslot - UDP hash slot
|
||||
* struct udp_hslot - UDP hash slot used by udp_table.hash/hash4
|
||||
*
|
||||
* @head: head of list of sockets
|
||||
* @nulls_head: head of list of sockets, only used by hash4
|
||||
* @count: number of sockets in 'head' list
|
||||
* @lock: spinlock protecting changes to head/count
|
||||
*/
|
||||
struct udp_hslot {
|
||||
struct hlist_head head;
|
||||
union {
|
||||
struct hlist_head head;
|
||||
/* hash4 uses hlist_nulls to avoid moving wrongly onto another
|
||||
* hlist, because rehash() can happen with lookup().
|
||||
*/
|
||||
struct hlist_nulls_head nulls_head;
|
||||
};
|
||||
int count;
|
||||
spinlock_t lock;
|
||||
} __attribute__((aligned(2 * sizeof(long))));
|
||||
} __aligned(2 * sizeof(long));
|
||||
|
||||
/**
|
||||
* struct udp_hslot_main - UDP hash slot used by udp_table.hash2
|
||||
*
|
||||
* @hslot: basic hash slot
|
||||
* @hash4_cnt: number of sockets in hslot4 of the same
|
||||
* (local port, local address)
|
||||
*/
|
||||
struct udp_hslot_main {
|
||||
struct udp_hslot hslot; /* must be the first member */
|
||||
#if !IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
u32 hash4_cnt;
|
||||
#endif
|
||||
} __aligned(2 * sizeof(long));
|
||||
#define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot))
|
||||
|
||||
/**
|
||||
* struct udp_table - UDP table
|
||||
*
|
||||
* @hash: hash table, sockets are hashed on (local port)
|
||||
* @hash2: hash table, sockets are hashed on (local port, local address)
|
||||
* @hash4: hash table, connected sockets are hashed on
|
||||
* (local port, local address, remote port, remote address)
|
||||
* @mask: number of slots in hash tables, minus 1
|
||||
* @log: log2(number of slots in hash table)
|
||||
*/
|
||||
struct udp_table {
|
||||
struct udp_hslot *hash;
|
||||
struct udp_hslot *hash2;
|
||||
struct udp_hslot_main *hash2;
|
||||
#if !IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
struct udp_hslot *hash4;
|
||||
#endif
|
||||
unsigned int mask;
|
||||
unsigned int log;
|
||||
};
|
||||
|
|
@ -84,6 +111,7 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
|
|||
{
|
||||
return &table->hash[udp_hashfn(net, num, table->mask)];
|
||||
}
|
||||
|
||||
/*
|
||||
* For secondary hash, net_hash_mix() is performed before calling
|
||||
* udp_hashslot2(), this explains difference with udp_hashslot()
|
||||
|
|
@ -91,9 +119,90 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
|
|||
static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
|
||||
unsigned int hash)
|
||||
{
|
||||
return &table->hash2[hash & table->mask];
|
||||
return &table->hash2[hash & table->mask].hslot;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
static inline void udp_table_hash4_init(struct udp_table *table)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct udp_hslot *udp_hashslot4(struct udp_table *table,
|
||||
unsigned int hash)
|
||||
{
|
||||
BUILD_BUG();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool udp_hashed4(const struct sock *sk)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline unsigned int udp_hash4_slot_size(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool udp_has_hash4(const struct udp_hslot *hslot2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void udp_hash4_inc(struct udp_hslot *hslot2)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void udp_hash4_dec(struct udp_hslot *hslot2)
|
||||
{
|
||||
}
|
||||
#else /* !CONFIG_BASE_SMALL */
|
||||
|
||||
/* Must be called with table->hash2 initialized */
|
||||
static inline void udp_table_hash4_init(struct udp_table *table)
|
||||
{
|
||||
table->hash4 = (void *)(table->hash2 + (table->mask + 1));
|
||||
for (int i = 0; i <= table->mask; i++) {
|
||||
table->hash2[i].hash4_cnt = 0;
|
||||
|
||||
INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i);
|
||||
table->hash4[i].count = 0;
|
||||
spin_lock_init(&table->hash4[i].lock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct udp_hslot *udp_hashslot4(struct udp_table *table,
|
||||
unsigned int hash)
|
||||
{
|
||||
return &table->hash4[hash & table->mask];
|
||||
}
|
||||
|
||||
static inline bool udp_hashed4(const struct sock *sk)
|
||||
{
|
||||
return !hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node);
|
||||
}
|
||||
|
||||
static inline unsigned int udp_hash4_slot_size(void)
|
||||
{
|
||||
return sizeof(struct udp_hslot);
|
||||
}
|
||||
|
||||
static inline bool udp_has_hash4(const struct udp_hslot *hslot2)
|
||||
{
|
||||
return UDP_HSLOT_MAIN(hslot2)->hash4_cnt;
|
||||
}
|
||||
|
||||
static inline void udp_hash4_inc(struct udp_hslot *hslot2)
|
||||
{
|
||||
UDP_HSLOT_MAIN(hslot2)->hash4_cnt++;
|
||||
}
|
||||
|
||||
static inline void udp_hash4_dec(struct udp_hslot *hslot2)
|
||||
{
|
||||
UDP_HSLOT_MAIN(hslot2)->hash4_cnt--;
|
||||
}
|
||||
#endif /* CONFIG_BASE_SMALL */
|
||||
|
||||
extern struct proto udp_prot;
|
||||
|
||||
extern atomic_long_t udp_memory_allocated;
|
||||
|
|
@ -193,13 +302,29 @@ static inline int udp_lib_hash(struct sock *sk)
|
|||
}
|
||||
|
||||
void udp_lib_unhash(struct sock *sk);
|
||||
void udp_lib_rehash(struct sock *sk, u16 new_hash);
|
||||
void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4);
|
||||
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
|
||||
const __be32 faddr, const __be16 fport);
|
||||
|
||||
static inline void udp_lib_close(struct sock *sk, long timeout)
|
||||
{
|
||||
sk_common_release(sk);
|
||||
}
|
||||
|
||||
/* hash4 routines shared between UDPv4/6 */
|
||||
#if IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
static inline void udp_lib_hash4(struct sock *sk, u16 hash)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void udp4_hash4(struct sock *sk)
|
||||
{
|
||||
}
|
||||
#else /* !CONFIG_BASE_SMALL */
|
||||
void udp_lib_hash4(struct sock *sk, u16 hash);
|
||||
void udp4_hash4(struct sock *sk);
|
||||
#endif /* CONFIG_BASE_SMALL */
|
||||
|
||||
int udp_lib_get_port(struct sock *sk, unsigned short snum,
|
||||
unsigned int hash2_nulladdr);
|
||||
|
||||
|
|
|
|||
245
net/ipv4/udp.c
245
net/ipv4/udp.c
|
|
@ -410,7 +410,6 @@ static int compute_score(struct sock *sk, const struct net *net,
|
|||
return score;
|
||||
}
|
||||
|
||||
INDIRECT_CALLABLE_SCOPE
|
||||
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
|
||||
const __be32 faddr, const __be16 fport)
|
||||
{
|
||||
|
|
@ -419,6 +418,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
|
|||
return __inet_ehashfn(laddr, lport, faddr, fport,
|
||||
udp_ehash_secret + net_hash_mix(net));
|
||||
}
|
||||
EXPORT_SYMBOL(udp_ehashfn);
|
||||
|
||||
/* called with rcu_read_lock() */
|
||||
static struct sock *udp4_lib_lookup2(const struct net *net,
|
||||
|
|
@ -478,6 +478,159 @@ static struct sock *udp4_lib_lookup2(const struct net *net,
|
|||
return result;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
static struct sock *udp4_lib_lookup4(const struct net *net,
|
||||
__be32 saddr, __be16 sport,
|
||||
__be32 daddr, unsigned int hnum,
|
||||
int dif, int sdif,
|
||||
struct udp_table *udptable)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
|
||||
u16 newhash4)
|
||||
{
|
||||
}
|
||||
|
||||
static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
|
||||
{
|
||||
}
|
||||
#else /* !CONFIG_BASE_SMALL */
|
||||
static struct sock *udp4_lib_lookup4(const struct net *net,
|
||||
__be32 saddr, __be16 sport,
|
||||
__be32 daddr, unsigned int hnum,
|
||||
int dif, int sdif,
|
||||
struct udp_table *udptable)
|
||||
{
|
||||
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
|
||||
const struct hlist_nulls_node *node;
|
||||
struct udp_hslot *hslot4;
|
||||
unsigned int hash4, slot;
|
||||
struct udp_sock *up;
|
||||
struct sock *sk;
|
||||
|
||||
hash4 = udp_ehashfn(net, daddr, hnum, saddr, sport);
|
||||
slot = hash4 & udptable->mask;
|
||||
hslot4 = &udptable->hash4[slot];
|
||||
INET_ADDR_COOKIE(acookie, saddr, daddr);
|
||||
|
||||
begin:
|
||||
/* SLAB_TYPESAFE_BY_RCU not used, so we don't need to touch sk_refcnt */
|
||||
udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
|
||||
sk = (struct sock *)up;
|
||||
if (inet_match(net, sk, acookie, ports, dif, sdif))
|
||||
return sk;
|
||||
}
|
||||
|
||||
/* if the nulls value we got at the end of this lookup is not the
|
||||
* expected one, we must restart lookup. We probably met an item that
|
||||
* was moved to another chain due to rehash.
|
||||
*/
|
||||
if (get_nulls_value(node) != slot)
|
||||
goto begin;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. */
|
||||
static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
|
||||
u16 newhash4)
|
||||
{
|
||||
struct udp_hslot *hslot4, *nhslot4;
|
||||
|
||||
hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
|
||||
nhslot4 = udp_hashslot4(udptable, newhash4);
|
||||
udp_sk(sk)->udp_lrpa_hash = newhash4;
|
||||
|
||||
if (hslot4 != nhslot4) {
|
||||
spin_lock_bh(&hslot4->lock);
|
||||
hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
|
||||
hslot4->count--;
|
||||
spin_unlock_bh(&hslot4->lock);
|
||||
|
||||
spin_lock_bh(&nhslot4->lock);
|
||||
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
|
||||
&nhslot4->nulls_head);
|
||||
nhslot4->count++;
|
||||
spin_unlock_bh(&nhslot4->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
|
||||
{
|
||||
struct udp_hslot *hslot2, *hslot4;
|
||||
|
||||
if (udp_hashed4(sk)) {
|
||||
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
|
||||
hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
|
||||
|
||||
spin_lock(&hslot4->lock);
|
||||
hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
|
||||
hslot4->count--;
|
||||
spin_unlock(&hslot4->lock);
|
||||
|
||||
spin_lock(&hslot2->lock);
|
||||
udp_hash4_dec(hslot2);
|
||||
spin_unlock(&hslot2->lock);
|
||||
}
|
||||
}
|
||||
|
||||
void udp_lib_hash4(struct sock *sk, u16 hash)
|
||||
{
|
||||
struct udp_hslot *hslot, *hslot2, *hslot4;
|
||||
struct net *net = sock_net(sk);
|
||||
struct udp_table *udptable;
|
||||
|
||||
/* Connected udp socket can re-connect to another remote address,
|
||||
* so rehash4 is needed.
|
||||
*/
|
||||
udptable = net->ipv4.udp_table;
|
||||
if (udp_hashed4(sk)) {
|
||||
udp_rehash4(udptable, sk, hash);
|
||||
return;
|
||||
}
|
||||
|
||||
hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash);
|
||||
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
|
||||
hslot4 = udp_hashslot4(udptable, hash);
|
||||
udp_sk(sk)->udp_lrpa_hash = hash;
|
||||
|
||||
spin_lock_bh(&hslot->lock);
|
||||
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
||||
reuseport_detach_sock(sk);
|
||||
|
||||
spin_lock(&hslot4->lock);
|
||||
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
|
||||
&hslot4->nulls_head);
|
||||
hslot4->count++;
|
||||
spin_unlock(&hslot4->lock);
|
||||
|
||||
spin_lock(&hslot2->lock);
|
||||
udp_hash4_inc(hslot2);
|
||||
spin_unlock(&hslot2->lock);
|
||||
|
||||
spin_unlock_bh(&hslot->lock);
|
||||
}
|
||||
EXPORT_SYMBOL(udp_lib_hash4);
|
||||
|
||||
/* call with sock lock */
|
||||
void udp4_hash4(struct sock *sk)
|
||||
{
|
||||
struct net *net = sock_net(sk);
|
||||
unsigned int hash;
|
||||
|
||||
if (sk_unhashed(sk) || sk->sk_rcv_saddr == htonl(INADDR_ANY))
|
||||
return;
|
||||
|
||||
hash = udp_ehashfn(net, sk->sk_rcv_saddr, sk->sk_num,
|
||||
sk->sk_daddr, sk->sk_dport);
|
||||
|
||||
udp_lib_hash4(sk, hash);
|
||||
}
|
||||
EXPORT_SYMBOL(udp4_hash4);
|
||||
#endif /* CONFIG_BASE_SMALL */
|
||||
|
||||
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
|
||||
* harder than this. -DaveM
|
||||
*/
|
||||
|
|
@ -486,13 +639,19 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
|
|||
int sdif, struct udp_table *udptable, struct sk_buff *skb)
|
||||
{
|
||||
unsigned short hnum = ntohs(dport);
|
||||
unsigned int hash2, slot2;
|
||||
struct udp_hslot *hslot2;
|
||||
struct sock *result, *sk;
|
||||
unsigned int hash2;
|
||||
|
||||
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
|
||||
slot2 = hash2 & udptable->mask;
|
||||
hslot2 = &udptable->hash2[slot2];
|
||||
hslot2 = udp_hashslot2(udptable, hash2);
|
||||
|
||||
if (udp_has_hash4(hslot2)) {
|
||||
result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum,
|
||||
dif, sdif, udptable);
|
||||
if (result) /* udp4_lib_lookup4 return sk or NULL */
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Lookup connected or non-wildcard socket */
|
||||
result = udp4_lib_lookup2(net, saddr, sport,
|
||||
|
|
@ -519,8 +678,7 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
|
|||
|
||||
/* Lookup wildcard sockets */
|
||||
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
|
||||
slot2 = hash2 & udptable->mask;
|
||||
hslot2 = &udptable->hash2[slot2];
|
||||
hslot2 = udp_hashslot2(udptable, hash2);
|
||||
|
||||
result = udp4_lib_lookup2(net, saddr, sport,
|
||||
htonl(INADDR_ANY), hnum, dif, sdif,
|
||||
|
|
@ -1935,6 +2093,18 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
|||
}
|
||||
EXPORT_SYMBOL(udp_pre_connect);
|
||||
|
||||
static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
{
|
||||
int res;
|
||||
|
||||
lock_sock(sk);
|
||||
res = __ip4_datagram_connect(sk, uaddr, addr_len);
|
||||
if (!res)
|
||||
udp4_hash4(sk);
|
||||
release_sock(sk);
|
||||
return res;
|
||||
}
|
||||
|
||||
int __udp_disconnect(struct sock *sk, int flags)
|
||||
{
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
|
|
@ -1994,6 +2164,8 @@ void udp_lib_unhash(struct sock *sk)
|
|||
hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
|
||||
hslot2->count--;
|
||||
spin_unlock(&hslot2->lock);
|
||||
|
||||
udp_unhash4(udptable, sk);
|
||||
}
|
||||
spin_unlock_bh(&hslot->lock);
|
||||
}
|
||||
|
|
@ -2003,7 +2175,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
|
|||
/*
|
||||
* inet_rcv_saddr was changed, we must rehash secondary hash
|
||||
*/
|
||||
void udp_lib_rehash(struct sock *sk, u16 newhash)
|
||||
void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
|
||||
{
|
||||
if (sk_hashed(sk)) {
|
||||
struct udp_table *udptable = udp_get_table_prot(sk);
|
||||
|
|
@ -2035,6 +2207,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
|
|||
spin_unlock(&nhslot2->lock);
|
||||
}
|
||||
|
||||
if (udp_hashed4(sk)) {
|
||||
udp_rehash4(udptable, sk, newhash4);
|
||||
|
||||
if (hslot2 != nhslot2) {
|
||||
spin_lock(&hslot2->lock);
|
||||
udp_hash4_dec(hslot2);
|
||||
spin_unlock(&hslot2->lock);
|
||||
|
||||
spin_lock(&nhslot2->lock);
|
||||
udp_hash4_inc(nhslot2);
|
||||
spin_unlock(&nhslot2->lock);
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&hslot->lock);
|
||||
}
|
||||
}
|
||||
|
|
@ -2046,7 +2231,11 @@ void udp_v4_rehash(struct sock *sk)
|
|||
u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
|
||||
inet_sk(sk)->inet_rcv_saddr,
|
||||
inet_sk(sk)->inet_num);
|
||||
udp_lib_rehash(sk, new_hash);
|
||||
u16 new_hash4 = udp_ehashfn(sock_net(sk),
|
||||
sk->sk_rcv_saddr, sk->sk_num,
|
||||
sk->sk_daddr, sk->sk_dport);
|
||||
|
||||
udp_lib_rehash(sk, new_hash, new_hash4);
|
||||
}
|
||||
|
||||
static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
|
||||
|
|
@ -2268,7 +2457,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
|
|||
udptable->mask;
|
||||
hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
|
||||
start_lookup:
|
||||
hslot = &udptable->hash2[hash2];
|
||||
hslot = &udptable->hash2[hash2].hslot;
|
||||
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
|
||||
}
|
||||
|
||||
|
|
@ -2539,14 +2728,13 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
|
|||
struct udp_table *udptable = net->ipv4.udp_table;
|
||||
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
|
||||
unsigned short hnum = ntohs(loc_port);
|
||||
unsigned int hash2, slot2;
|
||||
struct udp_hslot *hslot2;
|
||||
unsigned int hash2;
|
||||
__portpair ports;
|
||||
struct sock *sk;
|
||||
|
||||
hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
|
||||
slot2 = hash2 & udptable->mask;
|
||||
hslot2 = &udptable->hash2[slot2];
|
||||
hslot2 = udp_hashslot2(udptable, hash2);
|
||||
ports = INET_COMBINED_PORTS(rmt_port, hnum);
|
||||
|
||||
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
|
||||
|
|
@ -2940,7 +3128,7 @@ struct proto udp_prot = {
|
|||
.owner = THIS_MODULE,
|
||||
.close = udp_lib_close,
|
||||
.pre_connect = udp_pre_connect,
|
||||
.connect = ip4_datagram_connect,
|
||||
.connect = udp_connect,
|
||||
.disconnect = udp_disconnect,
|
||||
.ioctl = udp_ioctl,
|
||||
.init = udp_init_sock,
|
||||
|
|
@ -3187,7 +3375,7 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
|
|||
batch_sks = 0;
|
||||
|
||||
for (; state->bucket <= udptable->mask; state->bucket++) {
|
||||
struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
|
||||
struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot;
|
||||
|
||||
if (hlist_empty(&hslot2->head))
|
||||
continue;
|
||||
|
|
@ -3428,10 +3616,12 @@ __setup("uhash_entries=", set_uhash_entries);
|
|||
|
||||
void __init udp_table_init(struct udp_table *table, const char *name)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int i, slot_size;
|
||||
|
||||
slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
|
||||
udp_hash4_slot_size();
|
||||
table->hash = alloc_large_system_hash(name,
|
||||
2 * sizeof(struct udp_hslot),
|
||||
slot_size,
|
||||
uhash_entries,
|
||||
21, /* one slot per 2 MB */
|
||||
0,
|
||||
|
|
@ -3440,17 +3630,18 @@ void __init udp_table_init(struct udp_table *table, const char *name)
|
|||
UDP_HTABLE_SIZE_MIN,
|
||||
UDP_HTABLE_SIZE_MAX);
|
||||
|
||||
table->hash2 = table->hash + (table->mask + 1);
|
||||
table->hash2 = (void *)(table->hash + (table->mask + 1));
|
||||
for (i = 0; i <= table->mask; i++) {
|
||||
INIT_HLIST_HEAD(&table->hash[i].head);
|
||||
table->hash[i].count = 0;
|
||||
spin_lock_init(&table->hash[i].lock);
|
||||
}
|
||||
for (i = 0; i <= table->mask; i++) {
|
||||
INIT_HLIST_HEAD(&table->hash2[i].head);
|
||||
table->hash2[i].count = 0;
|
||||
spin_lock_init(&table->hash2[i].lock);
|
||||
INIT_HLIST_HEAD(&table->hash2[i].hslot.head);
|
||||
table->hash2[i].hslot.count = 0;
|
||||
spin_lock_init(&table->hash2[i].hslot.lock);
|
||||
}
|
||||
udp_table_hash4_init(table);
|
||||
}
|
||||
|
||||
u32 udp_flow_hashrnd(void)
|
||||
|
|
@ -3476,18 +3667,21 @@ static void __net_init udp_sysctl_init(struct net *net)
|
|||
static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries)
|
||||
{
|
||||
struct udp_table *udptable;
|
||||
unsigned int slot_size;
|
||||
int i;
|
||||
|
||||
udptable = kmalloc(sizeof(*udptable), GFP_KERNEL);
|
||||
if (!udptable)
|
||||
goto out;
|
||||
|
||||
udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot),
|
||||
slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
|
||||
udp_hash4_slot_size();
|
||||
udptable->hash = vmalloc_huge(hash_entries * slot_size,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!udptable->hash)
|
||||
goto free_table;
|
||||
|
||||
udptable->hash2 = udptable->hash + hash_entries;
|
||||
udptable->hash2 = (void *)(udptable->hash + hash_entries);
|
||||
udptable->mask = hash_entries - 1;
|
||||
udptable->log = ilog2(hash_entries);
|
||||
|
||||
|
|
@ -3496,10 +3690,11 @@ static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_ent
|
|||
udptable->hash[i].count = 0;
|
||||
spin_lock_init(&udptable->hash[i].lock);
|
||||
|
||||
INIT_HLIST_HEAD(&udptable->hash2[i].head);
|
||||
udptable->hash2[i].count = 0;
|
||||
spin_lock_init(&udptable->hash2[i].lock);
|
||||
INIT_HLIST_HEAD(&udptable->hash2[i].hslot.head);
|
||||
udptable->hash2[i].hslot.count = 0;
|
||||
spin_lock_init(&udptable->hash2[i].hslot.lock);
|
||||
}
|
||||
udp_table_hash4_init(udptable);
|
||||
|
||||
return udptable;
|
||||
|
||||
|
|
|
|||
117
net/ipv6/udp.c
117
net/ipv6/udp.c
|
|
@ -110,8 +110,19 @@ void udp_v6_rehash(struct sock *sk)
|
|||
u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
|
||||
&sk->sk_v6_rcv_saddr,
|
||||
inet_sk(sk)->inet_num);
|
||||
u16 new_hash4;
|
||||
|
||||
udp_lib_rehash(sk, new_hash);
|
||||
if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
|
||||
new_hash4 = udp_ehashfn(sock_net(sk),
|
||||
sk->sk_rcv_saddr, sk->sk_num,
|
||||
sk->sk_daddr, sk->sk_dport);
|
||||
} else {
|
||||
new_hash4 = udp6_ehashfn(sock_net(sk),
|
||||
&sk->sk_v6_rcv_saddr, sk->sk_num,
|
||||
&sk->sk_v6_daddr, sk->sk_dport);
|
||||
}
|
||||
|
||||
udp_lib_rehash(sk, new_hash, new_hash4);
|
||||
}
|
||||
|
||||
static int compute_score(struct sock *sk, const struct net *net,
|
||||
|
|
@ -216,6 +227,74 @@ static struct sock *udp6_lib_lookup2(const struct net *net,
|
|||
return result;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_BASE_SMALL)
|
||||
static struct sock *udp6_lib_lookup4(const struct net *net,
|
||||
const struct in6_addr *saddr, __be16 sport,
|
||||
const struct in6_addr *daddr,
|
||||
unsigned int hnum, int dif, int sdif,
|
||||
struct udp_table *udptable)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void udp6_hash4(struct sock *sk)
|
||||
{
|
||||
}
|
||||
#else /* !CONFIG_BASE_SMALL */
|
||||
static struct sock *udp6_lib_lookup4(const struct net *net,
|
||||
const struct in6_addr *saddr, __be16 sport,
|
||||
const struct in6_addr *daddr,
|
||||
unsigned int hnum, int dif, int sdif,
|
||||
struct udp_table *udptable)
|
||||
{
|
||||
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
|
||||
const struct hlist_nulls_node *node;
|
||||
struct udp_hslot *hslot4;
|
||||
unsigned int hash4, slot;
|
||||
struct udp_sock *up;
|
||||
struct sock *sk;
|
||||
|
||||
hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);
|
||||
slot = hash4 & udptable->mask;
|
||||
hslot4 = &udptable->hash4[slot];
|
||||
|
||||
begin:
|
||||
udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
|
||||
sk = (struct sock *)up;
|
||||
if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
|
||||
return sk;
|
||||
}
|
||||
|
||||
/* if the nulls value we got at the end of this lookup is not the
|
||||
* expected one, we must restart lookup. We probably met an item that
|
||||
* was moved to another chain due to rehash.
|
||||
*/
|
||||
if (get_nulls_value(node) != slot)
|
||||
goto begin;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void udp6_hash4(struct sock *sk)
|
||||
{
|
||||
struct net *net = sock_net(sk);
|
||||
unsigned int hash;
|
||||
|
||||
if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
|
||||
udp4_hash4(sk);
|
||||
return;
|
||||
}
|
||||
|
||||
if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr))
|
||||
return;
|
||||
|
||||
hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
|
||||
&sk->sk_v6_daddr, sk->sk_dport);
|
||||
|
||||
udp_lib_hash4(sk, hash);
|
||||
}
|
||||
#endif /* CONFIG_BASE_SMALL */
|
||||
|
||||
/* rcu_read_lock() must be held */
|
||||
struct sock *__udp6_lib_lookup(const struct net *net,
|
||||
const struct in6_addr *saddr, __be16 sport,
|
||||
|
|
@ -224,13 +303,19 @@ struct sock *__udp6_lib_lookup(const struct net *net,
|
|||
struct sk_buff *skb)
|
||||
{
|
||||
unsigned short hnum = ntohs(dport);
|
||||
unsigned int hash2, slot2;
|
||||
struct udp_hslot *hslot2;
|
||||
struct sock *result, *sk;
|
||||
unsigned int hash2;
|
||||
|
||||
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
|
||||
slot2 = hash2 & udptable->mask;
|
||||
hslot2 = &udptable->hash2[slot2];
|
||||
hslot2 = udp_hashslot2(udptable, hash2);
|
||||
|
||||
if (udp_has_hash4(hslot2)) {
|
||||
result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
|
||||
dif, sdif, udptable);
|
||||
if (result) /* udp6_lib_lookup4 return sk or NULL */
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Lookup connected or non-wildcard sockets */
|
||||
result = udp6_lib_lookup2(net, saddr, sport,
|
||||
|
|
@ -257,8 +342,7 @@ struct sock *__udp6_lib_lookup(const struct net *net,
|
|||
|
||||
/* Lookup wildcard sockets */
|
||||
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
|
||||
slot2 = hash2 & udptable->mask;
|
||||
hslot2 = &udptable->hash2[slot2];
|
||||
hslot2 = udp_hashslot2(udptable, hash2);
|
||||
|
||||
result = udp6_lib_lookup2(net, saddr, sport,
|
||||
&in6addr_any, hnum, dif, sdif,
|
||||
|
|
@ -859,7 +943,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
|
|||
udptable->mask;
|
||||
hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
|
||||
start_lookup:
|
||||
hslot = &udptable->hash2[hash2];
|
||||
hslot = &udptable->hash2[hash2].hslot;
|
||||
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
|
||||
}
|
||||
|
||||
|
|
@ -1065,14 +1149,13 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
|
|||
{
|
||||
struct udp_table *udptable = net->ipv4.udp_table;
|
||||
unsigned short hnum = ntohs(loc_port);
|
||||
unsigned int hash2, slot2;
|
||||
struct udp_hslot *hslot2;
|
||||
unsigned int hash2;
|
||||
__portpair ports;
|
||||
struct sock *sk;
|
||||
|
||||
hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
|
||||
slot2 = hash2 & udptable->mask;
|
||||
hslot2 = &udptable->hash2[slot2];
|
||||
hslot2 = udp_hashslot2(udptable, hash2);
|
||||
ports = INET_COMBINED_PORTS(rmt_port, hnum);
|
||||
|
||||
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
|
||||
|
|
@ -1169,6 +1252,18 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
|
||||
}
|
||||
|
||||
static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
{
|
||||
int res;
|
||||
|
||||
lock_sock(sk);
|
||||
res = __ip6_datagram_connect(sk, uaddr, addr_len);
|
||||
if (!res)
|
||||
udp6_hash4(sk);
|
||||
release_sock(sk);
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
|
||||
* @sk: socket we are sending on
|
||||
|
|
@ -1764,7 +1859,7 @@ struct proto udpv6_prot = {
|
|||
.owner = THIS_MODULE,
|
||||
.close = udp_lib_close,
|
||||
.pre_connect = udpv6_pre_connect,
|
||||
.connect = ip6_datagram_connect,
|
||||
.connect = udpv6_connect,
|
||||
.disconnect = udp_disconnect,
|
||||
.ioctl = udp_ioctl,
|
||||
.init = udpv6_init_sock,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user