aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
authorGravatar Joanne Koong <joannelkoong@gmail.com> 2022-05-19 17:18:33 -0700
committerGravatar Jakub Kicinski <kuba@kernel.org> 2022-05-20 18:16:24 -0700
commitd5a42de8bdbe25081f07b801d8b35f4d75a791f4 (patch)
tree7bc53e45ea3628a75c25931f9f1d5c7f64d63cd5 /net/ipv4/tcp.c
parentwwan: iosm: use a flexible array rather than allocate short objects (diff)
downloadlinux-d5a42de8bdbe25081f07b801d8b35f4d75a791f4.tar.gz
linux-d5a42de8bdbe25081f07b801d8b35f4d75a791f4.tar.bz2
linux-d5a42de8bdbe25081f07b801d8b35f4d75a791f4.zip
net: Add a second bind table hashed by port and address
We currently have one tcp bind table (bhash) which hashes by port number only. In the socket bind path, we check for bind conflicts by traversing the specified port's inet_bind2_bucket while holding the bucket's spinlock (see inet_csk_get_port() and inet_csk_bind_conflict()). In instances where there are tons of sockets hashed to the same port at different addresses, checking for a bind conflict is time-intensive and can cause softirq cpu lockups, as well as stops new tcp connections since __inet_inherit_port() also contests for the spinlock. This patch proposes adding a second bind table, bhash2, that hashes by port and ip address. Searching the bhash2 table leads to significantly faster conflict resolution and less time holding the spinlock. Signed-off-by: Joanne Koong <joannelkoong@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c14
1 files changed, 12 insertions, 2 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 028513d3e2a2..9984d23a7f3e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4604,6 +4604,12 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
+ tcp_hashinfo.bind2_bucket_cachep =
+ kmem_cache_create("tcp_bind2_bucket",
+ sizeof(struct inet_bind2_bucket), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4626,8 +4632,9 @@ void __init tcp_init(void)
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind",
- sizeof(struct inet_bind_hashbucket),
+ alloc_large_system_hash("TCP bind bhash tables",
+ sizeof(struct inet_bind_hashbucket) +
+ sizeof(struct inet_bind2_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4636,9 +4643,12 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
+ tcp_hashinfo.bhash2 =
+ (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
+ INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}