From e66d117222047ea90f92e065f929bc0e0eec3647 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Feb 2022 20:50:30 -0800 Subject: ipv6/addrconf: switch to per netns inet6_addr_lst hash table IPv6 does not scale very well with the number of IPv6 addresses. It uses a global (shared by all netns) hash table with 256 buckets. Some functions like addrconf_verify_rtnl() and addrconf_ifdown() have to iterate all addresses in the hash table. I have seen addrconf_verify_rtnl() holding the cpu for 10ms or more. Switch to the per netns hashtable (and spinlock) added in prior patches. This considerably speeds up netns dismantle times on hosts with thousands of netns. This also has an impact on regular (fast path) IPv6 processing. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 77 ++++++++++++++++------------------------------------- 1 file changed, 23 insertions(+), 54 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dab291cd39ba..4f402bc38f05 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -146,11 +146,6 @@ static int ipv6_generate_stable_address(struct in6_addr *addr, #define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) -/* - * Configured unicast address hash table - */ -static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_SPINLOCK(addrconf_hash_lock); static void addrconf_verify(struct net *net); static void addrconf_verify_rtnl(struct net *net); @@ -1009,9 +1004,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev) return true; @@ -1022,20 +1015,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { - unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr); + struct net *net = dev_net(dev); + unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; - spin_lock(&addrconf_hash_lock); + spin_lock(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) { + if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { netdev_dbg(dev, "ipv6_add_addr: already assigned\n"); err = -EEXIST; } else { - hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } - spin_unlock(&addrconf_hash_lock); + spin_unlock(&net->ipv6.addrconf_hash_lock); return err; } @@ -1259,9 +1253,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - int state; enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; + struct net *net = dev_net(ifp->idev->dev); unsigned long expires; + int state; ASSERT_RTNL(); @@ -1273,9 +1268,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (state == INET6_IFADDR_STATE_DEAD) goto out; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); write_lock_bh(&ifp->idev->lock); @@ -1918,10 +1913,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, if (skip_dev_check) dev = NULL; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { ndev = ifp->idev->dev; - if (!net_eq(dev_net(ndev), net)) - continue; if (l3mdev_master_dev_rcu(ndev) != l3mdev) continue; @@ -2025,9 +2018,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct inet6_ifaddr *ifp, *result = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { @@ -2094,7 +2085,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; - struct net *net = dev_net(ifp->idev->dev); + struct net *net = dev_net(idev->dev); if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -3770,9 +3761,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { - struct hlist_head *h = &inet6_addr_lst[i]; + struct hlist_head *h = &net->ipv6.inet6_addr_lst[i]; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { @@ -3788,7 +3779,7 @@ restart: } } } - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); } write_lock_bh(&idev->lock); @@ -4286,10 +4277,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; /* sync with offset */ if (p < state->offset) { p++; @@ -4312,8 +4301,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); hlist_for_each_entry_continue_rcu(ifa, addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; state->offset++; return ifa; } @@ -4321,9 +4308,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { hlist_for_each_entry_rcu(ifa, - &inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; + &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { return ifa; } } @@ -4411,9 +4396,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) int ret = 0; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr) && (ifp->flags & IFA_F_HOMEADDRESS)) { ret = 1; @@ -4451,9 +4434,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, hash = inet6_addr_hash(net, addr); hash_found = false; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { hash_found = true; @@ -4498,7 +4479,7 @@ static void addrconf_verify_rtnl(struct net *net) for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -7233,7 +7214,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { int __init addrconf_init(void) { struct inet6_dev *idev; - int i, err; + int err; err = ipv6_addr_label_init(); if (err < 0) { @@ -7280,9 +7261,6 @@ int __init addrconf_init(void) ip6_route_init_special_entries(); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - INIT_HLIST_HEAD(&inet6_addr_lst[i]); - register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(&init_net); @@ -7343,7 +7321,6 @@ out: void addrconf_cleanup(void) { struct net_device *dev; - int i; unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); @@ -7361,14 +7338,6 @@ void addrconf_cleanup(void) } addrconf_ifdown(init_net.loopback_dev, true); - /* - * Check hash table. - */ - spin_lock_bh(&addrconf_hash_lock); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - WARN_ON(!hlist_empty(&inet6_addr_lst[i])); - spin_unlock_bh(&addrconf_hash_lock); - rtnl_unlock(); destroy_workqueue(addrconf_wq); -- cgit v1.2.3