aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/bat_v_elp.c6
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/bridge/br_private.h7
-rw-r--r--net/bridge/br_vlan.c3
-rw-r--r--net/can/raw.c15
-rw-r--r--net/ceph/messenger.c12
-rw-r--r--net/core/dev.c13
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/netpoll.c3
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c1
-rw-r--r--net/ipv4/inet_fragment.c29
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c7
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c38
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c4
-rw-r--r--net/ipv4/tcp_input.c31
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c12
-rw-r--r--net/ipv6/addrconf.c19
-rw-r--r--net/ipv6/af_inet6.c5
-rw-r--r--net/ipv6/anycast.c80
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/netfilter.c3
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c13
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c49
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c4
-rw-r--r--net/ipv6/route.c14
-rw-r--r--net/l2tp/l2tp_core.c9
-rw-r--r--net/netfilter/ipset/ip_set_core.c43
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c8
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/nf_conncount.c44
-rw-r--r--net/netfilter/nf_conntrack_core.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c11
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c14
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c11
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c11
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c11
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c15
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c11
-rw-r--r--net/netfilter/nf_tables_api.c46
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c58
-rw-r--r--net/netfilter/nft_compat.c24
-rw-r--r--net/netfilter/nft_flow_offload.c5
-rw-r--r--net/netfilter/nft_numgen.c127
-rw-r--r--net/netfilter/nft_osf.c2
-rw-r--r--net/netfilter/xt_IDLETIMER.c20
-rw-r--r--net/netfilter/xt_RATEEST.c10
-rw-r--r--net/netfilter/xt_hashlimit.c9
-rw-r--r--net/openvswitch/conntrack.c3
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/rxrpc/af_rxrpc.c27
-rw-r--r--net/rxrpc/ar-internal.h1
-rw-r--r--net/rxrpc/call_event.c18
-rw-r--r--net/rxrpc/output.c35
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_police.c36
-rw-r--r--net/sched/cls_flower.c14
-rw-r--r--net/sched/sch_fq.c31
-rw-r--r--net/sched/sch_netem.c9
-rw-r--r--net/sctp/output.c25
-rw-r--r--net/sctp/outqueue.c2
-rw-r--r--net/sctp/socket.c26
-rw-r--r--net/sctp/stream.c1
-rw-r--r--net/smc/af_smc.c11
-rw-r--r--net/smc/smc_cdc.c26
-rw-r--r--net/smc/smc_cdc.h60
-rw-r--r--net/smc/smc_core.c20
-rw-r--r--net/smc/smc_core.h5
-rw-r--r--net/smc/smc_ism.c43
-rw-r--r--net/smc/smc_ism.h1
-rw-r--r--net/smc/smc_wr.c4
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/auth_generic.c8
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c61
-rw-r--r--net/sunrpc/xdr.c7
-rw-r--r--net/tipc/discover.c19
-rw-r--r--net/tipc/link.c11
-rw-r--r--net/tipc/net.c45
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tipc/socket.c15
93 files changed, 915 insertions, 637 deletions
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 9f481cfdf77d..e8090f099eb8 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -352,19 +352,21 @@ out:
*/
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
{
+ static const size_t tvlv_padding = sizeof(__be32);
struct batadv_elp_packet *elp_packet;
unsigned char *elp_buff;
u32 random_seqno;
size_t size;
int res = -ENOMEM;
- size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN;
+ size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding;
hard_iface->bat_v.elp_skb = dev_alloc_skb(size);
if (!hard_iface->bat_v.elp_skb)
goto out;
skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
- elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+ elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb,
+ BATADV_ELP_HLEN + tvlv_padding);
elp_packet = (struct batadv_elp_packet *)elp_buff;
elp_packet->packet_type = BATADV_ELP;
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0fddc17106bd..5b71a289d04f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -275,7 +275,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
kfree(entry);
packet = (struct batadv_frag_packet *)skb_out->data;
- size = ntohs(packet->total_size);
+ size = ntohs(packet->total_size) + hdr_size;
/* Make room for the rest of the fragments. */
if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2920e06a5403..04c19a37e500 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -102,12 +102,18 @@ struct br_tunnel_info {
struct metadata_dst *tunnel_dst;
};
+/* private vlan flags */
+enum {
+ BR_VLFLAG_PER_PORT_STATS = BIT(0),
+};
+
/**
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
+ * @priv_flags: private (in-kernel) bridge vlan flags
* @stats: per-cpu VLAN statistics
* @br: if MASTER flag set, this points to a bridge struct
* @port: if MASTER flag unset, this points to a port struct
@@ -127,6 +133,7 @@ struct net_bridge_vlan {
struct rhash_head tnode;
u16 vid;
u16 flags;
+ u16 priv_flags;
struct br_vlan_stats __percpu *stats;
union {
struct net_bridge *br;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8c9297a01947..e84be08b8285 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -197,7 +197,7 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
v = container_of(rcu, struct net_bridge_vlan, rcu);
WARN_ON(br_vlan_is_master(v));
/* if we had per-port stats configured then free them here */
- if (v->brvlan->stats != v->stats)
+ if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS)
free_percpu(v->stats);
v->stats = NULL;
kfree(v);
@@ -264,6 +264,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
err = -ENOMEM;
goto out_filt;
}
+ v->priv_flags |= BR_VLFLAG_PER_PORT_STATS;
} else {
v->stats = masterv->stats;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 1051eee82581..3aab7664933f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -745,18 +745,19 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
} else
ifindex = ro->ifindex;
- if (ro->fd_frames) {
+ dev = dev_get_by_index(sock_net(sk), ifindex);
+ if (!dev)
+ return -ENXIO;
+
+ err = -EINVAL;
+ if (ro->fd_frames && dev->mtu == CANFD_MTU) {
if (unlikely(size != CANFD_MTU && size != CAN_MTU))
- return -EINVAL;
+ goto put_dev;
} else {
if (unlikely(size != CAN_MTU))
- return -EINVAL;
+ goto put_dev;
}
- dev = dev_get_by_index(sock_net(sk), ifindex);
- if (!dev)
- return -ENXIO;
-
skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57fcc6b4bf6e..2f126eff275d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -580,9 +580,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
struct bio_vec bvec;
int ret;
- /* sendpage cannot properly handle pages with page_count == 0,
- * we need to fallback to sendmsg if that's the case */
- if (page_count(page) >= 1)
+ /*
+ * sendpage cannot properly handle pages with page_count == 0,
+ * we need to fall back to sendmsg if that's the case.
+ *
+ * Same goes for slab pages: skb_can_coalesce() allows
+ * coalescing neighboring slab objects into a single frag which
+ * triggers one of hardened usercopy checks.
+ */
+ if (page_count(page) >= 1 && !PageSlab(page))
return __ceph_tcp_sendpage(sock, page, offset, size, more);
bvec.bv_page = page;
diff --git a/net/core/dev.c b/net/core/dev.c
index 77d43ae2a7bb..ddc551f24ba2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3272,7 +3272,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
}
skb = next;
- if (netif_xmit_stopped(txq) && skb) {
+ if (netif_tx_queue_stopped(txq) && skb) {
rc = NETDEV_TX_BUSY;
break;
}
@@ -5655,6 +5655,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+
+ /* eth_type_trans() assumes pkt_type is PACKET_HOST */
+ skb->pkt_type = PACKET_HOST;
+
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
@@ -5966,11 +5970,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done)
timeout = n->dev->gro_flush_timeout;
+ /* When the NAPI instance uses a timeout and keeps postponing
+ * it, we need to bound somehow the time packets are kept in
+ * the GRO layer
+ */
+ napi_gro_flush(n, !!timeout);
if (timeout)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
- else
- napi_gro_flush(n, false);
}
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d1..9a1327eb25fa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4852,18 +4852,17 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
} else {
struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
- u16 hnum = ntohs(tuple->ipv6.dport);
int sdif = inet6_sdif(skb);
if (proto == IPPROTO_TCP)
sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
src6, tuple->ipv6.sport,
- dst6, hnum,
+ dst6, ntohs(tuple->ipv6.dport),
dif, sdif, &refcounted);
else if (likely(ipv6_bpf_stub))
sk = ipv6_bpf_stub->udp6_lib_lookup(net,
src6, tuple->ipv6.sport,
- dst6, hnum,
+ dst6, tuple->ipv6.dport,
dif, sdif,
&udp_table, skb);
#endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 676f3ad629f9..588f475019d4 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1166,8 +1166,8 @@ ip_proto_again:
break;
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
+ !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 5da9552b186b..2b9fdbc43205 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -717,7 +717,8 @@ int netpoll_setup(struct netpoll *np)
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+ if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+ !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
continue;
np->local_ip.in6 = ifp->addr;
err = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e01274bd5e3e..33d9227a8b80 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = 0;
}
ret = dumpit(skb, cb);
- if (ret < 0)
+ if (ret)
break;
}
cb->family = idx;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 946de0e24c87..a8217e221e19 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4854,6 +4854,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
nf_reset(skb);
nf_reset_trace(skb);
+#ifdef CONFIG_NET_SWITCHDEV
+ skb->offload_fwd_mark = 0;
+ skb->offload_mr_fwd_mark = 0;
+#endif
+
if (!xnet)
return;
@@ -4944,6 +4949,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
*
* This is a helper to do that correctly considering GSO_BY_FRAGS.
*
+ * @skb: GSO skb
+ *
* @seg_len: The segmented length (from skb_gso_*_seglen). In the
* GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
*
diff --git a/net/core/sock.c b/net/core/sock.c
index 6fcc4bc07d19..080a880a1761 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3279,6 +3279,7 @@ int sock_load_diag_module(int family, int protocol)
#ifdef CONFIG_INET
if (family == AF_INET &&
+ protocol != IPPROTO_RAW &&
!rcu_access_pointer(inet_protos[protocol]))
return -ENOENT;
#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bcb11f3a27c0..760a9e52e02b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -178,21 +178,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- void *arg)
+ void *arg,
+ struct inet_frag_queue **prev)
{
struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
- int err;
q = inet_frag_alloc(nf, f, arg);
- if (!q)
+ if (!q) {
+ *prev = ERR_PTR(-ENOMEM);
return NULL;
-
+ }
mod_timer(&q->timer, jiffies + nf->timeout);
- err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
- f->rhash_params);
- if (err < 0) {
+ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+ &q->node, f->rhash_params);
+ if (*prev) {
q->flags |= INET_FRAG_COMPLETE;
inet_frag_kill(q);
inet_frag_destroy(q);
@@ -204,22 +205,22 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- struct inet_frag_queue *fq;
+ struct inet_frag_queue *fq = NULL, *prev;
if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
return NULL;
rcu_read_lock();
- fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
- if (fq) {
+ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (!prev)
+ fq = inet_frag_create(nf, key, &prev);
+ if (prev && !IS_ERR(prev)) {
+ fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
fq = NULL;
- rcu_read_unlock();
- return fq;
}
rcu_read_unlock();
-
- return inet_frag_create(nf, key);
+ return fq;
}
EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9b0158fa431f..d6ee343fdb86 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -722,10 +722,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
if (ip_is_fragment(&iph)) {
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb) {
- if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
- return skb;
- if (pskb_trim_rcsum(skb, netoff + len))
- return skb;
+ if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+ if (pskb_trim_rcsum(skb, netoff + len)) {
+ kfree_skb(skb);
+ return NULL;
+ }
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
if (ip_defrag(net, skb, user))
return NULL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c09219e7f230..5dbec21856f4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -939,7 +939,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -956,6 +956,7 @@ alloc_new_skb:
if (datalen > mtu - fragheaderlen)
datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 26c36cccabdc..fffcc130900e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1246,7 +1246,7 @@ int ip_setsockopt(struct sock *sk, int level,
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
optname < BPFILTER_IPT_SET_MAX)
err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
@@ -1559,7 +1559,7 @@ int ip_getsockopt(struct sock *sk, int level,
int err;
err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
@@ -1596,7 +1596,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index dde671e97829..c248e0dccbe1 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
+ iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df;
iph->protocol = proto;
iph->tos = tos;
iph->daddr = dst;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ce1512b02cb2..fd3f9e8a74da 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
int ret;
ret = xt_register_target(&masquerade_tg_reg);
+ if (ret)
+ return ret;
- if (ret == 0)
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ xt_unregister_target(&masquerade_tg_reg);
return ret;
}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index a9d5e013e555..41327bb99093 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -147,28 +147,50 @@ static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier was already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
/* Register for device down reports */
- register_netdevice_notifier(&masq_dev_notifier);
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
/* Register IP address change reports */
- register_inetaddr_notifier(&masq_inet_notifier);
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f1193e1e928a..6847de1d1db8 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv4_type);
return ret;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2868ef28ce52..a9d9555a973f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
}
@@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us;
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ ca_rtt_us = seq_rtt_us;
+ }
}
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -4268,7 +4272,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
@@ -4363,6 +4367,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
to->tstamp = from->tstamp;
+ skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
}
return true;
@@ -5188,7 +5193,17 @@ send_now:
if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now;
- tp->compressed_ack++;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+ tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = 0;
+ }
+
+ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ goto send_now;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c34b97d365d..3f510cad0b3e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack)) {
+ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack);
- tp->compressed_ack = 0;
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 676020663ce8..091c53925e4d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
+ s32 remaining;
start_ts = tcp_retransmit_stamp(sk);
if (!icsk->icsk_user_timeout || !start_ts)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- if (elapsed >= icsk->icsk_user_timeout)
+ remaining = icsk->icsk_user_timeout - elapsed;
+ if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
- else
- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
/**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
timeout = jiffies_to_msecs(timeout);
}
- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
/* A write timeout has occurred. Process the after effects. */
@@ -740,7 +742,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 63a808d5af15..045597b9a7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -179,7 +179,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
bool send_na);
-static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3439,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
struct net *net = dev_net(dev);
@@ -3513,7 +3514,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (idev->if_flags & IF_READY) {
/* device is already configured -
* but resend MLD reports, we might
@@ -3521,6 +3522,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* multicast snooping switches
*/
ipv6_mc_up(idev);
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ addrconf_dad_run(idev, true);
rt6_sync_up(dev, RTNH_F_LINKDOWN);
break;
}
@@ -3555,7 +3559,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
- addrconf_dad_run(idev);
+ addrconf_dad_run(idev, false);
/* Device has an address by now */
rt6_sync_up(dev, RTNH_F_DEAD);
@@ -4173,16 +4177,19 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
addrconf_verify_rtnl();
}
-static void addrconf_dad_run(struct inet6_dev *idev)
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
{
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
spin_lock(&ifp->lock);
- if (ifp->flags & IFA_F_TENTATIVE &&
- ifp->state == INET6_IFADDR_STATE_DAD)
+ if ((ifp->flags & IFA_F_TENTATIVE &&
+ ifp->state == INET6_IFADDR_STATE_DAD) || restart) {
+ if (restart)
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
addrconf_dad_kick(ifp);
+ }
spin_unlock(&ifp->lock);
}
read_unlock_bh(&idev->lock);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..f0cd291034f0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
+ err = ipv6_anycast_init();
+ if (err)
+ goto ipv6_anycast_fail;
err = addrconf_init();
if (err)
goto addrconf_fail;
@@ -1091,6 +1094,8 @@ ipv6_frag_fail:
ipv6_exthdrs_fail:
addrconf_cleanup();
addrconf_fail:
+ ipv6_anycast_cleanup();
+ipv6_anycast_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..94999058e110 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
#include <net/checksum.h>
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT)
+/* anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
/*
* socket join an anycast group
*/
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
rtnl_unlock();
}
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+ unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+ spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+ spin_lock(&acaddr_hash_lock);
+ hlist_del_init_rcu(&aca->aca_addr_lst);
+ spin_unlock(&acaddr_hash_lock);
+}
+
static void aca_get(struct ifacaddr6 *aca)
{
refcount_inc(&aca->aca_refcnt);
}
+static void aca_free_rcu(struct rcu_head *h)
+{
+ struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+ fib6_info_release(aca->aca_rt);
+ kfree(aca);
+}
+
static void aca_put(struct ifacaddr6 *ac)
{
if (refcount_dec_and_test(&ac->aca_refcnt)) {
- fib6_info_release(ac->aca_rt);
- kfree(ac);
+ call_rcu(&ac->rcu, aca_free_rcu);
}
}
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
aca->aca_addr = *addr;
fib6_info_hold(f6i);
aca->aca_rt = f6i;
+ INIT_HLIST_NODE(&aca->aca_addr_lst);
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca_get(aca);
write_unlock_bh(&idev->lock);
+ ipv6_add_acaddr_hash(net, aca);
+
ip6_ins_rt(net, f6i);
addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
else
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
+
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr)
{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct net_device *nh_dev;
+ struct ifacaddr6 *aca;
bool found = false;
rcu_read_lock();
if (dev)
found = ipv6_chk_acast_dev(dev, addr);
else
- for_each_netdev_rcu(net, dev)
- if (ipv6_chk_acast_dev(dev, addr)) {
+ hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+ aca_addr_lst) {
+ nh_dev = fib6_info_nh_dev(aca->aca_rt);
+ if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+ continue;
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
found = true;
break;
}
+ }
rcu_read_unlock();
return found;
}
@@ -540,3 +591,24 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
+
+/* Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+ int i;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+ return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+ int i;
+
+ spin_lock(&acaddr_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+ spin_unlock(&acaddr_hash_lock);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1b8bc008b53b..ae3786132c23 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -591,7 +591,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
/* fib entries are never clones */
if (arg.filter.flags & RTM_F_CLONED)
- return skb->len;
+ goto out;
w = (void *)cb->args[2];
if (!w) {
@@ -621,7 +621,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
if (arg.filter.dump_all_families)
- return skb->len;
+ goto out;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
return -ENOENT;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89e0d5118afe..827a3f5ff3bb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1354,7 +1354,7 @@ emsgsize:
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1378,6 +1378,7 @@ alloc_new_skb:
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 5ae8e1c51079..8b075f0bc351 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,7 +24,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
- .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
+ .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
+ rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 491f808e356a..29c7f1915a96 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
int err;
err = xt_register_target(&masquerade_tg6_reg);
- if (err == 0)
- nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ return err;
+
+ err = nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ xt_unregister_target(&masquerade_tg6_reg);
return err;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b8ac369f98ad..d219979c3e52 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -587,11 +587,16 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
*/
ret = -EINPROGRESS;
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len &&
- nf_ct_frag6_reasm(fq, skb, dev))
- ret = 0;
- else
+ fq->q.meat == fq->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ if (nf_ct_frag6_reasm(fq, skb, dev))
+ ret = 0;
+ skb->_skb_refdst = orefdst;
+ } else {
skb_dst_drop(skb);
+ }
out_unlock:
spin_unlock_bh(&fq->q.lock);
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 3e4bf2286abe..0ad0da5a2600 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -132,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
* of ipv6 addresses being deleted), we also need to add an upper
* limit to the number of queued work items.
*/
-static int masq_inet_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static int masq_inet6_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
@@ -171,30 +171,53 @@ static int masq_inet_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
+static struct notifier_block masq_inet6_notifier = {
+ .notifier_call = masq_inet6_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv6_register_notifier(void)
+int nf_nat_masquerade_ipv6_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier is already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
+
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
+
+ ret = register_inet6addr_notifier(&masq_inet6_notifier);
+ if (ret)
+ goto err_unregister;
- register_netdevice_notifier(&masq_dev_notifier);
- register_inet6addr_notifier(&masq_inet_notifier);
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
- unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_inet6addr_notifier(&masq_inet6_notifier);
unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index dd0122f3cffe..e06c82e9dfcd 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv6_register_notifier();
+ ret = nf_nat_masquerade_ipv6_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv6_type);
return ret;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2a7423c39456..059f0531f7c1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2232,8 +2232,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
- if (dst_hold_safe(&rt->dst))
- rt6_remove_exception_rt(rt);
+ rt6_remove_exception_rt(rt);
} else {
struct fib6_info *from;
struct fib6_node *fn;
@@ -2360,10 +2359,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
+ int oif = sk->sk_bound_dev_if;
struct dst_entry *dst;
- ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
+ if (!oif && skb->dev)
+ oif = l3mdev_master_ifindex(skb->dev);
+
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -3214,8 +3216,8 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
goto out;
- if (dst_hold_safe(&rt->dst))
- rc = rt6_remove_exception_rt(rt);
+
+ rc = rt6_remove_exception_rt(rt);
out:
return rc;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 82cdf9020b53..26f1d435696a 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1490,12 +1490,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
goto err_sock;
}
- sk = sock->sk;
-
- sock_hold(sk);
- tunnel->sock = sk;
tunnel->l2tp_net = net;
-
pn = l2tp_pernet(net);
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
@@ -1510,6 +1505,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ sk = sock->sk;
+ sock_hold(sk);
+ tunnel->sock = sk;
+
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
.sk_user_data = tunnel,
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247bb7d..1577f2f76060 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -55,11 +55,15 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
-/* When the nfnl mutex is held: */
+/* When the nfnl mutex or ip_set_ref_lock is held: */
#define ip_set_dereference(p) \
- rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&ip_set_ref_lock))
#define ip_set(inst, id) \
ip_set_dereference((inst)->ip_set_list)[id]
+#define ip_set_ref_netlink(inst,id) \
+ rcu_dereference_raw((inst)->ip_set_list)[id]
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
@@ -693,21 +697,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
EXPORT_SYMBOL_GPL(ip_set_put_byindex);
/* Get the name of a set behind a set index.
- * We assume the set is referenced, so it does exist and
- * can't be destroyed. The set cannot be renamed due to
- * the referencing either.
- *
+ * Set itself is protected by RCU, but its name isn't: to protect against
+ * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
+ * name.
*/
-const char *
-ip_set_name_byindex(struct net *net, ip_set_id_t index)
+void
+ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
{
- const struct ip_set *set = ip_set_rcu_get(net, index);
+ struct ip_set *set = ip_set_rcu_get(net, index);
BUG_ON(!set);
- BUG_ON(set->ref == 0);
- /* Referenced, so it's safe */
- return set->name;
+ read_lock_bh(&ip_set_ref_lock);
+ strncpy(name, set->name, IPSET_MAXNAMELEN);
+ read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -961,7 +964,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Wraparound */
goto cleanup;
- list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
+ list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
@@ -973,7 +976,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Use new list */
index = inst->ip_set_max;
inst->ip_set_max = i;
- kfree(tmp);
+ kvfree(tmp);
ret = 0;
} else if (ret) {
goto cleanup;
@@ -1153,7 +1156,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
if (!set)
return -ENOENT;
- read_lock_bh(&ip_set_ref_lock);
+ write_lock_bh(&ip_set_ref_lock);
if (set->ref != 0) {
ret = -IPSET_ERR_REFERENCED;
goto out;
@@ -1170,7 +1173,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
strncpy(set->name, name2, IPSET_MAXNAMELEN);
out:
- read_unlock_bh(&ip_set_ref_lock);
+ write_unlock_bh(&ip_set_ref_lock);
return ret;
}
@@ -1252,7 +1255,7 @@ ip_set_dump_done(struct netlink_callback *cb)
struct ip_set_net *inst =
(struct ip_set_net *)cb->args[IPSET_CB_NET];
ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
- struct ip_set *set = ip_set(inst, index);
+ struct ip_set *set = ip_set_ref_netlink(inst, index);
if (set->variant->uref)
set->variant->uref(set, cb, false);
@@ -1441,7 +1444,7 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- set = ip_set(inst, index);
+ set = ip_set_ref_netlink(inst, index);
if (set->variant->uref)
set->variant->uref(set, cb, false);
pr_debug("release set %s\n", set->name);
@@ -2059,7 +2062,7 @@ ip_set_net_init(struct net *net)
if (inst->ip_set_max >= IPSET_INVALID_ID)
inst->ip_set_max = IPSET_INVALID_ID - 1;
- list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
+ list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
return -ENOMEM;
inst->is_deleted = false;
@@ -2087,7 +2090,7 @@ ip_set_net_exit(struct net *net)
}
}
nfnl_unlock(NFNL_SUBSYS_IPSET);
- kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+ kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
static struct pernet_operations ip_set_net_ops = {
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index d391485a6acd..613e18e720a4 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ if (e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ if (e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
@@ -493,13 +493,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ if (e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ if (e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 072a658fde04..4eef55da0878 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
{
struct set_elem *e = container_of(rcu, struct set_elem, rcu);
struct ip_set *set = e->set;
- struct list_set *map = set->data;
- ip_set_put_byindex(map->net, e->id);
ip_set_ext_destroy(set, e);
kfree(e);
}
@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
static inline void
list_set_del(struct ip_set *set, struct set_elem *e)
{
+ struct list_set *map = set->data;
+
set->elements--;
list_del_rcu(&e->list);
+ ip_set_put_byindex(map->net, e->id);
call_rcu(&e->rcu, __list_set_del_rcu);
}
static inline void
-list_set_replace(struct set_elem *e, struct set_elem *old)
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
{
+ struct list_set *map = set->data;
+
list_replace_rcu(&old->list, &e->list);
+ ip_set_put_byindex(map->net, old->id);
call_rcu(&old->rcu, __list_set_del_rcu);
}
@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
INIT_LIST_HEAD(&e->list);
list_set_init_extensions(set, ext, e);
if (n)
- list_set_replace(e, n);
+ list_set_replace(set, e, n);
else if (next)
list_add_tail_rcu(&e->list, &next->list);
else if (prev)
@@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
const struct list_set *map = set->data;
struct nlattr *atd, *nested;
u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+ char name[IPSET_MAXNAMELEN];
struct set_elem *e;
int ret = 0;
@@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
- if (nla_put_string(skb, IPSET_ATTR_NAME,
- ip_set_name_byindex(map->net, e->id)))
+ ip_set_name_byindex(map->net, e->id, name);
+ if (nla_put_string(skb, IPSET_ATTR_NAME, name))
goto nla_put_failure;
if (ip_set_put_extensions(skb, set, e, true))
goto nla_put_failure;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 83395bf6dc35..432141f04af3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3980,6 +3980,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
static struct notifier_block ip_vs_dst_notifier = {
.notifier_call = ip_vs_dst_event,
+#ifdef CONFIG_IP_VS_IPV6
+ .priority = ADDRCONF_NOTIFY_PRIORITY + 5,
+#endif
};
int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 02ca7df793f5..b6d0f6deea86 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -49,6 +49,7 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
+ bool dead;
struct rcu_head rcu_head;
};
@@ -106,15 +107,16 @@ nf_conncount_add(struct nf_conncount_list *list,
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
- spin_lock(&list->list_lock);
+ conn->dead = false;
+ spin_lock_bh(&list->list_lock);
if (list->dead == true) {
kmem_cache_free(conncount_conn_cachep, conn);
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_SKIP;
}
list_add_tail(&conn->node, &list->head);
list->count++;
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_ADDED;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
@@ -132,19 +134,22 @@ static bool conn_free(struct nf_conncount_list *list,
{
bool free_entry = false;
- spin_lock(&list->list_lock);
+ spin_lock_bh(&list->list_lock);
- if (list->count == 0) {
- spin_unlock(&list->list_lock);
- return free_entry;
+ if (conn->dead) {
+ spin_unlock_bh(&list->list_lock);
+ return free_entry;
}
list->count--;
+ conn->dead = true;
list_del_rcu(&conn->node);
- if (list->count == 0)
+ if (list->count == 0) {
+ list->dead = true;
free_entry = true;
+ }
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
call_rcu(&conn->rcu_head, __conn_free);
return free_entry;
}
@@ -245,7 +250,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
- list->count = 1;
+ list->count = 0;
list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -259,6 +264,7 @@ bool nf_conncount_gc_list(struct net *net,
struct nf_conn *found_ct;
unsigned int collected = 0;
bool free_entry = false;
+ bool ret = false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry);
@@ -288,7 +294,15 @@ bool nf_conncount_gc_list(struct net *net,
if (collected > CONNCOUNT_GC_MAX_NODES)
return false;
}
- return false;
+
+ spin_lock_bh(&list->list_lock);
+ if (!list->count) {
+ list->dead = true;
+ ret = true;
+ }
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
@@ -309,11 +323,8 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
- if (rbconn->list.count == 0 && rbconn->list.dead == false) {
- rbconn->list.dead = true;
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
- }
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
spin_unlock(&rbconn->list.list_lock);
}
}
@@ -414,6 +425,7 @@ insert_tree(struct net *net,
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
count = 1;
+ rbconn->list.count = count;
rb_link_node(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ca1168d67fac..e92e749aff53 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(struct net *net,
return drops;
}
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
{
- unsigned int i;
+ unsigned int i, bucket;
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned int hash, hsize, drops;
+ unsigned int hsize, drops;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = reciprocal_scale(_hash++, hsize);
+ if (!i)
+ bucket = reciprocal_scale(hash, hsize);
+ else
+ bucket = (bucket + 1) % hsize;
- drops = early_drop_list(net, &ct_hash[hash]);
+ drops = early_drop_list(net, &ct_hash[bucket]);
rcu_read_unlock();
if (drops) {
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 171e9e122e5f..023c1445bc39 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -384,11 +384,6 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
},
};
-static inline struct nf_dccp_net *dccp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.dccp;
-}
-
static noinline bool
dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
const struct dccp_hdr *dh)
@@ -401,7 +396,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
switch (state) {
default:
- dn = dccp_pernet(net);
+ dn = nf_dccp_pernet(net);
if (dn->dccp_loose == 0) {
msg = "not picking up existing connection ";
goto out_invalid;
@@ -568,7 +563,7 @@ static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
- timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
+ timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
return NF_ACCEPT;
@@ -681,7 +676,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct nf_dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
unsigned int *timeouts = data;
int i;
@@ -814,7 +809,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
static int dccp_init_net(struct net *net)
{
- struct nf_dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
struct nf_proto_net *pn = &dn->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index e10e867e0b55..5da19d5fbc76 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -27,11 +27,6 @@ static bool nf_generic_should_process(u8 proto)
}
}
-static inline struct nf_generic_net *generic_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.generic;
-}
-
static bool generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
@@ -58,7 +53,7 @@ static int generic_packet(struct nf_conn *ct,
}
if (!timeout)
- timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+ timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
@@ -72,7 +67,7 @@ static int generic_packet(struct nf_conn *ct,
static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct nf_generic_net *gn = generic_pernet(net);
+ struct nf_generic_net *gn = nf_generic_pernet(net);
unsigned int *timeout = data;
if (!timeout)
@@ -138,7 +133,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int generic_init_net(struct net *net)
{
- struct nf_generic_net *gn = generic_pernet(net);
+ struct nf_generic_net *gn = nf_generic_pernet(net);
struct nf_proto_net *pn = &gn->pn;
gn->timeout = nf_ct_generic_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9b48dc8b4b88..2a5e56c6d8d9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -43,24 +43,12 @@
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
-enum grep_conntrack {
- GRE_CT_UNREPLIED,
- GRE_CT_REPLIED,
- GRE_CT_MAX
-};
-
static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_UNREPLIED] = 30*HZ,
[GRE_CT_REPLIED] = 180*HZ,
};
static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
- struct nf_proto_net nf;
- rwlock_t keymap_lock;
- struct list_head keymap_list;
- unsigned int gre_timeouts[GRE_CT_MAX];
-};
static inline struct netns_proto_gre *gre_pernet(struct net *net)
{
@@ -402,6 +390,8 @@ static int __init nf_ct_proto_gre_init(void)
{
int ret;
+ BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
+
ret = register_pernet_subsys(&proto_gre_net_ops);
if (ret < 0)
goto out_pernet;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 3598520bd19b..de64d8a5fdfd 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -25,11 +25,6 @@
static const unsigned int nf_ct_icmp_timeout = 30*HZ;
-static inline struct nf_icmp_net *icmp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.icmp;
-}
-
static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
@@ -103,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct,
}
if (!timeout)
- timeout = &icmp_pernet(nf_ct_net(ct))->timeout;
+ timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
@@ -275,7 +270,7 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeout = data;
- struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_icmp_net *in = nf_icmp_pernet(net);
if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
if (!timeout)
@@ -337,7 +332,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int icmp_init_net(struct net *net)
{
- struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_icmp_net *in = nf_icmp_pernet(net);
struct nf_proto_net *pn = &in->pn;
in->timeout = nf_ct_icmp_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 378618feed5d..a15eefb8e317 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -30,11 +30,6 @@
static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
-static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.icmpv6;
-}
-
static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct net *net,
@@ -87,7 +82,7 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
static unsigned int *icmpv6_get_timeouts(struct net *net)
{
- return &icmpv6_pernet(net)->timeout;
+ return &nf_icmpv6_pernet(net)->timeout;
}
/* Returns verdict for packet, or -1 for invalid. */
@@ -286,7 +281,7 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeout = data;
- struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_icmp_net *in = nf_icmpv6_pernet(net);
if (!timeout)
timeout = icmpv6_get_timeouts(net);
@@ -348,7 +343,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int icmpv6_init_net(struct net *net)
{
- struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_icmp_net *in = nf_icmpv6_pernet(net);
struct nf_proto_net *pn = &in->pn;
in->timeout = nf_ct_icmpv6_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3d719d3eb9a3..d53e3e78f605 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -146,11 +146,6 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
}
};
-static inline struct nf_sctp_net *sctp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.sctp;
-}
-
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -480,7 +475,7 @@ static int sctp_packet(struct nf_conn *ct,
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
- timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+ timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
@@ -599,7 +594,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct nf_sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
int i;
/* set default SCTP timeouts. */
@@ -736,7 +731,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int sctp_init_net(struct net *net)
{
- struct nf_sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1bcf9984d45e..4dcbd51a8e97 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -272,11 +272,6 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
}
};
-static inline struct nf_tcp_net *tcp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.tcp;
-}
-
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -475,7 +470,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
const struct tcphdr *tcph)
{
struct net *net = nf_ct_net(ct);
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -767,7 +762,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
{
enum tcp_conntrack new_state;
struct net *net = nf_ct_net(ct);
- const struct nf_tcp_net *tn = tcp_pernet(net);
+ const struct nf_tcp_net *tn = nf_tcp_pernet(net);
const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
@@ -841,7 +836,7 @@ static int tcp_packet(struct nf_conn *ct,
const struct nf_hook_state *state)
{
struct net *net = nf_ct_net(ct);
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
unsigned int index, *timeouts;
@@ -1283,7 +1278,7 @@ static unsigned int tcp_nlattr_tuple_size(void)
static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
unsigned int *timeouts = data;
int i;
@@ -1508,7 +1503,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int tcp_init_net(struct net *net)
{
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct nf_proto_net *pn = &tn->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a7aa70370913..c879d8d78cfd 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -32,14 +32,9 @@ static const unsigned int udp_timeouts[UDP_CT_MAX] = {
[UDP_CT_REPLIED] = 180*HZ,
};
-static inline struct nf_udp_net *udp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.udp;
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
- return udp_pernet(net)->timeouts;
+ return nf_udp_pernet(net)->timeouts;
}
static void udp_error_log(const struct sk_buff *skb,
@@ -212,7 +207,7 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct nf_udp_net *un = udp_pernet(net);
+ struct nf_udp_net *un = nf_udp_pernet(net);
if (!timeouts)
timeouts = un->timeouts;
@@ -292,7 +287,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int udp_init_net(struct net *net)
{
- struct nf_udp_net *un = udp_pernet(net);
+ struct nf_udp_net *un = nf_udp_pernet(net);
struct nf_proto_net *pn = &un->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 42487d01a3ed..2e61aab6ed73 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2457,7 +2457,7 @@ err:
static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
- struct nft_expr *expr;
+ struct nft_expr *expr, *next;
/*
* Careful: some expressions might not be initialized in case this
@@ -2465,8 +2465,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
*/
expr = nft_expr_first(rule);
while (expr != nft_expr_last(rule) && expr->ops) {
+ next = nft_expr_next(expr);
nf_tables_expr_destroy(ctx, expr);
- expr = nft_expr_next(expr);
+ expr = next;
}
kfree(rule);
}
@@ -2589,17 +2590,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (chain->use == UINT_MAX)
return -EOVERFLOW;
- }
-
- if (nla[NFTA_RULE_POSITION]) {
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
- return -EOPNOTSUPP;
- pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
- old_rule = __nft_rule_lookup(chain, pos_handle);
- if (IS_ERR(old_rule)) {
- NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
- return PTR_ERR(old_rule);
+ if (nla[NFTA_RULE_POSITION]) {
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nft_rule_lookup(chain, pos_handle);
+ if (IS_ERR(old_rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
+ return PTR_ERR(old_rule);
+ }
}
}
@@ -2669,21 +2667,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
- if (!nft_is_active_next(net, old_rule)) {
- err = -ENOENT;
- goto err2;
- }
- trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
- old_rule);
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
- nft_deactivate_next(net, old_rule);
- chain->use--;
-
- if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
- err = -ENOMEM;
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0) {
+ nft_trans_destroy(trans);
goto err2;
}
@@ -6324,7 +6315,7 @@ static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
}
-static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain)
+static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
{
struct nft_rule **g0, **g1;
bool next_genbit;
@@ -6441,11 +6432,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* step 2. Make rules_gen_X visible to packet path */
list_for_each_entry(table, &net->nft.tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
- if (!nft_is_active_next(net, chain))
- continue;
- nf_tables_commit_chain_active(net, chain);
- }
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_commit_chain(net, chain);
}
/*
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index e7a50af1b3d6..109b0d27345a 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -382,7 +382,8 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event, u16 l3num,
- const struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto,
+ const unsigned int *timeouts)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -408,7 +409,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
if (!nest_parms)
goto nla_put_failure;
- ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
if (ret < 0)
goto nla_put_failure;
@@ -430,6 +431,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
struct netlink_ext_ack *extack)
{
const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts = NULL;
struct sk_buff *skb2;
int ret, err;
__u16 l3num;
@@ -442,12 +444,55 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
l4proto = nf_ct_l4proto_find_get(l4num);
- /* This protocol is not supported, skip. */
- if (l4proto->l4proto != l4num) {
- err = -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ if (l4proto->l4proto != l4num)
goto err;
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ timeouts = &nf_icmp_pernet(net)->timeout;
+ break;
+ case IPPROTO_TCP:
+ timeouts = nf_tcp_pernet(net)->timeouts;
+ break;
+ case IPPROTO_UDP: /* fallthrough */
+ case IPPROTO_UDPLITE:
+ timeouts = nf_udp_pernet(net)->timeouts;
+ break;
+ case IPPROTO_DCCP:
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ timeouts = nf_dccp_pernet(net)->dccp_timeout;
+#endif
+ break;
+ case IPPROTO_ICMPV6:
+ timeouts = &nf_icmpv6_pernet(net)->timeout;
+ break;
+ case IPPROTO_SCTP:
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+ break;
+ case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ if (l4proto->net_id) {
+ struct netns_proto_gre *net_gre;
+
+ net_gre = net_generic(net, *l4proto->net_id);
+ timeouts = net_gre->gre_timeouts;
+ }
+#endif
+ break;
+ case 255:
+ timeouts = &nf_generic_pernet(net)->timeout;
+ break;
+ default:
+ WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
+ break;
}
+ if (!timeouts)
+ goto err;
+
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (skb2 == NULL) {
err = -ENOMEM;
@@ -458,8 +503,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
- l3num,
- l4proto);
+ l3num, l4proto, timeouts);
if (ret <= 0) {
kfree_skb(skb2);
err = -ENOMEM;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 768292eac2a4..7334e0b80a5e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -54,9 +54,11 @@ static bool nft_xt_put(struct nft_xt *xt)
return false;
}
-static int nft_compat_chain_validate_dependency(const char *tablename,
- const struct nft_chain *chain)
+static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
+ const char *tablename)
{
+ enum nft_chain_types type = NFT_CHAIN_T_DEFAULT;
+ const struct nft_chain *chain = ctx->chain;
const struct nft_base_chain *basechain;
if (!tablename ||
@@ -64,9 +66,12 @@ static int nft_compat_chain_validate_dependency(const char *tablename,
return 0;
basechain = nft_base_chain(chain);
- if (strcmp(tablename, "nat") == 0 &&
- basechain->type->type != NFT_CHAIN_T_NAT)
- return -EINVAL;
+ if (strcmp(tablename, "nat") == 0) {
+ if (ctx->family != NFPROTO_BRIDGE)
+ type = NFT_CHAIN_T_NAT;
+ if (basechain->type->type != type)
+ return -EINVAL;
+ }
return 0;
}
@@ -342,8 +347,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
if (target->hooks && !(hook_mask & target->hooks))
return -EINVAL;
- ret = nft_compat_chain_validate_dependency(target->table,
- ctx->chain);
+ ret = nft_compat_chain_validate_dependency(ctx, target->table);
if (ret < 0)
return ret;
}
@@ -516,6 +520,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
void *info)
{
struct xt_match *match = expr->ops->data;
+ struct module *me = match->me;
struct xt_mtdtor_param par;
par.net = ctx->net;
@@ -526,7 +531,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
par.match->destroy(&par);
if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(match->me);
+ module_put(me);
}
static void
@@ -590,8 +595,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (match->hooks && !(hook_mask & match->hooks))
return -EINVAL;
- ret = nft_compat_chain_validate_dependency(match->table,
- ctx->chain);
+ ret = nft_compat_chain_validate_dependency(ctx, match->table);
if (ret < 0)
return ret;
}
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e82d9a966c45..974525eb92df 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void)
{
int err;
- register_netdevice_notifier(&flow_offload_netdev_notifier);
+ err = register_netdevice_notifier(&flow_offload_netdev_notifier);
+ if (err)
+ goto err;
err = nft_register_expr(&nft_flow_offload_type);
if (err < 0)
@@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void)
register_expr:
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+err:
return err;
}
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 649d1700ec5b..3cc1b3dc3c3c 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -24,7 +24,6 @@ struct nft_ng_inc {
u32 modulus;
atomic_t counter;
u32 offset;
- struct nft_set *map;
};
static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
@@ -48,34 +47,11 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = nft_ng_inc_gen(priv);
}
-static void nft_ng_inc_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_ng_inc *priv = nft_expr_priv(expr);
- const struct nft_set *map = priv->map;
- const struct nft_set_ext *ext;
- u32 result;
- bool found;
-
- result = nft_ng_inc_gen(priv);
- found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-
- if (!found)
- return;
-
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), map->dlen);
-}
-
static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
[NFTA_NG_DREG] = { .type = NLA_U32 },
[NFTA_NG_MODULUS] = { .type = NLA_U32 },
[NFTA_NG_TYPE] = { .type = NLA_U32 },
[NFTA_NG_OFFSET] = { .type = NLA_U32 },
- [NFTA_NG_SET_NAME] = { .type = NLA_STRING,
- .len = NFT_SET_MAXNAMELEN - 1 },
- [NFTA_NG_SET_ID] = { .type = NLA_U32 },
};
static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -101,22 +77,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_ng_inc *priv = nft_expr_priv(expr);
- u8 genmask = nft_genmask_next(ctx->net);
-
- nft_ng_inc_init(ctx, expr, tb);
-
- priv->map = nft_set_lookup_global(ctx->net, ctx->table,
- tb[NFTA_NG_SET_NAME],
- tb[NFTA_NG_SET_ID], genmask);
-
- return PTR_ERR_OR_ZERO(priv->map);
-}
-
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
u32 modulus, enum nft_ng_types type, u32 offset)
{
@@ -143,27 +103,10 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
-static int nft_ng_inc_map_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
-{
- const struct nft_ng_inc *priv = nft_expr_priv(expr);
-
- if (nft_ng_dump(skb, priv->dreg, priv->modulus,
- NFT_NG_INCREMENTAL, priv->offset) ||
- nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
struct nft_ng_random {
enum nft_registers dreg:8;
u32 modulus;
u32 offset;
- struct nft_set *map;
};
static u32 nft_ng_random_gen(struct nft_ng_random *priv)
@@ -183,25 +126,6 @@ static void nft_ng_random_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = nft_ng_random_gen(priv);
}
-static void nft_ng_random_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_ng_random *priv = nft_expr_priv(expr);
- const struct nft_set *map = priv->map;
- const struct nft_set_ext *ext;
- u32 result;
- bool found;
-
- result = nft_ng_random_gen(priv);
- found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
- if (!found)
- return;
-
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), map->dlen);
-}
-
static int nft_ng_random_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -226,21 +150,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_ng_random_map_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_ng_random *priv = nft_expr_priv(expr);
- u8 genmask = nft_genmask_next(ctx->net);
-
- nft_ng_random_init(ctx, expr, tb);
- priv->map = nft_set_lookup_global(ctx->net, ctx->table,
- tb[NFTA_NG_SET_NAME],
- tb[NFTA_NG_SET_ID], genmask);
-
- return PTR_ERR_OR_ZERO(priv->map);
-}
-
static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ng_random *priv = nft_expr_priv(expr);
@@ -249,22 +158,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
-static int nft_ng_random_map_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
-{
- const struct nft_ng_random *priv = nft_expr_priv(expr);
-
- if (nft_ng_dump(skb, priv->dreg, priv->modulus,
- NFT_NG_RANDOM, priv->offset) ||
- nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
static struct nft_expr_type nft_ng_type;
static const struct nft_expr_ops nft_ng_inc_ops = {
.type = &nft_ng_type,
@@ -274,14 +167,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
.dump = nft_ng_inc_dump,
};
-static const struct nft_expr_ops nft_ng_inc_map_ops = {
- .type = &nft_ng_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
- .eval = nft_ng_inc_map_eval,
- .init = nft_ng_inc_map_init,
- .dump = nft_ng_inc_map_dump,
-};
-
static const struct nft_expr_ops nft_ng_random_ops = {
.type = &nft_ng_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
@@ -290,14 +175,6 @@ static const struct nft_expr_ops nft_ng_random_ops = {
.dump = nft_ng_random_dump,
};
-static const struct nft_expr_ops nft_ng_random_map_ops = {
- .type = &nft_ng_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
- .eval = nft_ng_random_map_eval,
- .init = nft_ng_random_map_init,
- .dump = nft_ng_random_map_dump,
-};
-
static const struct nft_expr_ops *
nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
@@ -312,12 +189,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
switch (type) {
case NFT_NG_INCREMENTAL:
- if (tb[NFTA_NG_SET_NAME])
- return &nft_ng_inc_map_ops;
return &nft_ng_inc_ops;
case NFT_NG_RANDOM:
- if (tb[NFTA_NG_SET_NAME])
- return &nft_ng_random_map_ops;
return &nft_ng_random_ops;
}
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index ca5e5d8c5ef8..b13618c764ec 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -50,7 +50,7 @@ static int nft_osf_init(const struct nft_ctx *ctx,
int err;
u8 ttl;
- if (nla_get_u8(tb[NFTA_OSF_TTL])) {
+ if (tb[NFTA_OSF_TTL]) {
ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
if (ttl > 2)
return -EINVAL;
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index c6acfc2d9c84..eb4cbd244c3d 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -114,6 +114,22 @@ static void idletimer_tg_expired(struct timer_list *t)
schedule_work(&timer->work);
}
+static int idletimer_check_sysfs_name(const char *name, unsigned int size)
+{
+ int ret;
+
+ ret = xt_check_proc_name(name, size);
+ if (ret < 0)
+ return ret;
+
+ if (!strcmp(name, "power") ||
+ !strcmp(name, "subsystem") ||
+ !strcmp(name, "uevent"))
+ return -EINVAL;
+
+ return 0;
+}
+
static int idletimer_tg_create(struct idletimer_tg_info *info)
{
int ret;
@@ -124,6 +140,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
goto out;
}
+ ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+ if (ret < 0)
+ goto out_free_timer;
+
sysfs_attr_init(&info->timer->attr.attr);
info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
if (!info->timer->attr.attr.name) {
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dec843cadf46..9e05c86ba5c4 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net)
return 0;
}
-static void __net_exit xt_rateest_net_exit(struct net *net)
-{
- struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
- WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
-}
-
static struct pernet_operations xt_rateest_net_ops = {
.init = xt_rateest_net_init,
- .exit = xt_rateest_net_exit,
.id = &xt_rateest_id,
.size = sizeof(struct xt_rateest_net),
};
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 3e7d259e5d8d..1ad4017f9b73 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
/* copy match config into hashtable config */
ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
-
- if (ret)
+ if (ret) {
+ vfree(hinfo);
return ret;
+ }
hinfo->cfg.size = size;
if (hinfo->cfg.max == 0)
@@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
if (ret)
return ret;
@@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
if (ret)
return ret;
@@ -921,7 +920,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
if (ret)
return ret;
@@ -940,7 +938,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
if (ret)
return ret;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 6bec37ab4472..a4660c48ff01 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1203,7 +1203,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
&info->labels.mask);
if (err)
return err;
- } else if (labels_nonzero(&info->labels.mask)) {
+ } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
&info->labels.mask);
if (err)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..a74650e98f42 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
void *ph;
__u32 ts;
- ph = skb_shinfo(skb)->destructor_arg;
+ ph = skb_zcopy_get_nouarg(skb);
packet_dec_pending(&po->tx_ring);
ts = __packet_set_timestamp(po, ph, skb);
@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->mark = po->sk.sk_mark;
skb->tstamp = sockc->transmit_time;
sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
- skb_shinfo(skb)->destructor_arg = ph.raw;
+ skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 64362d078da8..a2522f9d71e2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -375,17 +375,36 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* getting ACKs from the server. Returns a number representing the life state
* which can be compared to that returned by a previous call.
*
- * If this is a client call, ping ACKs will be sent to the server to find out
- * whether it's still responsive and whether the call is still alive on the
- * server.
+ * If the life state stalls, rxrpc_kernel_probe_life() should be called and
+ * then 2RTT waited.
*/
-u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_check_life(const struct socket *sock,
+ const struct rxrpc_call *call)
{
return call->acks_latest;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
+ * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
+ * find out whether a call is still alive by pinging it. This should cause the
+ * life state to be bumped in about 2*RTT.
+ *
+ * The must be called in TASK_RUNNING state on pain of might_sleep() objecting.
+ */
+void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
+{
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ack_ping_for_check_life);
+ rxrpc_send_ack_packet(call, true, NULL);
+}
+EXPORT_SYMBOL(rxrpc_kernel_probe_life);
+
+/**
* rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
* @sock: The socket the call is on
* @call: The call to query
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 382196e57a26..bc628acf4f4f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -611,6 +611,7 @@ struct rxrpc_call {
* not hard-ACK'd packet follows this.
*/
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
+ u16 tx_backoff; /* Delay to insert due to Tx failure */
/* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
* is fixed, we keep these numbers in terms of segments (ie. DATA
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 8e7434e92097..468efc3660c0 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
else
ack_at = expiry;
+ ack_at += READ_ONCE(call->tx_backoff);
ack_at += now;
if (time_before(ack_at, call->ack_at)) {
WRITE_ONCE(call->ack_at, ack_at);
@@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_struct *work)
container_of(work, struct rxrpc_call, processor);
rxrpc_serial_t *send_ack;
unsigned long now, next, t;
+ unsigned int iterations = 0;
rxrpc_see_call(call);
@@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_struct *work)
call->debug_id, rxrpc_call_states[call->state], call->events);
recheck_state:
+ /* Limit the number of times we do this before returning to the manager */
+ iterations++;
+ if (iterations > 5)
+ goto requeue;
+
if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
rxrpc_send_abort_packet(call);
goto recheck_state;
@@ -447,13 +454,16 @@ recheck_state:
rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
/* other events may have been raised since we started checking */
- if (call->events && call->state < RXRPC_CALL_COMPLETE) {
- __rxrpc_queue_call(call);
- goto out;
- }
+ if (call->events && call->state < RXRPC_CALL_COMPLETE)
+ goto requeue;
out_put:
rxrpc_put_call(call, rxrpc_call_put);
out:
_leave("");
+ return;
+
+requeue:
+ __rxrpc_queue_call(call);
+ goto out;
}
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 189418888839..736aa9281100 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,6 +35,21 @@ struct rxrpc_abort_buffer {
static const char rxrpc_keepalive_string[] = "";
/*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+ if (ret < 0) {
+ u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+ if (tx_backoff < HZ)
+ WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+ } else {
+ WRITE_ONCE(call->tx_backoff, 0);
+ }
+}
+
+/*
* Arrange for a keepalive ping a certain time after we last transmitted. This
* lets the far side know we're still interested in this call and helps keep
* the route through any intervening firewall open.
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
else
trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
rxrpc_tx_point_call_ack);
+ rxrpc_tx_backoff(call, ret);
if (call->state < RXRPC_CALL_COMPLETE) {
if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
rxrpc_propose_ACK(call, pkt->ack.reason,
ntohs(pkt->ack.maxSkew),
ntohl(pkt->ack.serial),
- true, true,
+ false, true,
rxrpc_propose_ack_retry_tx);
} else {
spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
else
trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
rxrpc_tx_point_call_abort);
-
+ rxrpc_tx_backoff(call, ret);
rxrpc_put_connection(conn);
return ret;
@@ -413,6 +429,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
else
trace_rxrpc_tx_packet(call->debug_id, &whdr,
rxrpc_tx_point_call_data_nofrag);
+ rxrpc_tx_backoff(call, ret);
if (ret == -EMSGSIZE)
goto send_fragmentable;
@@ -445,9 +462,18 @@ done:
rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
rxrpc_timer_set_for_normal);
}
- }
- rxrpc_set_keepalive(call);
+ rxrpc_set_keepalive(call);
+ } else {
+ /* Cancel the call if the initial transmission fails,
+ * particularly if that's due to network routing issues that
+ * aren't going away anytime soon. The layer above can arrange
+ * the retransmission.
+ */
+ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_USER_ABORT, ret);
+ }
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
@@ -506,6 +532,7 @@ send_fragmentable:
else
trace_rxrpc_tx_packet(call->debug_id, &whdr,
rxrpc_tx_point_call_data_frag);
+ rxrpc_tx_backoff(call, ret);
up_write(&conn->params.local->defrag_sem);
goto done;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1dae5f2b358f..c8cf4d10c435 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -258,7 +258,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (is_redirect) {
skb2->tc_redirected = 1;
skb2->tc_from_ingress = skb2->tc_at_ingress;
-
+ if (skb2->tc_from_ingress)
+ skb2->tstamp = 0;
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index da3dd0f68cc2..2b372a06b432 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -201,7 +201,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_release;
}
} else {
- return err;
+ ret = err;
+ goto out_free;
}
p = to_pedit(*a);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 052855d47354..37c9b8f0e10f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -27,10 +27,7 @@ struct tcf_police_params {
u32 tcfp_ewma_rate;
s64 tcfp_burst;
u32 tcfp_mtu;
- s64 tcfp_toks;
- s64 tcfp_ptoks;
s64 tcfp_mtu_ptoks;
- s64 tcfp_t_c;
struct psched_ratecfg rate;
bool rate_present;
struct psched_ratecfg peak;
@@ -41,6 +38,11 @@ struct tcf_police_params {
struct tcf_police {
struct tc_action common;
struct tcf_police_params __rcu *params;
+
+ spinlock_t tcfp_lock ____cacheline_aligned_in_smp;
+ s64 tcfp_toks;
+ s64 tcfp_ptoks;
+ s64 tcfp_t_c;
};
#define to_police(pc) ((struct tcf_police *)pc)
@@ -122,6 +124,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
+ spin_lock_init(&(to_police(*a)->tcfp_lock));
} else if (!ovr) {
tcf_idr_release(*a, bind);
return -EEXIST;
@@ -186,12 +189,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
- new->tcfp_toks = new->tcfp_burst;
- if (new->peak_present) {
+ if (new->peak_present)
new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
new->tcfp_mtu);
- new->tcfp_ptoks = new->tcfp_mtu_ptoks;
- }
if (tb[TCA_POLICE_AVRATE])
new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
@@ -207,7 +207,12 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
spin_lock_bh(&police->tcf_lock);
- new->tcfp_t_c = ktime_get_ns();
+ spin_lock_bh(&police->tcfp_lock);
+ police->tcfp_t_c = ktime_get_ns();
+ police->tcfp_toks = new->tcfp_burst;
+ if (new->peak_present)
+ police->tcfp_ptoks = new->tcfp_mtu_ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
police->tcf_action = parm->action;
rcu_swap_protected(police->params,
new,
@@ -257,25 +262,28 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
}
now = ktime_get_ns();
- toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);
+ spin_lock_bh(&police->tcfp_lock);
+ toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
if (p->peak_present) {
- ptoks = toks + p->tcfp_ptoks;
+ ptoks = toks + police->tcfp_ptoks;
if (ptoks > p->tcfp_mtu_ptoks)
ptoks = p->tcfp_mtu_ptoks;
ptoks -= (s64)psched_l2t_ns(&p->peak,
qdisc_pkt_len(skb));
}
- toks += p->tcfp_toks;
+ toks += police->tcfp_toks;
if (toks > p->tcfp_burst)
toks = p->tcfp_burst;
toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
if ((toks|ptoks) >= 0) {
- p->tcfp_t_c = now;
- p->tcfp_toks = toks;
- p->tcfp_ptoks = ptoks;
+ police->tcfp_t_c = now;
+ police->tcfp_toks = toks;
+ police->tcfp_ptoks = ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
ret = p->tcfp_result;
goto inc_drops;
}
+ spin_unlock_bh(&police->tcfp_lock);
}
inc_overlimits:
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9aada2d0ef06..c6c327874abc 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -709,11 +709,23 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct netlink_ext_ack *extack)
{
const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
- int option_len, key_depth, msk_depth = 0;
+ int err, option_len, key_depth, msk_depth = 0;
+
+ err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS],
+ TCA_FLOWER_KEY_ENC_OPTS_MAX,
+ enc_opts_policy, extack);
+ if (err)
+ return err;
nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+ err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
+ TCA_FLOWER_KEY_ENC_OPTS_MAX,
+ enc_opts_policy, extack);
+ if (err)
+ return err;
+
nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4b1af706896c..25a7cf6d380f 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -469,22 +469,29 @@ begin:
goto begin;
}
prefetch(&skb->end);
- f->credit -= qdisc_pkt_len(skb);
+ plen = qdisc_pkt_len(skb);
+ f->credit -= plen;
- if (ktime_to_ns(skb->tstamp) || !q->rate_enable)
+ if (!q->rate_enable)
goto out;
rate = q->flow_max_rate;
- if (skb->sk)
- rate = min(skb->sk->sk_pacing_rate, rate);
-
- if (rate <= q->low_rate_threshold) {
- f->credit = 0;
- plen = qdisc_pkt_len(skb);
- } else {
- plen = max(qdisc_pkt_len(skb), q->quantum);
- if (f->credit > 0)
- goto out;
+
+ /* If EDT time was provided for this skb, we need to
+ * update f->time_next_packet only if this qdisc enforces
+ * a flow max rate.
+ */
+ if (!skb->tstamp) {
+ if (skb->sk)
+ rate = min(skb->sk->sk_pacing_rate, rate);
+
+ if (rate <= q->low_rate_threshold) {
+ f->credit = 0;
+ } else {
+ plen = max(plen, q->quantum);
+ if (f->credit > 0)
+ goto out;
+ }
}
if (rate != ~0UL) {
u64 len = (u64)plen * NSEC_PER_SEC;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 57b3ad9394ad..2c38e3d07924 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -648,15 +648,6 @@ deliver:
*/
skb->dev = qdisc_dev(sch);
-#ifdef CONFIG_NET_CLS_ACT
- /*
- * If it's at ingress let's pretend the delay is
- * from the network (tstamp will be updated).
- */
- if (skb->tc_redirected && skb->tc_from_ingress)
- skb->tstamp = 0;
-#endif
-
if (q->slot.slot_next) {
q->slot.packets_left--;
q->slot.bytes_left -= qdisc_pkt_len(skb);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 67939ad99c01..025f48e14a91 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -118,6 +118,9 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
+ } else if (!sctp_transport_pmtu_check(tp)) {
+ if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
}
if (asoc->pmtu_pending) {
@@ -396,25 +399,6 @@ finish:
return retval;
}
-static void sctp_packet_release_owner(struct sk_buff *skb)
-{
- sk_free(skb->sk);
-}
-
-static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = sctp_packet_release_owner;
-
- /*
- * The data chunks have already been accounted for in sctp_sendmsg(),
- * therefore only reserve a single byte to keep socket around until
- * the packet has been transmitted.
- */
- refcount_inc(&sk->sk_wmem_alloc);
-}
-
static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
{
if (SCTP_OUTPUT_CB(head)->last == head)
@@ -426,6 +410,7 @@ static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
head->truesize += skb->truesize;
head->data_len += skb->len;
head->len += skb->len;
+ refcount_add(skb->truesize, &head->sk->sk_wmem_alloc);
__skb_header_release(skb);
}
@@ -601,7 +586,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
if (!head)
goto out;
skb_reserve(head, packet->overhead + MAX_HEADER);
- sctp_packet_set_owner_w(head, sk);
+ skb_set_owner_w(head, sk);
/* set sctp header */
sh = skb_push(head, sizeof(struct sctphdr));
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 9cb854b05342..c37e1c2dec9d 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->retransmit);
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
- sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
+ sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT);
}
/* Free the outqueue structure and any related pending chunks.
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 739f3e50120d..bf618d1b41fd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3940,32 +3940,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk,
unsigned int optlen)
{
struct sctp_assoc_value params;
- struct sctp_association *asoc;
- int retval = -EINVAL;
if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (asoc) {
- asoc->prsctp_enable = !!params.assoc_value;
- } else if (!params.assoc_id) {
- struct sctp_sock *sp = sctp_sk(sk);
+ return -EINVAL;
- sp->ep->prsctp_enable = !!params.assoc_value;
- } else {
- goto out;
- }
+ if (copy_from_user(&params, optval, optlen))
+ return -EFAULT;
- retval = 0;
+ sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
-out:
- return retval;
+ return 0;
}
static int sctp_setsockopt_default_prinfo(struct sock *sk,
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ffb940d3b57c..3892e7630f3a 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -535,7 +535,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
goto out;
}
- stream->incnt = incnt;
stream->outcnt = outcnt;
asoc->strreset_outstanding = !!out + !!in;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 80e2119f1c70..5fbaf1901571 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -127,6 +127,8 @@ static int smc_release(struct socket *sock)
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
+ if (smc->connect_info && sk->sk_state == SMC_INIT)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
flush_work(&smc->connect_work);
kfree(smc->connect_info);
smc->connect_info = NULL;
@@ -547,7 +549,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
- ibport, &aclc->lcl, NULL, 0);
+ ibport, ntoh24(aclc->qpn), &aclc->lcl,
+ NULL, 0);
if (local_contact < 0) {
if (local_contact == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -618,7 +621,7 @@ static int smc_connect_ism(struct smc_sock *smc,
int rc = 0;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+ local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
NULL, ismdev, aclc->gid);
if (local_contact < 0)
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
@@ -1083,7 +1086,7 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
int *local_contact)
{
/* allocate connection / link group */
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+ *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
&pclc->lcl, NULL, 0);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
@@ -1107,7 +1110,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_smcd *pclc_smcd;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+ *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
ismdev, pclc_smcd->gid);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index ed5dcf03fe0b..db83332ac1c8 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -81,7 +81,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
BUILD_BUG_ON_MSG(
- sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
+ offsetofend(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
BUILD_BUG_ON_MSG(
sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
@@ -177,23 +177,24 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
int smcd_cdc_msg_send(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ union smc_host_cursor curs;
struct smcd_cdc_msg cdc;
int rc, diff;
memset(&cdc, 0, sizeof(cdc));
cdc.common.type = SMC_CDC_MSG_TYPE;
- cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
- cdc.prod_count = conn->local_tx_ctrl.prod.count;
-
- cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
- cdc.cons_count = conn->local_tx_ctrl.cons.count;
- cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
- cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.prod.acurs);
+ cdc.prod.wrap = curs.wrap;
+ cdc.prod.count = curs.count;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.cons.acurs);
+ cdc.cons.wrap = curs.wrap;
+ cdc.cons.count = curs.count;
+ cdc.cons.prod_flags = conn->local_tx_ctrl.prod_flags;
+ cdc.cons.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
if (rc)
return rc;
- smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
- conn);
+ smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -331,13 +332,16 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
static void smcd_cdc_rx_tsklet(unsigned long data)
{
struct smc_connection *conn = (struct smc_connection *)data;
+ struct smcd_cdc_msg *data_cdc;
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
if (!conn)
return;
- memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+ data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
+ smcd_curs_copy(&cdc.prod, &data_cdc->prod, conn);
+ smcd_curs_copy(&cdc.cons, &data_cdc->cons, conn);
smc = container_of(conn, struct smc_sock, conn);
smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 934df4473a7c..b5bfe38c7f9b 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -48,21 +48,31 @@ struct smc_cdc_msg {
struct smc_cdc_producer_flags prod_flags;
struct smc_cdc_conn_state_flags conn_state_flags;
u8 reserved[18];
-} __packed; /* format defined in RFC7609 */
+};
+
+/* SMC-D cursor format */
+union smcd_cdc_cursor {
+ struct {
+ u16 wrap;
+ u32 count;
+ struct smc_cdc_producer_flags prod_flags;
+ struct smc_cdc_conn_state_flags conn_state_flags;
+ } __packed;
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t acurs; /* for atomic processing */
+#else
+ u64 acurs; /* for atomic processing */
+#endif
+} __aligned(8);
/* CDC message for SMC-D */
struct smcd_cdc_msg {
struct smc_wr_rx_hdr common; /* Type = 0xFE */
u8 res1[7];
- u16 prod_wrap;
- u32 prod_count;
- u8 res2[2];
- u16 cons_wrap;
- u32 cons_count;
- struct smc_cdc_producer_flags prod_flags;
- struct smc_cdc_conn_state_flags conn_state_flags;
+ union smcd_cdc_cursor prod;
+ union smcd_cdc_cursor cons;
u8 res3[8];
-} __packed;
+} __aligned(8);
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
{
@@ -135,6 +145,21 @@ static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt,
#endif
}
+static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
+ union smcd_cdc_cursor *src,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ tgt->acurs = src->acurs;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+ atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
+#endif
+}
+
/* calculate cursor difference between old and new, where old <= new */
static inline int smc_curs_diff(unsigned int size,
union smc_host_cursor *old,
@@ -222,12 +247,17 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smcd_cdc_msg *peer)
{
- local->prod.wrap = peer->prod_wrap;
- local->prod.count = peer->prod_count;
- local->cons.wrap = peer->cons_wrap;
- local->cons.count = peer->cons_count;
- local->prod_flags = peer->prod_flags;
- local->conn_state_flags = peer->conn_state_flags;
+ union smc_host_cursor temp;
+
+ temp.wrap = peer->prod.wrap;
+ temp.count = peer->prod.count;
+ atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs));
+
+ temp.wrap = peer->cons.wrap;
+ temp.count = peer->cons.count;
+ atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs));
+ local->prod_flags = peer->cons.prod_flags;
+ local->conn_state_flags = peer->cons.conn_state_flags;
}
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 18daebcef181..1c9fa7f0261a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -184,6 +184,8 @@ free:
if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
smc_llc_link_inactive(lnk);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -485,7 +487,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
}
/* Called when SMC-D device is terminated or peer is lost */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
@@ -495,7 +497,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->is_smcd && lgr->smcd == dev &&
(!peer_gid || lgr->peer_gid == peer_gid) &&
- !list_empty(&lgr->list)) {
+ (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
__smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
@@ -506,6 +508,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
cancel_delayed_work_sync(&lgr->free_work);
+ if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -559,7 +563,7 @@ out:
static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
- enum smc_lgr_role role)
+ enum smc_lgr_role role, u32 clcqpn)
{
return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
SMC_SYSTEMID_LEN) &&
@@ -567,7 +571,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
SMC_GID_SIZE) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
sizeof(lcl->mac)) &&
- lgr->role == role;
+ lgr->role == role &&
+ (lgr->role == SMC_SERV ||
+ lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -578,7 +584,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid)
{
@@ -603,7 +609,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
write_lock_bh(&lgr->conns_lock);
if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
- smcr_lgr_match(lgr, lcl, role)) &&
+ smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == vlan_id &&
(role == SMC_CLNT ||
@@ -1024,6 +1030,8 @@ void smc_core_exit(void)
smc_llc_link_inactive(lnk);
}
cancel_delayed_work_sync(&lgr->free_work);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr); /* free link group */
}
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c156674733c9..cf98f4d6093e 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -247,7 +247,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+ unsigned short vlan);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
@@ -262,7 +263,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid);
void smcd_conn_free(struct smc_connection *conn);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e36f21ce7252..2fff79db1a59 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -187,22 +187,28 @@ struct smc_ism_event_work {
#define ISM_EVENT_REQUEST 0x0001
#define ISM_EVENT_RESPONSE 0x0002
#define ISM_EVENT_REQUEST_IR 0x00000001
+#define ISM_EVENT_CODE_SHUTDOWN 0x80
#define ISM_EVENT_CODE_TESTLINK 0x83
+union smcd_sw_event_info {
+ u64 info;
+ struct {
+ u8 uid[SMC_LGR_ID_SIZE];
+ unsigned short vlan_id;
+ u16 code;
+ };
+};
+
static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
{
- union {
- u64 info;
- struct {
- u32 uid;
- unsigned short vlanid;
- u16 code;
- };
- } ev_info;
+ union smcd_sw_event_info ev_info;
+ ev_info.info = wrk->event.info;
switch (wrk->event.code) {
+ case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+ break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
- ev_info.info = wrk->event.info;
if (ev_info.code == ISM_EVENT_REQUEST) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
@@ -215,6 +221,21 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
}
}
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+ int rc;
+ union smcd_sw_event_info ev_info;
+
+ memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+ ev_info.vlan_id = lgr->vlan_id;
+ ev_info.code = ISM_EVENT_REQUEST;
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ ISM_EVENT_REQUEST_IR,
+ ISM_EVENT_CODE_SHUTDOWN,
+ ev_info.info);
+ return rc;
+}
+
/* worker for SMC-D events */
static void smc_ism_event_work(struct work_struct *work)
{
@@ -223,7 +244,7 @@ static void smc_ism_event_work(struct work_struct *work)
switch (wrk->event.type) {
case ISM_EVENT_GID: /* GID event, token is peer GID */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
break;
case ISM_EVENT_DMB:
break;
@@ -289,7 +310,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
spin_unlock(&smcd_dev_list.lock);
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
- smc_smcd_terminate(smcd, 0);
+ smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
device_del(&smcd->dev);
}
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index aee45b860b79..4da946cbfa29 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -45,4 +45,5 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
+int smc_ism_signal_shutdown(struct smc_link_group *lgr);
#endif
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3c458d279855..c2694750a6a8 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -215,12 +215,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
if (pend->idx < link->wr_tx_cnt) {
+ u32 idx = pend->idx;
+
/* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pend->idx], 0,
sizeof(link->wr_tx_pends[pend->idx]));
memset(&link->wr_tx_bufs[pend->idx], 0,
sizeof(link->wr_tx_bufs[pend->idx]));
- test_and_clear_bit(pend->idx, link->wr_tx_mask);
+ test_and_clear_bit(idx, link->wr_tx_mask);
return 1;
}
diff --git a/net/socket.c b/net/socket.c
index 593826e11a53..334fcc617ef2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -853,7 +853,7 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct socket *sock = file->private_data;
if (unlikely(!sock->ops->splice_read))
- return -EINVAL;
+ return generic_file_splice_read(file, ppos, pipe, len, flags);
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d8831b988b1e..ab4a3be1542a 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
- bool ret;
-
- get_rpccred(cred);
- ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
- put_rpccred(cred);
-
- return ret;
+ return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
}
static const struct rpc_credops generic_credops = {
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 30f970cdc7f6..5d3f252659f1 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1239,36 +1239,59 @@ gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
return &gss_auth->rpc_auth;
}
+static struct gss_cred *
+gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+{
+ struct gss_cred *new;
+
+ /* Make a copy of the cred so that we can reference count it */
+ new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
+ if (new) {
+ struct auth_cred acred = {
+ .uid = gss_cred->gc_base.cr_uid,
+ };
+ struct gss_cl_ctx *ctx =
+ rcu_dereference_protected(gss_cred->gc_ctx, 1);
+
+ rpcauth_init_cred(&new->gc_base, &acred,
+ &gss_auth->rpc_auth,
+ &gss_nullops);
+ new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+ new->gc_service = gss_cred->gc_service;
+ new->gc_principal = gss_cred->gc_principal;
+ kref_get(&gss_auth->kref);
+ rcu_assign_pointer(new->gc_ctx, ctx);
+ gss_get_ctx(ctx);
+ }
+ return new;
+}
+
/*
- * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
+ * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call
* to the server with the GSS control procedure field set to
* RPC_GSS_PROC_DESTROY. This should normally cause the server to release
* all RPCSEC_GSS state associated with that context.
*/
-static int
-gss_destroying_context(struct rpc_cred *cred)
+static void
+gss_send_destroy_context(struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
+ struct gss_cred *new;
struct rpc_task *task;
- if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
- return 0;
-
- ctx->gc_proc = RPC_GSS_PROC_DESTROY;
- cred->cr_ops = &gss_nullops;
-
- /* Take a reference to ensure the cred will be destroyed either
- * by the RPC call or by the put_rpccred() below */
- get_rpccred(cred);
+ new = gss_dup_cred(gss_auth, gss_cred);
+ if (new) {
+ ctx->gc_proc = RPC_GSS_PROC_DESTROY;
- task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ task = rpc_call_null(gss_auth->client, &new->gc_base,
+ RPC_TASK_ASYNC|RPC_TASK_SOFT);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
- put_rpccred(cred);
- return 1;
+ put_rpccred(&new->gc_base);
+ }
}
/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
@@ -1330,8 +1353,8 @@ static void
gss_destroy_cred(struct rpc_cred *cred)
{
- if (gss_destroying_context(cred))
- return;
+ if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
+ gss_send_destroy_context(cred);
gss_destroy_nullcred(cred);
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2bbb8d38d2bf..f302c6eb8779 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode);
static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
size_t nbytes)
{
- static __be32 *p;
+ __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
@@ -673,11 +673,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
WARN_ON_ONCE(xdr->iov);
return;
}
- if (fraglen) {
+ if (fraglen)
xdr->end = head->iov_base + head->iov_len;
- xdr->page_ptr--;
- }
/* (otherwise assume xdr->end is already set) */
+ xdr->page_ptr--;
head->iov_len = len;
buf->len = len;
xdr->p = head->iov_base + head->iov_len;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 2830709957bd..c138d68e8a69 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -166,7 +166,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
/* Apply trial address if we just left trial period */
if (!trial && !self) {
- tipc_net_finalize(net, tn->trial_addr);
+ tipc_sched_net_finalize(net, tn->trial_addr);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -300,14 +301,12 @@ static void tipc_disc_timeout(struct timer_list *t)
goto exit;
}
- /* Trial period over ? */
- if (!time_before(jiffies, tn->addr_trial_end)) {
- /* Did we just leave it ? */
- if (!tipc_own_addr(net))
- tipc_net_finalize(net, tn->trial_addr);
-
- msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
- msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net));
+ /* Did we just leave trial period ? */
+ if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
+ mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
+ spin_unlock_bh(&d->lock);
+ tipc_sched_net_finalize(net, tn->trial_addr);
+ return;
}
/* Adjust timeout interval according to discovery phase */
@@ -319,6 +318,8 @@ static void tipc_disc_timeout(struct timer_list *t)
d->timer_intv = TIPC_DISC_SLOW;
else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
d->timer_intv = TIPC_DISC_FAST;
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
}
mod_timer(&d->timer, jiffies + d->timer_intv);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 201c3b5bc96b..836727e363c4 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1594,14 +1594,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
l->priority = peers_prio;
- /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
- if (msg_peer_stopping(hdr))
+ /* If peer is going down we want full re-establish cycle */
+ if (msg_peer_stopping(hdr)) {
rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
- else if ((mtyp == RESET_MSG) || !link_is_up(l))
+ break;
+ }
+ /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
+ if (mtyp == RESET_MSG || !link_is_up(l))
rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
/* ACTIVATE_MSG takes up link if it was already locally reset */
- if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
+ if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
l->peer_session = msg_session(hdr);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 62199cf5a56c..f076edb74338 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -104,6 +104,14 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
+struct tipc_net_work {
+ struct work_struct work;
+ struct net *net;
+ u32 addr;
+};
+
+static void tipc_net_finalize(struct net *net, u32 addr);
+
int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
{
if (tipc_own_id(net)) {
@@ -119,17 +127,38 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
return 0;
}
-void tipc_net_finalize(struct net *net, u32 addr)
+static void tipc_net_finalize(struct net *net, u32 addr)
{
struct tipc_net *tn = tipc_net(net);
- if (!cmpxchg(&tn->node_addr, 0, addr)) {
- tipc_set_node_addr(net, addr);
- tipc_named_reinit(net);
- tipc_sk_reinit(net);
- tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
- TIPC_CLUSTER_SCOPE, 0, addr);
- }
+ if (cmpxchg(&tn->node_addr, 0, addr))
+ return;
+ tipc_set_node_addr(net, addr);
+ tipc_named_reinit(net);
+ tipc_sk_reinit(net);
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
+}
+
+static void tipc_net_finalize_work(struct work_struct *work)
+{
+ struct tipc_net_work *fwork;
+
+ fwork = container_of(work, struct tipc_net_work, work);
+ tipc_net_finalize(fwork->net, fwork->addr);
+ kfree(fwork);
+}
+
+void tipc_sched_net_finalize(struct net *net, u32 addr)
+{
+ struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC);
+
+ if (!fwork)
+ return;
+ INIT_WORK(&fwork->work, tipc_net_finalize_work);
+ fwork->net = net;
+ fwork->addr = addr;
+ schedule_work(&fwork->work);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 09ad02b50bb1..b7f2e364eb99 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -42,7 +42,7 @@
extern const struct nla_policy tipc_nl_net_policy[];
int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
-void tipc_net_finalize(struct net *net, u32 addr);
+void tipc_sched_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2afc4f8c37a7..488019766433 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -584,12 +584,15 @@ static void tipc_node_clear_links(struct tipc_node *node)
/* tipc_node_cleanup - delete nodes that does not
* have active links for NODE_CLEANUP_AFTER time
*/
-static int tipc_node_cleanup(struct tipc_node *peer)
+static bool tipc_node_cleanup(struct tipc_node *peer)
{
struct tipc_net *tn = tipc_net(peer->net);
bool deleted = false;
- spin_lock_bh(&tn->node_list_lock);
+ /* If lock held by tipc_node_stop() the node will be deleted anyway */
+ if (!spin_trylock_bh(&tn->node_list_lock))
+ return false;
+
tipc_node_write_lock(peer);
if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 636e6131769d..b57b1be7252b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1555,16 +1555,17 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
/**
* tipc_sk_anc_data_recv - optionally capture ancillary data for received message
* @m: descriptor for message info
- * @msg: received message header
+ * @skb: received message buffer
* @tsk: TIPC port associated with message
*
* Note: Ancillary data is not captured if not requested by receiver.
*
* Returns 0 if successful, otherwise errno
*/
-static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
+static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
struct tipc_sock *tsk)
{
+ struct tipc_msg *msg;
u32 anc_data[3];
u32 err;
u32 dest_type;
@@ -1573,6 +1574,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
if (likely(m->msg_controllen == 0))
return 0;
+ msg = buf_msg(skb);
/* Optionally capture errored message object(s) */
err = msg ? msg_errcode(msg) : 0;
@@ -1583,6 +1585,9 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
if (res)
return res;
if (anc_data[1]) {
+ if (skb_linearize(skb))
+ return -ENOMEM;
+ msg = buf_msg(skb);
res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
msg_data(msg));
if (res)
@@ -1744,9 +1749,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, including error code and rejected data */
tipc_sk_set_orig_addr(m, skb);
- rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
if (unlikely(rc))
goto exit;
+ hdr = buf_msg(skb);
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
@@ -1856,9 +1862,10 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, incl. error code and rejected data */
if (!copied) {
tipc_sk_set_orig_addr(m, skb);
- rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
if (rc)
break;
+ hdr = buf_msg(skb);
}
/* Copy data if msg ok, otherwise return error/partial data */