aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/bpf_sk_storage.c1
-rw-r--r--net/core/datagram.c14
-rw-r--r--net/core/dev.c82
-rw-r--r--net/core/dev_ioctl.c105
-rw-r--r--net/core/dst.c26
-rw-r--r--net/core/filter.c213
-rw-r--r--net/core/neighbour.c123
-rw-r--r--net/core/net-procfs.c18
-rw-r--r--net/core/netdev-genl-gen.c2
-rw-r--r--net/core/rtnetlink.c220
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock.c13
-rw-r--r--net/core/sock_map.c20
13 files changed, 597 insertions, 248 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index bb378c33f542..7a36353dbc22 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -324,6 +324,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
+ .map_mem_usage = bpf_local_storage_map_mem_usage,
};
const struct bpf_func_proto bpf_sk_storage_get_proto = {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e4ff2db40c98..5662dff3d381 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -622,12 +622,12 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
+ struct page *head, *last_head = NULL;
struct page *pages[MAX_SKB_FRAGS];
- struct page *last_head = NULL;
+ int refs, order, n = 0;
size_t start;
ssize_t copied;
unsigned long truesize;
- int refs, n = 0;
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
@@ -650,9 +650,17 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
+
+ head = compound_head(pages[n]);
+ order = compound_order(head);
+
for (refs = 0; copied != 0; start = 0) {
int size = min_t(int, copied, PAGE_SIZE - start);
- struct page *head = compound_head(pages[n]);
+
+ if (pages[n] - head > (1UL << order) - 1) {
+ head = compound_head(pages[n]);
+ order = compound_order(head);
+ }
start += (pages[n] - head) << PAGE_SHIFT;
copied -= size;
diff --git a/net/core/dev.c b/net/core/dev.c
index 1488f700bf81..c7f13742b56c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -160,8 +160,6 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
static int netif_rx_internal(struct sk_buff *skb);
-static int call_netdevice_notifiers_info(unsigned long val,
- struct netdev_notifier_info *info);
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
@@ -1919,8 +1917,8 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev,
* are as for raw_notifier_call_chain().
*/
-static int call_netdevice_notifiers_info(unsigned long val,
- struct netdev_notifier_info *info)
+int call_netdevice_notifiers_info(unsigned long val,
+ struct netdev_notifier_info *info)
{
struct net *net = dev_net(info->dev);
int ret;
@@ -2535,6 +2533,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
struct xps_map *map, *new_map;
unsigned int nr_ids;
+ WARN_ON_ONCE(index >= dev->num_tx_queues);
+
if (dev->num_tc) {
/* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
@@ -3075,7 +3075,7 @@ void __netif_schedule(struct Qdisc *q)
EXPORT_SYMBOL(__netif_schedule);
struct dev_kfree_skb_cb {
- enum skb_free_reason reason;
+ enum skb_drop_reason reason;
};
static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
@@ -3108,7 +3108,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue)
}
EXPORT_SYMBOL(netif_tx_wake_queue);
-void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
unsigned long flags;
@@ -3128,18 +3128,16 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
-EXPORT_SYMBOL(__dev_kfree_skb_irq);
+EXPORT_SYMBOL(dev_kfree_skb_irq_reason);
-void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
+void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
if (in_hardirq() || irqs_disabled())
- __dev_kfree_skb_irq(skb, reason);
- else if (unlikely(reason == SKB_REASON_DROPPED))
- kfree_skb(skb);
+ dev_kfree_skb_irq_reason(skb, reason);
else
- consume_skb(skb);
+ kfree_skb_reason(skb, reason);
}
-EXPORT_SYMBOL(__dev_kfree_skb_any);
+EXPORT_SYMBOL(dev_kfree_skb_any_reason);
/**
@@ -3736,25 +3734,25 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
* we add to pkt_len the headers size of all segments
*/
if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
- unsigned int hdr_len;
u16 gso_segs = shinfo->gso_segs;
+ unsigned int hdr_len;
/* mac layer + network layer */
- hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+ hdr_len = skb_transport_offset(skb);
/* + transport layer */
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
const struct tcphdr *th;
struct tcphdr _tcphdr;
- th = skb_header_pointer(skb, skb_transport_offset(skb),
+ th = skb_header_pointer(skb, hdr_len,
sizeof(_tcphdr), &_tcphdr);
if (likely(th))
hdr_len += __tcp_hdrlen(th);
} else {
struct udphdr _udphdr;
- if (skb_header_pointer(skb, skb_transport_offset(skb),
+ if (skb_header_pointer(skb, hdr_len,
sizeof(_udphdr), &_udphdr))
hdr_len += sizeof(struct udphdr);
}
@@ -4361,7 +4359,11 @@ static inline void ____napi_schedule(struct softnet_data *sd,
}
list_add_tail(&napi->poll_list, &sd->poll_list);
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ /* If not called from net_rx_action()
+ * we have to raise NET_RX_SOFTIRQ.
+ */
+ if (!sd->in_net_rx_action)
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
#ifdef CONFIG_RPS
@@ -4583,11 +4585,16 @@ static void trigger_rx_softirq(void *data)
}
/*
- * Check if this softnet_data structure is another cpu one
- * If yes, queue it to our IPI list and return 1
- * If no, return 0
+ * After we queued a packet into sd->input_pkt_queue,
+ * we need to make sure this queue is serviced soon.
+ *
+ * - If this is another cpu queue, link it to our rps_ipi_list,
+ * and make sure we will process rps_ipi_list from net_rx_action().
+ *
+ * - If this is our own queue, NAPI schedule our backlog.
+ * Note that this also raises NET_RX_SOFTIRQ.
*/
-static int napi_schedule_rps(struct softnet_data *sd)
+static void napi_schedule_rps(struct softnet_data *sd)
{
struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
@@ -4596,12 +4603,15 @@ static int napi_schedule_rps(struct softnet_data *sd)
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- return 1;
+ /* If not called from net_rx_action()
+ * we have to raise NET_RX_SOFTIRQ.
+ */
+ if (!mysd->in_net_rx_action)
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ return;
}
#endif /* CONFIG_RPS */
__napi_schedule_irqoff(&mysd->backlog);
- return 0;
}
#ifdef CONFIG_NET_FLOW_LIMIT
@@ -5021,11 +5031,11 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
clist = clist->next;
WARN_ON(refcount_read(&skb->users));
- if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
+ if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED))
trace_consume_skb(skb, net_tx_action);
else
trace_kfree_skb(skb, net_tx_action,
- SKB_DROP_REASON_NOT_SPECIFIED);
+ get_kfree_skb_cb(skb)->reason);
if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
@@ -6641,6 +6651,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
LIST_HEAD(list);
LIST_HEAD(repoll);
+start:
+ sd->in_net_rx_action = true;
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
@@ -6651,8 +6663,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
skb_defer_free_flush(sd);
if (list_empty(&list)) {
- if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- goto end;
+ if (list_empty(&repoll)) {
+ sd->in_net_rx_action = false;
+ barrier();
+ /* We need to check if ____napi_schedule()
+ * had refilled poll_list while
+ * sd->in_net_rx_action was true.
+ */
+ if (!list_empty(&sd->poll_list))
+ goto start;
+ if (!sd_has_rps_ipi_waiting(sd))
+ goto end;
+ }
break;
}
@@ -6677,6 +6699,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
list_splice(&list, &sd->poll_list);
if (!list_empty(&sd->poll_list))
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ else
+ sd->in_net_rx_action = false;
net_rps_action_and_irq_enable(sd);
end:;
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 5cdbfbf9a7dc..3730945ee294 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -7,7 +7,7 @@
#include <linux/net_tstamp.h>
#include <linux/wireless.h>
#include <linux/if_bridge.h>
-#include <net/dsa.h>
+#include <net/dsa_stubs.h>
#include <net/wext.h>
#include "dev.h"
@@ -183,22 +183,18 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
return err;
}
-static int net_hwtstamp_validate(struct ifreq *ifr)
+static int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg)
{
- struct hwtstamp_config cfg;
enum hwtstamp_tx_types tx_type;
enum hwtstamp_rx_filters rx_filter;
int tx_type_valid = 0;
int rx_filter_valid = 0;
- if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
- return -EFAULT;
-
- if (cfg.flags & ~HWTSTAMP_FLAG_MASK)
+ if (cfg->flags & ~HWTSTAMP_FLAG_MASK)
return -EINVAL;
- tx_type = cfg.tx_type;
- rx_filter = cfg.rx_filter;
+ tx_type = cfg->tx_type;
+ rx_filter = cfg->rx_filter;
switch (tx_type) {
case HWTSTAMP_TX_OFF:
@@ -246,20 +242,45 @@ static int dev_eth_ioctl(struct net_device *dev,
struct ifreq *ifr, unsigned int cmd)
{
const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_eth_ioctl)
+ return -EOPNOTSUPP;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ return ops->ndo_eth_ioctl(dev, ifr, cmd);
+}
+
+static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
+{
+ return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP);
+}
+
+static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
+{
+ struct kernel_hwtstamp_config kernel_cfg;
+ struct netlink_ext_ack extack = {};
+ struct hwtstamp_config cfg;
int err;
- err = dsa_ndo_eth_ioctl(dev, ifr, cmd);
- if (err == 0 || err != -EOPNOTSUPP)
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ hwtstamp_config_to_kernel(&kernel_cfg, &cfg);
+
+ err = net_hwtstamp_validate(&kernel_cfg);
+ if (err)
return err;
- if (ops->ndo_eth_ioctl) {
- if (netif_device_present(dev))
- err = ops->ndo_eth_ioctl(dev, ifr, cmd);
- else
- err = -ENODEV;
+ err = dsa_master_hwtstamp_validate(dev, &kernel_cfg, &extack);
+ if (err) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ return err;
}
- return err;
+ return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP);
}
static int dev_siocbond(struct net_device *dev,
@@ -391,36 +412,31 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
rtnl_lock();
return err;
+ case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15:
+ return dev_siocdevprivate(dev, ifr, data, cmd);
+
case SIOCSHWTSTAMP:
- err = net_hwtstamp_validate(ifr);
- if (err)
- return err;
- fallthrough;
+ return dev_set_hwtstamp(dev, ifr);
- /*
- * Unknown or private ioctl
- */
- default:
- if (cmd >= SIOCDEVPRIVATE &&
- cmd <= SIOCDEVPRIVATE + 15)
- return dev_siocdevprivate(dev, ifr, data, cmd);
-
- if (cmd == SIOCGMIIPHY ||
- cmd == SIOCGMIIREG ||
- cmd == SIOCSMIIREG ||
- cmd == SIOCSHWTSTAMP ||
- cmd == SIOCGHWTSTAMP) {
- err = dev_eth_ioctl(dev, ifr, cmd);
- } else if (cmd == SIOCBONDENSLAVE ||
- cmd == SIOCBONDRELEASE ||
- cmd == SIOCBONDSETHWADDR ||
- cmd == SIOCBONDSLAVEINFOQUERY ||
- cmd == SIOCBONDINFOQUERY ||
- cmd == SIOCBONDCHANGEACTIVE) {
- err = dev_siocbond(dev, ifr, cmd);
- } else
- err = -EINVAL;
+ case SIOCGHWTSTAMP:
+ return dev_get_hwtstamp(dev, ifr);
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ case SIOCSMIIREG:
+ return dev_eth_ioctl(dev, ifr, cmd);
+
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDSLAVEINFOQUERY:
+ case SIOCBONDINFOQUERY:
+ case SIOCBONDCHANGEACTIVE:
+ return dev_siocbond(dev, ifr, cmd);
+
+ /* Unknown ioctl */
+ default:
+ err = -EINVAL;
}
return err;
}
@@ -462,6 +478,7 @@ EXPORT_SYMBOL(dev_load);
* @net: the applicable net namespace
* @cmd: command to issue
* @ifr: pointer to a struct ifreq in user space
+ * @data: data exchanged with userspace
* @need_copyout: whether or not copy_to_user() should be called
*
* Issue ioctl functions to devices. This is normally called by the
diff --git a/net/core/dst.c b/net/core/dst.c
index 31c08a3386d3..3247e84045ca 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->tclassid = 0;
#endif
dst->lwtstate = NULL;
- atomic_set(&dst->__refcnt, initial_ref);
+ rcuref_init(&dst->__rcuref, initial_ref);
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
@@ -162,31 +162,15 @@ EXPORT_SYMBOL(dst_dev_put);
void dst_release(struct dst_entry *dst)
{
- if (dst) {
- int newrefcnt;
-
- newrefcnt = atomic_dec_return(&dst->__refcnt);
- if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
- net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
- __func__, dst, newrefcnt);
- if (!newrefcnt)
- call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
- }
+ if (dst && rcuref_put(&dst->__rcuref))
+ call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
}
EXPORT_SYMBOL(dst_release);
void dst_release_immediate(struct dst_entry *dst)
{
- if (dst) {
- int newrefcnt;
-
- newrefcnt = atomic_dec_return(&dst->__refcnt);
- if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
- net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
- __func__, dst, newrefcnt);
- if (!newrefcnt)
- dst_destroy(dst);
- }
+ if (dst && rcuref_put(&dst->__rcuref))
+ dst_destroy(dst);
}
EXPORT_SYMBOL(dst_release_immediate);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1d6f165923bf..a8c8fd96c822 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1721,6 +1721,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags)
+{
+ return ____bpf_skb_store_bytes(skb, offset, from, len, flags);
+}
+
BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
void *, to, u32, len)
{
@@ -1751,6 +1757,11 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+{
+ return ____bpf_skb_load_bytes(skb, offset, to, len);
+}
+
BPF_CALL_4(bpf_flow_dissector_load_bytes,
const struct bpf_flow_dissector *, ctx, u32, offset,
void *, to, u32, len)
@@ -2193,7 +2204,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
return -ENOMEM;
}
- rcu_read_lock_bh();
+ rcu_read_lock();
if (!nh) {
dst = skb_dst(skb);
nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
@@ -2206,10 +2217,12 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
int ret;
sock_confirm_neigh(skb, neigh);
+ local_bh_disable();
dev_xmit_recursion_inc();
ret = neigh_output(neigh, skb, false);
dev_xmit_recursion_dec();
- rcu_read_unlock_bh();
+ local_bh_enable();
+ rcu_read_unlock();
return ret;
}
rcu_read_unlock_bh();
@@ -2291,7 +2304,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
return -ENOMEM;
}
- rcu_read_lock_bh();
+ rcu_read_lock();
if (!nh) {
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = container_of(dst, struct rtable, dst);
@@ -2303,7 +2316,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
} else if (nh->nh_family == AF_INET) {
neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
} else {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
goto out_drop;
}
@@ -2311,13 +2324,15 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
int ret;
sock_confirm_neigh(skb, neigh);
+ local_bh_disable();
dev_xmit_recursion_inc();
ret = neigh_output(neigh, skb, is_v6gw);
dev_xmit_recursion_dec();
- rcu_read_unlock_bh();
+ local_bh_enable();
+ rcu_read_unlock();
return ret;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
out_drop:
kfree_skb(skb);
return -ENETDOWN;
@@ -3828,7 +3843,7 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
{
return xdp_get_buff_len(xdp);
}
@@ -3883,8 +3898,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
-static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
- void *buf, unsigned long len, bool flush)
+void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush)
{
unsigned long ptr_len, ptr_off = 0;
skb_frag_t *next_frag, *end_frag;
@@ -3930,7 +3945,7 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
}
}
-static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
u32 size = xdp->data_end - xdp->data;
@@ -3988,6 +4003,11 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
+{
+ return ____bpf_xdp_load_bytes(xdp, offset, buf, len);
+}
+
BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
void *, buf, u32, len)
{
@@ -4015,6 +4035,11 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
+{
+ return ____bpf_xdp_store_bytes(xdp, offset, buf, len);
+}
+
static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -5850,7 +5875,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
else
neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
- if (!neigh || !(neigh->nud_state & NUD_VALID))
+ if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
@@ -5971,7 +5996,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
* not needed here.
*/
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
- if (!neigh || !(neigh->nud_state & NUD_VALID))
+ if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
@@ -8144,12 +8169,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_delete_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sk_msg_proto;
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
-#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
@@ -9264,11 +9283,15 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
#endif
/* <store>: skb->tstamp = tstamp */
- *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
- offsetof(struct sk_buff, tstamp));
+ *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM,
+ skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm);
return insn;
}
+#define BPF_EMIT_STORE(size, si, off) \
+ BPF_RAW_INSN(BPF_CLASS((si)->code) | (size) | BPF_MEM, \
+ (si)->dst_reg, (si)->src_reg, (off), (si)->imm)
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -9298,9 +9321,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, priority):
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, priority, 4,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ bpf_target_off(struct sk_buff, priority, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, priority, 4,
@@ -9331,9 +9354,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, mark):
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, mark, 4,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ bpf_target_off(struct sk_buff, mark, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, mark, 4,
@@ -9352,11 +9375,16 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, queue_mapping):
if (type == BPF_WRITE) {
- *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- queue_mapping,
- 2, target_size));
+ u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size);
+
+ if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) {
+ *insn++ = BPF_JMP_A(0); /* noop */
+ break;
+ }
+
+ if (BPF_CLASS(si->code) == BPF_STX)
+ *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
+ *insn++ = BPF_EMIT_STORE(BPF_H, si, off);
} else {
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff,
@@ -9392,8 +9420,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct sk_buff, cb);
off += offsetof(struct qdisc_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
else
*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
si->src_reg, off);
@@ -9408,8 +9435,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct qdisc_skb_cb, tc_classid);
*target_size = 2;
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_H, si, off);
else
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
si->src_reg, off);
@@ -9442,9 +9468,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, tc_index, 2,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_H, si,
+ bpf_target_off(struct sk_buff, tc_index, 2,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, tc_index, 2,
@@ -9645,8 +9671,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_bound_dev_if));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_bound_dev_if));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_bound_dev_if));
@@ -9656,8 +9682,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_mark));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_mark));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_mark));
@@ -9667,8 +9693,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_priority));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_priority));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_priority));
@@ -9933,10 +9959,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(S, TF)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
- *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
+ *insn++ = BPF_RAW_INSN(SIZE | BPF_MEM | BPF_CLASS(si->code), \
+ tmp_reg, si->src_reg, \
bpf_target_off(NS, NF, sizeof_field(NS, NF), \
target_size) \
- + OFF); \
+ + OFF, \
+ si->imm); \
*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
offsetof(S, TF)); \
} while (0)
@@ -10171,9 +10199,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_sock_ops_kern, sk),\
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
- reg, si->src_reg, \
- offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) | \
+ BPF_MEM | BPF_CLASS(si->code), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD), \
+ si->imm); \
*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
temp)); \
@@ -10205,8 +10235,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
off -= offsetof(struct bpf_sock_ops, replylong[0]);
off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- off);
+ *insn++ = BPF_EMIT_STORE(BPF_W, si, off);
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
off);
@@ -10563,8 +10592,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct sk_buff, cb);
off += offsetof(struct sk_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
else
*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
si->src_reg, off);
@@ -11621,3 +11649,82 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
return func;
}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr__uninit);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
+
+ return 0;
+}
+
+__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr__uninit);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
+
+ return 0;
+}
+__diag_pop();
+
+int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ int err;
+
+ err = bpf_dynptr_from_skb(skb, flags, ptr__uninit);
+ if (err)
+ return err;
+
+ bpf_dynptr_set_rdonly(ptr__uninit);
+
+ return 0;
+}
+
+BTF_SET8_START(bpf_kfunc_check_set_skb)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+BTF_SET8_END(bpf_kfunc_check_set_skb)
+
+BTF_SET8_START(bpf_kfunc_check_set_xdp)
+BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_SET8_END(bpf_kfunc_check_set_xdp)
+
+static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_skb,
+};
+
+static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_xdp,
+};
+
+static int __init bpf_kfunc_init(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCKET_FILTER, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+}
+late_initcall(bpf_kfunc_init);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6798f6d2423b..ddd0f32de20e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -614,7 +614,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
NEIGH_CACHE_STAT_INC(tbl, lookups);
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __neigh_lookup_noref(tbl, pkey, dev);
if (n) {
if (!refcount_inc_not_zero(&n->refcnt))
@@ -622,42 +622,11 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
NEIGH_CACHE_STAT_INC(tbl, hits);
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return n;
}
EXPORT_SYMBOL(neigh_lookup);
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
- const void *pkey)
-{
- struct neighbour *n;
- unsigned int key_len = tbl->key_len;
- u32 hash_val;
- struct neigh_hash_table *nht;
-
- NEIGH_CACHE_STAT_INC(tbl, lookups);
-
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (!memcmp(n->primary_key, pkey, key_len) &&
- net_eq(dev_net(n->dev), net)) {
- if (!refcount_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
- }
-
- rcu_read_unlock_bh();
- return n;
-}
-EXPORT_SYMBOL(neigh_lookup_nodev);
-
static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, u32 flags,
@@ -1124,13 +1093,13 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->nud_state = NUD_STALE;
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
@@ -1140,14 +1109,14 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
- neigh->nud_state = NUD_REACHABLE;
+ WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
- neigh->nud_state = NUD_PROBE;
+ WRITE_ONCE(neigh->nud_state, NUD_PROBE);
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
@@ -1161,7 +1130,7 @@ static void neigh_timer_handler(struct timer_list *t)
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
notify = 1;
neigh_invalidate(neigh);
goto out;
@@ -1210,7 +1179,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh_del_timer(neigh);
- neigh->nud_state = NUD_INCOMPLETE;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
neigh->updated = now;
if (!immediate_ok) {
next = now + 1;
@@ -1222,7 +1191,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
}
neigh_add_timer(neigh, next);
} else {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
@@ -1232,7 +1201,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
} else if (neigh->nud_state & NUD_STALE) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh_del_timer(neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_add_timer(neigh, jiffies +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
@@ -1344,7 +1313,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
new = old & ~NUD_PERMANENT;
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
goto out;
}
@@ -1353,7 +1322,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
neigh_suspect(neigh);
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -1432,7 +1401,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0)));
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
notify = 1;
}
@@ -1519,7 +1488,7 @@ void __neigh_set_probe_once(struct neighbour *neigh)
neigh->updated = jiffies;
if (!(neigh->nud_state & NUD_FAILED))
return;
- neigh->nud_state = NUD_INCOMPLETE;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
atomic_set(&neigh->probes, neigh_max_probes(neigh));
neigh_add_timer(neigh,
jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
@@ -2215,11 +2184,11 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
};
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);
ndc.ndtc_hash_rnd = nht->hash_rnd[0];
ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
goto nla_put_failure;
@@ -2734,15 +2703,15 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);
for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
- for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
+ for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
n != NULL;
- n = rcu_dereference_bh(n->next)) {
+ n = rcu_dereference(n->next)) {
if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -2761,7 +2730,7 @@ next:
}
rc = skb->len;
out:
- rcu_read_unlock_bh();
+ rcu_read_unlock();
cb->args[1] = h;
cb->args[2] = idx;
return rc;
@@ -3106,20 +3075,20 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
int chain;
struct neigh_hash_table *nht;
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);
- read_lock(&tbl->lock); /* avoid resizes */
+ read_lock_bh(&tbl->lock); /* avoid resizes */
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
- for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
+ for (n = rcu_dereference(nht->hash_buckets[chain]);
n != NULL;
- n = rcu_dereference_bh(n->next))
+ n = rcu_dereference(n->next))
cb(n, cookie);
}
- read_unlock(&tbl->lock);
- rcu_read_unlock_bh();
+ read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_for_each);
@@ -3169,7 +3138,7 @@ int neigh_xmit(int index, struct net_device *dev,
tbl = neigh_tables[index];
if (!tbl)
goto out;
- rcu_read_lock_bh();
+ rcu_read_lock();
if (index == NEIGH_ARP_TABLE) {
u32 key = *((u32 *)addr);
@@ -3181,11 +3150,11 @@ int neigh_xmit(int index, struct net_device *dev,
neigh = __neigh_create(tbl, addr, dev, false);
err = PTR_ERR(neigh);
if (IS_ERR(neigh)) {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
goto out_kfree_skb;
}
err = neigh->output(neigh, skb);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
else if (index == NEIGH_LINK_TABLE) {
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
@@ -3214,7 +3183,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
- n = rcu_dereference_bh(nht->hash_buckets[bucket]);
+ n = rcu_dereference(nht->hash_buckets[bucket]);
while (n) {
if (!net_eq(dev_net(n->dev), net))
@@ -3229,10 +3198,10 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
}
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
break;
- if (n->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
break;
next:
- n = rcu_dereference_bh(n->next);
+ n = rcu_dereference(n->next);
}
if (n)
@@ -3256,7 +3225,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (v)
return n;
}
- n = rcu_dereference_bh(n->next);
+ n = rcu_dereference(n->next);
while (1) {
while (n) {
@@ -3271,10 +3240,10 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
break;
- if (n->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
break;
next:
- n = rcu_dereference_bh(n->next);
+ n = rcu_dereference(n->next);
}
if (n)
@@ -3283,7 +3252,7 @@ next:
if (++state->bucket >= (1 << nht->hash_shift))
break;
- n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
+ n = rcu_dereference(nht->hash_buckets[state->bucket]);
}
if (n && pos)
@@ -3385,7 +3354,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
__acquires(tbl->lock)
- __acquires(rcu_bh)
+ __acquires(rcu)
{
struct neigh_seq_state *state = seq->private;
@@ -3393,9 +3362,9 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
state->bucket = 0;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
- rcu_read_lock_bh();
- state->nht = rcu_dereference_bh(tbl->nht);
- read_lock(&tbl->lock);
+ rcu_read_lock();
+ state->nht = rcu_dereference(tbl->nht);
+ read_lock_bh(&tbl->lock);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -3430,13 +3399,13 @@ EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
__releases(tbl->lock)
- __releases(rcu_bh)
+ __releases(rcu)
{
struct neigh_seq_state *state = seq->private;
struct neigh_table *tbl = state->tbl;
- read_unlock(&tbl->lock);
- rcu_read_unlock_bh();
+ read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_seq_stop);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 1ec23bf8b05c..09f7ed1a04e8 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -115,10 +115,14 @@ static int dev_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static u32 softnet_backlog_len(struct softnet_data *sd)
+static u32 softnet_input_pkt_queue_len(struct softnet_data *sd)
{
- return skb_queue_len_lockless(&sd->input_pkt_queue) +
- skb_queue_len_lockless(&sd->process_queue);
+ return skb_queue_len_lockless(&sd->input_pkt_queue);
+}
+
+static u32 softnet_process_queue_len(struct softnet_data *sd)
+{
+ return skb_queue_len_lockless(&sd->process_queue);
}
static struct softnet_data *softnet_get_online(loff_t *pos)
@@ -152,6 +156,8 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
static int softnet_seq_show(struct seq_file *seq, void *v)
{
struct softnet_data *sd = v;
+ u32 input_qlen = softnet_input_pkt_queue_len(sd);
+ u32 process_qlen = softnet_process_queue_len(sd);
unsigned int flow_limit_count = 0;
#ifdef CONFIG_NET_FLOW_LIMIT
@@ -169,12 +175,14 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
* mapping the data a specific CPU
*/
seq_printf(seq,
- "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x\n",
sd->processed, sd->dropped, sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
sd->received_rps, flow_limit_count,
- softnet_backlog_len(sd), (int)seq->index);
+ input_qlen + process_qlen, (int)seq->index,
+ input_qlen, process_qlen);
return 0;
}
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 3abab70d66dd..de17ca2f7dbf 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -16,7 +16,7 @@ static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1
};
/* Ops table for netdev */
-static const struct genl_split_ops netdev_nl_ops[2] = {
+static const struct genl_split_ops netdev_nl_ops[] = {
{
.cmd = NETDEV_CMD_DEV_GET,
.doit = netdev_nl_dev_get_doit,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6e44e92ebdf5..e844d75220fb 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -54,6 +54,9 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/devlink.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/addrconf.h>
+#endif
#include "dev.h"
@@ -840,7 +843,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
if (dst) {
ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
ci.rta_used = dst->__use;
- ci.rta_clntref = atomic_read(&dst->__refcnt);
+ ci.rta_clntref = rcuref_read(&dst->__rcuref);
}
if (expires) {
unsigned long clock;
@@ -6070,6 +6073,217 @@ static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct br_port_msg *bpm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request");
+ return -EINVAL;
+ }
+
+ bpm = nlmsg_data(nlh);
+ if (bpm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request");
+ return -EINVAL;
+ }
+ if (nlmsg_attrlen(nlh, sizeof(*bpm))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct rtnl_mdb_dump_ctx {
+ long idx;
+};
+
+static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int idx, s_idx;
+ int err;
+
+ NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx);
+
+ if (cb->strict_check) {
+ err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack);
+ if (err)
+ return err;
+ }
+
+ s_idx = ctx->idx;
+ idx = 0;
+
+ for_each_netdev(net, dev) {
+ if (idx < s_idx)
+ goto skip;
+ if (!dev->netdev_ops->ndo_mdb_dump)
+ goto skip;
+
+ err = dev->netdev_ops->ndo_mdb_dump(dev, skb, cb);
+ if (err == -EMSGSIZE)
+ goto out;
+ /* Moving on to next device, reset markers and sequence
+ * counters since they are all maintained per-device.
+ */
+ memset(cb->ctx, 0, sizeof(cb->ctx));
+ cb->prev_seq = 0;
+ cb->seq = 0;
+skip:
+ idx++;
+ }
+
+out:
+ ctx->idx = idx;
+ return skb->len;
+}
+
+static int rtnl_validate_mdb_entry(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(attr);
+
+ if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length");
+ return -EINVAL;
+ }
+
+ if (entry->ifindex == 0) {
+ NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed");
+ return -EINVAL;
+ }
+
+ if (entry->addr.proto == htons(ETH_P_IP)) {
+ if (!ipv4_is_multicast(entry->addr.u.ip4) &&
+ !ipv4_is_zeronet(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0");
+ return -EINVAL;
+ }
+ if (ipv4_is_local_multicast(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast");
+ return -EINVAL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (entry->addr.proto == htons(ETH_P_IPV6)) {
+ if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) {
+ NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes");
+ return -EINVAL;
+ }
+#endif
+ } else if (entry->addr.proto == 0) {
+ /* L2 mdb */
+ if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) {
+ NL_SET_ERR_MSG(extack, "L2 entry group is not multicast");
+ return -EINVAL;
+ }
+ } else {
+ NL_SET_ERR_MSG(extack, "Unknown entry protocol");
+ return -EINVAL;
+ }
+
+ if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
+ NL_SET_ERR_MSG(extack, "Unknown entry state");
+ return -EINVAL;
+ }
+ if (entry->vid >= VLAN_VID_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid entry VLAN id");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = {
+ [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 },
+ [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ rtnl_validate_mdb_entry,
+ sizeof(struct br_mdb_entry)),
+ [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED },
+};
+
+static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct br_port_msg *bpm;
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, mdba_policy, extack);
+ if (err)
+ return err;
+
+ bpm = nlmsg_data(nlh);
+ if (!bpm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Invalid ifindex");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, bpm->ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device doesn't exist");
+ return -ENODEV;
+ }
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) {
+ NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+
+ if (!dev->netdev_ops->ndo_mdb_add) {
+ NL_SET_ERR_MSG(extack, "Device does not support MDB operations");
+ return -EOPNOTSUPP;
+ }
+
+ return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack);
+}
+
+static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct br_port_msg *bpm;
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, mdba_policy, extack);
+ if (err)
+ return err;
+
+ bpm = nlmsg_data(nlh);
+ if (!bpm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Invalid ifindex");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, bpm->ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device doesn't exist");
+ return -ENODEV;
+ }
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) {
+ NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+
+ if (!dev->netdev_ops->ndo_mdb_del) {
+ NL_SET_ERR_MSG(extack, "Device does not support MDB operations");
+ return -EOPNOTSUPP;
+ }
+
+ return dev->netdev_ops->ndo_mdb_del(dev, tb, extack);
+}
+
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -6304,4 +6518,8 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
0);
rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
+
+ rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4c0879798eb8..78238a13dbcf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -420,10 +420,9 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb = __build_skb(data, frag_size);
- if (skb && frag_size) {
+ if (likely(skb && frag_size)) {
skb->head_frag = 1;
- if (page_is_pfmemalloc(virt_to_head_page(data)))
- skb->pfmemalloc = 1;
+ skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
}
return skb;
}
@@ -445,8 +444,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,
if (frag_size) {
skb->head_frag = 1;
- if (page_is_pfmemalloc(virt_to_head_page(data)))
- skb->pfmemalloc = 1;
+ skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
}
return skb;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index c25888795390..5440e67bcfe3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1396,15 +1396,10 @@ set_sndbuf:
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
- /* allow unprivileged users to decrease the value */
- if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN))
- ret = -EPERM;
- else {
- if (val < 0)
- ret = -EINVAL;
- else
- WRITE_ONCE(sk->sk_ll_usec, val);
- }
+ if (val < 0)
+ ret = -EINVAL;
+ else
+ WRITE_ONCE(sk->sk_ll_usec, val);
break;
case SO_PREFER_BUSY_POLL:
if (valbool && !sockopt_capable(CAP_NET_ADMIN))
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index a68a7290a3b2..9b854e236d23 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -797,6 +797,14 @@ static void sock_map_fini_seq_private(void *priv_data)
bpf_map_put_with_uref(info->map);
}
+static u64 sock_map_mem_usage(const struct bpf_map *map)
+{
+ u64 usage = sizeof(struct bpf_stab);
+
+ usage += (u64)map->max_entries * sizeof(struct sock *);
+ return usage;
+}
+
static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
.seq_ops = &sock_map_seq_ops,
.init_seq_private = sock_map_init_seq_private,
@@ -816,6 +824,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = sock_map_mem_usage,
.map_btf_id = &sock_map_btf_ids[0],
.iter_seq_info = &sock_map_iter_seq_info,
};
@@ -1397,6 +1406,16 @@ static void sock_hash_fini_seq_private(void *priv_data)
bpf_map_put_with_uref(info->map);
}
+static u64 sock_hash_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ u64 usage = sizeof(*htab);
+
+ usage += htab->buckets_num * sizeof(struct bpf_shtab_bucket);
+ usage += atomic_read(&htab->count) * (u64)htab->elem_size;
+ return usage;
+}
+
static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
.seq_ops = &sock_hash_seq_ops,
.init_seq_private = sock_hash_init_seq_private,
@@ -1416,6 +1435,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = sock_hash_mem_usage,
.map_btf_id = &sock_hash_map_btf_ids[0],
.iter_seq_info = &sock_hash_iter_seq_info,
};