Diffstat (limited to 'include/net')
-rw-r--r--  include/net/9p/9p.h  6
-rw-r--r--  include/net/Space.h  7
-rw-r--r--  include/net/addrconf.h  2
-rw-r--r--  include/net/af_rxrpc.h  24
-rw-r--r--  include/net/af_unix.h  6
-rw-r--r--  include/net/af_vsock.h  19
-rw-r--r--  include/net/arp.h  8
-rw-r--r--  include/net/ax25.h  5
-rw-r--r--  include/net/bluetooth/bluetooth.h  54
-rw-r--r--  include/net/bluetooth/coredump.h  116
-rw-r--r--  include/net/bluetooth/hci.h  48
-rw-r--r--  include/net/bluetooth/hci_core.h  197
-rw-r--r--  include/net/bluetooth/hci_sync.h  7
-rw-r--r--  include/net/bluetooth/l2cap.h  2
-rw-r--r--  include/net/bluetooth/mgmt.h  83
-rw-r--r--  include/net/bluetooth/sco.h  2
-rw-r--r--  include/net/bond_3ad.h  1
-rw-r--r--  include/net/bonding.h  21
-rw-r--r--  include/net/busy_poll.h  1
-rw-r--r--  include/net/caif/cfsrvl.h  3
-rw-r--r--  include/net/cfg80211.h  276
-rw-r--r--  include/net/cfg802154.h  6
-rw-r--r--  include/net/codel.h  4
-rw-r--r--  include/net/datalink.h  2
-rw-r--r--  include/net/devlink.h  301
-rw-r--r--  include/net/dropreason-core.h  376
-rw-r--r--  include/net/dropreason.h  376
-rw-r--r--  include/net/dsa.h  80
-rw-r--r--  include/net/dsa_stubs.h  48
-rw-r--r--  include/net/dst.h  30
-rw-r--r--  include/net/dst_ops.h  2
-rw-r--r--  include/net/flow.h  3
-rw-r--r--  include/net/flow_dissector.h  75
-rw-r--r--  include/net/flow_offload.h  6
-rw-r--r--  include/net/fou.h  2
-rw-r--r--  include/net/fq.h  5
-rw-r--r--  include/net/genetlink.h  76
-rw-r--r--  include/net/gro.h  70
-rw-r--r--  include/net/gso.h  109
-rw-r--r--  include/net/handshake.h  49
-rw-r--r--  include/net/ieee80211_radiotap.h  220
-rw-r--r--  include/net/ieee802154_netdev.h  20
-rw-r--r--  include/net/ila.h  16
-rw-r--r--  include/net/inet6_hashtables.h  81
-rw-r--r--  include/net/inet_common.h  7
-rw-r--r--  include/net/inet_connection_sock.h  7
-rw-r--r--  include/net/inet_frag.h  4
-rw-r--r--  include/net/inet_hashtables.h  76
-rw-r--r--  include/net/inet_sock.h  106
-rw-r--r--  include/net/ip.h  39
-rw-r--r--  include/net/ip6_fib.h  76
-rw-r--r--  include/net/ip6_route.h  4
-rw-r--r--  include/net/ip_tunnels.h  39
-rw-r--r--  include/net/ip_vs.h  32
-rw-r--r--  include/net/ipv6.h  14
-rw-r--r--  include/net/iw_handler.h  11
-rw-r--r--  include/net/kcm.h  2
-rw-r--r--  include/net/llc_c_ac.h  1
-rw-r--r--  include/net/llc_c_ev.h  1
-rw-r--r--  include/net/llc_conn.h  2
-rw-r--r--  include/net/llc_pdu.h  6
-rw-r--r--  include/net/lwtunnel.h  5
-rw-r--r--  include/net/mac80211.h  289
-rw-r--r--  include/net/macsec.h  12
-rw-r--r--  include/net/mana/gdma.h  24
-rw-r--r--  include/net/mana/hw_channel.h  5
-rw-r--r--  include/net/mana/mana.h  139
-rw-r--r--  include/net/mctp.h  4
-rw-r--r--  include/net/mptcp.h  21
-rw-r--r--  include/net/ndisc.h  15
-rw-r--r--  include/net/neighbour.h  14
-rw-r--r--  include/net/net_namespace.h  14
-rw-r--r--  include/net/netdev_queues.h  173
-rw-r--r--  include/net/netdev_rx_queue.h  53
-rw-r--r--  include/net/netfilter/nf_bpf_link.h  15
-rw-r--r--  include/net/netfilter/nf_conntrack.h  4
-rw-r--r--  include/net/netfilter/nf_conntrack_acct.h  2
-rw-r--r--  include/net/netfilter/nf_conntrack_core.h  6
-rw-r--r--  include/net/netfilter/nf_conntrack_expect.h  20
-rw-r--r--  include/net/netfilter/nf_conntrack_helper.h  3
-rw-r--r--  include/net/netfilter/nf_conntrack_labels.h  1
-rw-r--r--  include/net/netfilter/nf_conntrack_tuple.h  3
-rw-r--r--  include/net/netfilter/nf_flow_table.h  6
-rw-r--r--  include/net/netfilter/nf_nat_redirect.h  3
-rw-r--r--  include/net/netfilter/nf_tables.h  230
-rw-r--r--  include/net/netlink.h  10
-rw-r--r--  include/net/netns/ipv4.h  4
-rw-r--r--  include/net/netns/ipv6.h  3
-rw-r--r--  include/net/netns/nftables.h  2
-rw-r--r--  include/net/nexthop.h  23
-rw-r--r--  include/net/nsh.h  2
-rw-r--r--  include/net/p8022.h  3
-rw-r--r--  include/net/page_pool.h  414
-rw-r--r--  include/net/page_pool/helpers.h  238
-rw-r--r--  include/net/page_pool/types.h  236
-rw-r--r--  include/net/phonet/phonet.h  21
-rw-r--r--  include/net/pie.h  2
-rw-r--r--  include/net/ping.h  6
-rw-r--r--  include/net/pkt_cls.h  15
-rw-r--r--  include/net/pkt_sched.h  62
-rw-r--r--  include/net/raw.h  7
-rw-r--r--  include/net/rawv6.h  2
-rw-r--r--  include/net/regulatory.h  13
-rw-r--r--  include/net/route.h  21
-rw-r--r--  include/net/rpl.h  6
-rw-r--r--  include/net/rsi_91x.h  2
-rw-r--r--  include/net/rtnetlink.h  4
-rw-r--r--  include/net/sch_generic.h  56
-rw-r--r--  include/net/scm.h  81
-rw-r--r--  include/net/sctp/sctp.h  14
-rw-r--r--  include/net/sctp/sm.h  3
-rw-r--r--  include/net/sctp/stream_sched.h  2
-rw-r--r--  include/net/sctp/structs.h  13
-rw-r--r--  include/net/smc.h  1
-rw-r--r--  include/net/sock.h  60
-rw-r--r--  include/net/switchdev.h  12
-rw-r--r--  include/net/tcp.h  104
-rw-r--r--  include/net/tcx.h  206
-rw-r--r--  include/net/tls.h  23
-rw-r--r--  include/net/tls_prot.h  68
-rw-r--r--  include/net/transp_v6.h  2
-rw-r--r--  include/net/udp.h  9
-rw-r--r--  include/net/udplite.h  2
-rw-r--r--  include/net/vxlan.h  46
-rw-r--r--  include/net/x25.h  5
-rw-r--r--  include/net/xdp.h  58
-rw-r--r--  include/net/xdp_sock.h  8
-rw-r--r--  include/net/xdp_sock_drv.h  52
-rw-r--r--  include/net/xfrm.h  7
-rw-r--r--  include/net/xsk_buff_pool.h  14
130 files changed, 4422 insertions, 1853 deletions
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 429adf6be29c..60cad0d200a4 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -42,6 +42,8 @@ enum p9_debug_flags {
P9_DEBUG_PKT = (1<<10),
P9_DEBUG_FSC = (1<<11),
P9_DEBUG_VPKT = (1<<12),
+ P9_DEBUG_CACHE = (1<<13),
+ P9_DEBUG_MMAP = (1<<14),
};
#ifdef CONFIG_NET_9P_DEBUG
@@ -213,6 +215,10 @@ enum p9_open_mode_t {
P9_ORCLOSE = 0x40,
P9_OAPPEND = 0x80,
P9_OEXCL = 0x1000,
+ P9L_MODE_MASK = 0x1FFF, /* don't send anything under this to server */
+ P9L_DIRECT = 0x2000, /* cache disabled */
+ P9L_NOWRITECACHE = 0x4000, /* no write caching */
+ P9L_LOOSE = 0x8000, /* loose cache */
};
/**
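The new P9L_* values are client-only cache-mode bits that live above the 9P wire-visible open-mode bits; P9L_MODE_MASK marks the boundary between the two. A minimal sketch of the masking this implies (p9_wire_mode() is a hypothetical helper, not part of the patch):

static u32 p9_wire_mode(u32 mode)
{
	/* strip the client-local P9L_DIRECT, P9L_NOWRITECACHE and
	 * P9L_LOOSE bits before the mode goes out to the server
	 */
	return mode & P9L_MODE_MASK;
}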
diff --git a/include/net/Space.h b/include/net/Space.h
index 08ca9cef0213..c29f3d51c078 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -3,18 +3,11 @@
* ethernet adaptor have the name "eth[0123...]".
*/
-struct net_device *hp100_probe(int unit);
struct net_device *ultra_probe(int unit);
struct net_device *wd_probe(int unit);
struct net_device *ne_probe(int unit);
-struct net_device *fmv18x_probe(int unit);
-struct net_device *ni65_probe(int unit);
-struct net_device *sonic_probe(int unit);
struct net_device *smc_init(int unit);
struct net_device *cs89x0_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
struct net_device *cops_probe(int unit);
-
-/* Fibre Channel adapters */
-int iph5526_probe(struct net_device *dev);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index c04f359655b8..82da55101b5a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -223,7 +223,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
void __ipv6_sock_mc_close(struct sock *sk);
void ipv6_sock_mc_close(struct sock *sk);
-bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
+bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
const struct in6_addr *src_addr);
int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ba717eac0229..5531dd08061e 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -40,16 +40,17 @@ typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long);
void rxrpc_kernel_new_call_notification(struct socket *,
rxrpc_notify_new_call_t,
rxrpc_discard_new_call_t);
-struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
- struct sockaddr_rxrpc *,
- struct key *,
- unsigned long,
- s64,
- gfp_t,
- rxrpc_notify_rx_t,
- bool,
- enum rxrpc_interruptibility,
- unsigned int);
+struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
+ struct sockaddr_rxrpc *srx,
+ struct key *key,
+ unsigned long user_call_ID,
+ s64 tx_total_len,
+ u32 hard_timeout,
+ gfp_t gfp,
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade,
+ enum rxrpc_interruptibility interruptibility,
+ unsigned int debug_id);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
rxrpc_notify_end_tx_t);
@@ -57,7 +58,8 @@ int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
struct iov_iter *, size_t *, bool, u32 *, u16 *);
bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, enum rxrpc_abort_reason);
-void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
+void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call);
+void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
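The single rxrpc_kernel_end_call() is split here into an explicit shutdown step and a reference drop. A hedged sketch of what a kernel user would presumably do now (example_end_call() is illustrative, not from the patch):

static void example_end_call(struct socket *sock, struct rxrpc_call *call)
{
	rxrpc_kernel_shutdown_call(sock, call);	/* terminate the call */
	rxrpc_kernel_put_call(sock, call);	/* drop our reference */
}

Separating the two lets a caller keep its reference to a call after shutting it down, which the old combined API could not express.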
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 480fa579787e..824c258143a3 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -11,6 +11,7 @@
void unix_inflight(struct user_struct *user, struct file *fp);
void unix_notinflight(struct user_struct *user, struct file *fp);
void unix_destruct_scm(struct sk_buff *skb);
+void io_uring_destruct_scm(struct sk_buff *skb);
void unix_gc(void);
void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
@@ -73,10 +74,7 @@ struct unix_sock {
#endif
};
-static inline struct unix_sock *unix_sk(const struct sock *sk)
-{
- return (struct unix_sock *)sk;
-}
+#define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk)
#define peer_wait peer_wq.wait
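Replacing the cast-based helper with container_of_const() preserves the const qualifier of the argument instead of silently discarding it (the ax25.h hunk below makes the same change). An illustrative sketch, not from the patch:

/* With the old (struct unix_sock *)sk cast, a const input lost its
 * qualifier; container_of_const() propagates it to the result.
 */
static const struct unix_sock *example(const struct sock *sk)
{
	return unix_sk(sk);
}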
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 568a87c5e0d0..b01cf9ac2437 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -75,6 +75,7 @@ struct vsock_sock {
void *trans;
};
+s64 vsock_connectible_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
@@ -173,6 +174,9 @@ struct vsock_transport {
/* Addressing. */
u32 (*get_local_cid)(void);
+
+ /* Read a single skb */
+ int (*read_skb)(struct vsock_sock *, skb_read_actor_t);
};
/**** CORE ****/
@@ -197,7 +201,6 @@ static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
return !list_empty(&vsk->connected_table);
}
-void vsock_release_pending(struct sock *pending);
void vsock_add_pending(struct sock *listener, struct sock *pending);
void vsock_remove_pending(struct sock *listener, struct sock *pending);
void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
@@ -221,9 +224,21 @@ struct vsock_tap {
struct list_head list;
};
-int vsock_init_tap(void);
int vsock_add_tap(struct vsock_tap *vt);
int vsock_remove_tap(struct vsock_tap *vt);
void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
+int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags);
+int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags);
+
+#ifdef CONFIG_BPF_SYSCALL
+extern struct proto vsock_proto;
+int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+void __init vsock_bpf_build_proto(void);
+#else
+static inline void __init vsock_bpf_build_proto(void)
+{}
+#endif
#endif /* __AF_VSOCK_H__ */
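The new read_skb hook hands a single receive skb to an actor, matching the read_skb interface used elsewhere for BPF sockmap support. A hedged sketch of a transport implementation, assuming the usual actor signature; my_rx_queue() is a hypothetical per-socket receive-queue accessor:

static int example_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
	struct sk_buff *skb = skb_dequeue(my_rx_queue(vsk)); /* hypothetical */

	if (!skb)
		return -EAGAIN;

	return recv_actor(sk_vsock(vsk), skb);	/* hand one skb to the actor */
}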
diff --git a/include/net/arp.h b/include/net/arp.h
index d7ef4ec71dfe..e8747e0713c7 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -38,11 +38,11 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
{
struct neighbour *n;
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv4_neigh_lookup_noref(dev, key);
if (n && !refcount_inc_not_zero(&n->refcnt))
n = NULL;
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return n;
}
@@ -51,10 +51,10 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
{
struct neighbour *n;
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv4_neigh_lookup_noref(dev, key);
neigh_confirm(n);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
void arp_init(void);
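The switch from rcu_read_lock_bh() to rcu_read_lock() reflects the neighbour tables moving to plain RCU protection; callers are unchanged and still leave the read-side section holding a reference. A usage sketch (example only):

static void example_lookup(struct net_device *dev, __be32 ip)
{
	struct neighbour *n = __ipv4_neigh_lookup(dev, (__force u32)ip);

	if (n) {
		/* ... use the neighbour ... */
		neigh_release(n);	/* drop the reference taken above */
	}
}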
diff --git a/include/net/ax25.h b/include/net/ax25.h
index f8cf3629a419..0d939e5aee4e 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -260,10 +260,7 @@ struct ax25_sock {
struct ax25_cb *cb;
};
-static inline struct ax25_sock *ax25_sk(const struct sock *sk)
-{
- return (struct ax25_sock *) sk;
-}
+#define ax25_sk(ptr) container_of_const(ptr, struct ax25_sock, sk)
static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
{
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index bcc5a4cd2c17..aa90adc3b2a4 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -1,6 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (C) 2000-2001 Qualcomm Incorporated
+ Copyright 2023 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -171,23 +172,39 @@ struct bt_iso_io_qos {
__u8 rtn;
};
-struct bt_iso_qos {
- union {
- __u8 cig;
- __u8 big;
- };
- union {
- __u8 cis;
- __u8 bis;
- };
- union {
- __u8 sca;
- __u8 sync_interval;
- };
+struct bt_iso_ucast_qos {
+ __u8 cig;
+ __u8 cis;
+ __u8 sca;
+ __u8 packing;
+ __u8 framing;
+ struct bt_iso_io_qos in;
+ struct bt_iso_io_qos out;
+};
+
+struct bt_iso_bcast_qos {
+ __u8 big;
+ __u8 bis;
+ __u8 sync_factor;
__u8 packing;
__u8 framing;
struct bt_iso_io_qos in;
struct bt_iso_io_qos out;
+ __u8 encryption;
+ __u8 bcode[16];
+ __u8 options;
+ __u16 skip;
+ __u16 sync_timeout;
+ __u8 sync_cte_type;
+ __u8 mse;
+ __u16 timeout;
+};
+
+struct bt_iso_qos {
+ union {
+ struct bt_iso_ucast_qos ucast;
+ struct bt_iso_bcast_qos bcast;
+ };
};
#define BT_ISO_PHY_1M 0x01
@@ -369,6 +386,7 @@ struct bt_sock {
enum {
BT_SK_DEFER_SETUP,
BT_SK_SUSPEND,
+ BT_SK_PKT_STATUS
};
struct bt_sock_list {
@@ -383,6 +401,8 @@ int bt_sock_register(int proto, const struct net_proto_family *ops);
void bt_sock_unregister(int proto);
void bt_sock_link(struct bt_sock_list *l, struct sock *s);
void bt_sock_unlink(struct bt_sock_list *l, struct sock *s);
+struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
+ struct proto *prot, int proto, gfp_t prio, int kern);
int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags);
int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -413,10 +433,6 @@ struct l2cap_ctrl {
struct l2cap_chan *chan;
};
-struct sco_ctrl {
- u8 pkt_status;
-};
-
struct hci_dev;
typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
@@ -447,16 +463,18 @@ struct bt_skb_cb {
u8 force_active;
u16 expect;
u8 incoming:1;
+ u8 pkt_status:2;
union {
struct l2cap_ctrl l2cap;
- struct sco_ctrl sco;
struct hci_ctrl hci;
struct mgmt_ctrl mgmt;
+ struct scm_creds creds;
};
};
#define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
#define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type
+#define hci_skb_pkt_status(skb) bt_cb((skb))->pkt_status
#define hci_skb_expect(skb) bt_cb((skb))->expect
#define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode
#define hci_skb_event(skb) bt_cb((skb))->hci.req_event
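With sco_ctrl gone, the packet status flag lives directly in bt_skb_cb as a two-bit field, accessible through the new hci_skb_pkt_status() macro. A hedged sketch of a driver stamping a received SCO skb (illustrative only):

static void example_stamp(struct sk_buff *skb, u8 status)
{
	/* pkt_status is a 2-bit field, so only the low two bits survive */
	hci_skb_pkt_status(skb) = status & 0x03;
}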
diff --git a/include/net/bluetooth/coredump.h b/include/net/bluetooth/coredump.h
new file mode 100644
index 000000000000..72f51b587a04
--- /dev/null
+++ b/include/net/bluetooth/coredump.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Google Corporation
+ */
+
+#ifndef __COREDUMP_H
+#define __COREDUMP_H
+
+#define DEVCOREDUMP_TIMEOUT msecs_to_jiffies(10000) /* 10 sec */
+
+typedef void (*coredump_t)(struct hci_dev *hdev);
+typedef void (*dmp_hdr_t)(struct hci_dev *hdev, struct sk_buff *skb);
+typedef void (*notify_change_t)(struct hci_dev *hdev, int state);
+
+/* struct hci_devcoredump - Devcoredump state
+ *
+ * @supported: Indicates if FW dump collection is supported by driver
+ * @state: Current state of dump collection
+ * @timeout: Indicates a timeout for collecting the devcoredump
+ *
+ * @alloc_size: Total size of the dump
+ * @head: Start of the dump
+ * @tail: Pointer to current end of dump
+ * @end: head + alloc_size for easy comparisons
+ *
+ * @dump_q: Dump queue for state machine to process
+ * @dump_rx: Devcoredump state machine work
+ * @dump_timeout: Devcoredump timeout work
+ *
+ * @coredump: Called from the driver's .coredump() function.
+ * @dmp_hdr: Create a dump header to identify controller/fw/driver info
+ * @notify_change: Notify driver when devcoredump state has changed
+ */
+struct hci_devcoredump {
+ bool supported;
+
+ enum devcoredump_state {
+ HCI_DEVCOREDUMP_IDLE,
+ HCI_DEVCOREDUMP_ACTIVE,
+ HCI_DEVCOREDUMP_DONE,
+ HCI_DEVCOREDUMP_ABORT,
+ HCI_DEVCOREDUMP_TIMEOUT,
+ } state;
+
+ unsigned long timeout;
+
+ size_t alloc_size;
+ char *head;
+ char *tail;
+ char *end;
+
+ struct sk_buff_head dump_q;
+ struct work_struct dump_rx;
+ struct delayed_work dump_timeout;
+
+ coredump_t coredump;
+ dmp_hdr_t dmp_hdr;
+ notify_change_t notify_change;
+};
+
+#ifdef CONFIG_DEV_COREDUMP
+
+void hci_devcd_reset(struct hci_dev *hdev);
+void hci_devcd_rx(struct work_struct *work);
+void hci_devcd_timeout(struct work_struct *work);
+
+int hci_devcd_register(struct hci_dev *hdev, coredump_t coredump,
+ dmp_hdr_t dmp_hdr, notify_change_t notify_change);
+int hci_devcd_init(struct hci_dev *hdev, u32 dump_size);
+int hci_devcd_append(struct hci_dev *hdev, struct sk_buff *skb);
+int hci_devcd_append_pattern(struct hci_dev *hdev, u8 pattern, u32 len);
+int hci_devcd_complete(struct hci_dev *hdev);
+int hci_devcd_abort(struct hci_dev *hdev);
+
+#else
+
+static inline void hci_devcd_reset(struct hci_dev *hdev) {}
+static inline void hci_devcd_rx(struct work_struct *work) {}
+static inline void hci_devcd_timeout(struct work_struct *work) {}
+
+static inline int hci_devcd_register(struct hci_dev *hdev, coredump_t coredump,
+ dmp_hdr_t dmp_hdr,
+ notify_change_t notify_change)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int hci_devcd_init(struct hci_dev *hdev, u32 dump_size)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int hci_devcd_append(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int hci_devcd_append_pattern(struct hci_dev *hdev,
+ u8 pattern, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int hci_devcd_complete(struct hci_dev *hdev)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int hci_devcd_abort(struct hci_dev *hdev)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif /* CONFIG_DEV_COREDUMP */
+
+#endif /* __COREDUMP_H */
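A hedged sketch of how a driver might wire up these hooks; the three my_* callbacks are hypothetical driver functions matching the typedefs above:

static void my_coredump(struct hci_dev *hdev) { /* trigger the FW dump */ }

static void my_dmp_hdr(struct hci_dev *hdev, struct sk_buff *skb)
{
	/* append controller/firmware/driver identification to skb */
}

static void my_notify(struct hci_dev *hdev, int state) { /* track state */ }

static int example_setup(struct hci_dev *hdev)
{
	return hci_devcd_register(hdev, my_coredump, my_dmp_hdr, my_notify);
}

With CONFIG_DEV_COREDUMP disabled, the stubs return -EOPNOTSUPP, so callers degrade gracefully.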
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 400f8a7d0c3f..87d92accc26e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -294,6 +294,41 @@ enum {
* during the hdev->setup vendor callback.
*/
HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG,
+
+ /* When this quirk is set, max_page for local extended features
+ * is set to 1, even if controller reports higher number. Some
+ * controllers (e.g. RTL8723CS) report more pages, but they
+ * don't actually support features declared there.
+ */
+ HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2,
+
+ /*
+ * When this quirk is set, the HCI_OP_LE_SET_RPA_TIMEOUT command is
+ * skipped during initialization. This is required for the Actions
+ * Semiconductor ATS2851 based controllers, which erroneously claim
+ * to support it.
+ */
+ HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT,
+
+ /* When this quirk is set, MSFT extension monitor tracking by
+ * address filter is supported. Since tracking quantity of each
+ * pattern is limited, this feature supports tracking multiple
+ * devices concurrently if controller supports multiple
+ * address filters.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER,
+
+ /*
+ * When this quirk is set, LE Coded PHY shall not be used. This is
+ * required for some Intel controllers which erroneously claim to
+ * support it but it causes problems with extended scanning.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_LE_CODED,
};
/* HCI device flags */
@@ -335,6 +370,7 @@ enum {
enum {
HCI_SETUP,
HCI_CONFIG,
+ HCI_DEBUGFS_CREATED,
HCI_AUTO_OFF,
HCI_RFKILLED,
HCI_MGMT,
@@ -561,6 +597,7 @@ enum {
#define HCI_LE_CIS_CENTRAL 0x10
#define HCI_LE_CIS_PERIPHERAL 0x20
#define HCI_LE_ISO_BROADCASTER 0x40
+#define HCI_LE_ISO_SYNC_RECEIVER 0x80
/* Connection modes */
#define HCI_CM_ACTIVE 0x0000
@@ -2744,6 +2781,17 @@ struct hci_ev_le_enh_conn_complete {
__u8 clk_accurancy;
} __packed;
+#define HCI_EV_LE_PER_ADV_REPORT 0x0f
+struct hci_ev_le_per_adv_report {
+ __le16 sync_handle;
+ __u8 tx_power;
+ __u8 rssi;
+ __u8 cte_type;
+ __u8 data_status;
+ __u8 length;
+ __u8 data[];
+} __packed;
+
#define HCI_EV_LE_EXT_ADV_SET_TERM 0x12
struct hci_evt_le_ext_adv_set_term {
__u8 status;
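The new periodic advertising report ends in a flexible data[] array sized by its length field, so consumers must validate that length against the buffer before reading the data. A minimal sketch (assumes ev points into an event buffer of buf_len bytes):

static void example_per_adv(const struct hci_ev_le_per_adv_report *ev,
			    size_t buf_len)
{
	if (buf_len < sizeof(*ev) + ev->length)
		return;	/* truncated event, ignore */

	/* ... process ev->length bytes of ev->data ... */
}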
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index d5311ceb21c6..e6359f7346f1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,6 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
+ Copyright 2023 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -32,6 +33,7 @@
#include <net/bluetooth/hci.h>
#include <net/bluetooth/hci_sync.h>
#include <net/bluetooth/hci_sock.h>
+#include <net/bluetooth/coredump.h>
/* HCI priority */
#define HCI_PRIO_MAX 7
@@ -81,7 +83,7 @@ struct discovery_state {
u8 last_adv_addr_type;
s8 last_adv_rssi;
u32 last_adv_flags;
- u8 last_adv_data[HCI_MAX_AD_LENGTH];
+ u8 last_adv_data[HCI_MAX_EXT_AD_LENGTH];
u8 last_adv_data_len;
bool report_invalid_rssi;
bool result_filtering;
@@ -288,7 +290,7 @@ struct adv_pattern {
__u8 ad_type;
__u8 offset;
__u8 length;
- __u8 value[HCI_MAX_AD_LENGTH];
+ __u8 value[HCI_MAX_EXT_AD_LENGTH];
};
struct adv_rssi_thresholds {
@@ -319,8 +321,8 @@ struct adv_monitor {
#define HCI_MAX_SHORT_NAME_LENGTH 10
-#define HCI_CONN_HANDLE_UNSET 0xffff
#define HCI_CONN_HANDLE_MAX 0x0eff
+#define HCI_CONN_HANDLE_UNSET(_handle) (_handle > HCI_CONN_HANDLE_MAX)
/* Min encryption key size to match with SMP */
#define HCI_MIN_ENC_KEY_SIZE 7
@@ -513,6 +515,7 @@ struct hci_dev {
struct work_struct cmd_sync_work;
struct list_head cmd_sync_work_list;
struct mutex cmd_sync_work_lock;
+ struct mutex unregister_lock;
struct work_struct cmd_sync_cancel_work;
struct work_struct reenable_adv_work;
@@ -590,6 +593,8 @@ struct hci_dev {
const char *fw_info;
struct dentry *debugfs;
+ struct hci_devcoredump dump;
+
struct device dev;
struct rfkill *rfkill;
@@ -721,7 +726,7 @@ struct hci_conn {
__u16 le_conn_interval;
__u16 le_conn_latency;
__u16 le_supv_timeout;
- __u8 le_adv_data[HCI_MAX_AD_LENGTH];
+ __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH];
__u8 le_adv_data_len;
__u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH];
__u8 le_per_adv_data_len;
@@ -734,6 +739,7 @@ struct hci_conn {
unsigned long flags;
enum conn_reasons conn_reason;
+ __u8 abort_reason;
__u32 clock;
__u16 clock_accuracy;
@@ -753,7 +759,6 @@ struct hci_conn {
struct delayed_work auto_accept_work;
struct delayed_work idle_work;
struct delayed_work le_conn_timeout;
- struct work_struct le_scan_cleanup;
struct device dev;
struct dentry *debugfs;
@@ -764,7 +769,10 @@ struct hci_conn {
void *iso_data;
struct amp_mgr *amp_mgr;
- struct hci_conn *link;
+ struct list_head link_list;
+ struct hci_conn *parent;
+ struct hci_link *link;
+
struct bt_codec codec;
void (*connect_cfm_cb) (struct hci_conn *conn, u8 status);
@@ -774,6 +782,11 @@ struct hci_conn {
void (*cleanup)(struct hci_conn *conn);
};
+struct hci_link {
+ struct list_head list;
+ struct hci_conn *conn;
+};
+
struct hci_chan {
struct list_head list;
__u16 handle;
@@ -807,6 +820,7 @@ struct hci_conn_params {
struct hci_conn *conn;
bool explicit_connect;
+ /* Accessed without hdev->lock: */
hci_conn_flags_t flags;
u8 privacy_mode;
};
@@ -960,6 +974,12 @@ enum {
HCI_CONN_SCANNING,
HCI_CONN_AUTH_FAILURE,
HCI_CONN_PER_ADV,
+ HCI_CONN_BIG_CREATED,
+ HCI_CONN_CREATE_CIS,
+ HCI_CONN_BIG_SYNC,
+ HCI_CONN_BIG_SYNC_FAILED,
+ HCI_CONN_PA_SYNC,
+ HCI_CONN_PA_SYNC_FAILED,
};
static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
@@ -979,7 +999,7 @@ static inline bool hci_conn_sc_enabled(struct hci_conn *conn)
static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c)
{
struct hci_conn_hash *h = &hdev->conn_hash;
- list_add_rcu(&c->list, &h->list);
+ list_add_tail_rcu(&c->list, &h->list);
switch (c->type) {
case ACL_LINK:
h->acl_num++;
@@ -1079,8 +1099,7 @@ static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle)
}
static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
- bdaddr_t *ba,
- __u8 big, __u8 bis)
+ bdaddr_t *ba, __u8 bis)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
@@ -1091,7 +1110,33 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
if (bacmp(&c->dst, ba) || c->type != ISO_LINK)
continue;
- if (c->iso_qos.big == big && c->iso_qos.bis == bis) {
+ if (c->iso_qos.bcast.bis == bis) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *
+hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev,
+ bdaddr_t *ba,
+ __u8 big, __u8 bis)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (bacmp(&c->dst, ba) || c->type != ISO_LINK ||
+ !test_bit(HCI_CONN_PER_ADV, &c->flags))
+ continue;
+
+ if (c->iso_qos.bcast.big == big &&
+ c->iso_qos.bcast.bis == bis) {
rcu_read_unlock();
return c;
}
@@ -1166,7 +1211,9 @@ static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev,
static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
bdaddr_t *ba,
- __u8 ba_type)
+ __u8 ba_type,
+ __u8 cig,
+ __u8 id)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
@@ -1174,10 +1221,19 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
continue;
- if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) {
+ /* Match CIG ID if set */
+ if (cig != BT_ISO_QOS_CIG_UNSET && cig != c->iso_qos.ucast.cig)
+ continue;
+
+ /* Match CIS ID if set */
+ if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis)
+ continue;
+
+ /* Match destination address if set */
+ if (!ba || (ba_type == c->dst_type && !bacmp(&c->dst, ba))) {
rcu_read_unlock();
return c;
}
@@ -1197,10 +1253,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
continue;
- if (handle == c->iso_qos.cig) {
+ if (handle == c->iso_qos.ucast.cig) {
rcu_read_unlock();
return c;
}
@@ -1223,7 +1279,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK)
continue;
- if (handle == c->iso_qos.big) {
+ if (handle == c->iso_qos.bcast.big) {
rcu_read_unlock();
return c;
}
@@ -1234,6 +1290,52 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
return NULL;
}
+static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *hdev,
+ __u8 handle)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type != ISO_LINK)
+ continue;
+
+ if (handle != BT_ISO_QOS_BIG_UNSET && handle == c->iso_qos.bcast.big) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *
+hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type != ISO_LINK ||
+ !test_bit(HCI_CONN_PA_SYNC, &c->flags))
+ continue;
+
+ if (c->iso_qos.bcast.big == big) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev,
__u8 type, __u16 state)
{
@@ -1295,15 +1397,37 @@ static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev)
return NULL;
}
+/* Returns true if an LE connection is in the scanning state */
+static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type == LE_LINK && c->state == BT_CONNECT &&
+ test_bit(HCI_CONN_SCANNING, &c->flags)) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return false;
+}
+
int hci_disconnect(struct hci_conn *conn, __u8 reason);
bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
void hci_sco_setup(struct hci_conn *conn, __u8 status);
bool hci_iso_setup_path(struct hci_conn *conn);
-int hci_le_create_cis(struct hci_conn *conn);
+int hci_le_create_cis_pending(struct hci_dev *hdev);
+int hci_conn_check_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role);
-int hci_conn_del(struct hci_conn *conn);
+void hci_conn_del(struct hci_conn *conn);
void hci_conn_hash_flush(struct hci_dev *hdev);
void hci_conn_check_pending(struct hci_dev *hdev);
@@ -1326,14 +1450,18 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
__u16 setting, struct bt_codec *codec);
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base);
struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos,
__u8 data_len, __u8 *data);
int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
- __u8 sid);
-int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
+ __u8 sid, struct bt_iso_qos *qos);
+int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[]);
int hci_conn_check_link_mode(struct hci_conn *conn);
int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level);
@@ -1344,6 +1472,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role);
void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
void hci_conn_failed(struct hci_conn *conn, u8 status);
+u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle);
/*
* hci_conn_get() and hci_conn_put() are used to control the life-time of an
@@ -1377,12 +1506,14 @@ static inline void hci_conn_put(struct hci_conn *conn)
put_device(&conn->dev);
}
-static inline void hci_conn_hold(struct hci_conn *conn)
+static inline struct hci_conn *hci_conn_hold(struct hci_conn *conn)
{
BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt));
atomic_inc(&conn->refcnt);
cancel_delayed_work(&conn->disc_work);
+
+ return conn;
}
static inline void hci_conn_drop(struct hci_conn *conn)
@@ -1497,6 +1628,15 @@ static inline void hci_set_aosp_capable(struct hci_dev *hdev)
#endif
}
+static inline void hci_devcd_setup(struct hci_dev *hdev)
+{
+#ifdef CONFIG_DEV_COREDUMP
+ INIT_WORK(&hdev->dump.dump_rx, hci_devcd_rx);
+ INIT_DELAYED_WORK(&hdev->dump.dump_timeout, hci_devcd_timeout);
+ skb_queue_head_init(&hdev->dump.dump_q);
+#endif
+}
+
int hci_dev_open(__u16 dev);
int hci_dev_close(__u16 dev);
int hci_dev_do_close(struct hci_dev *hdev);
@@ -1536,7 +1676,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type);
void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type);
void hci_conn_params_clear_disabled(struct hci_dev *hdev);
+void hci_conn_params_free(struct hci_conn_params *param);
+void hci_pend_le_list_del_init(struct hci_conn_params *param);
+void hci_pend_le_list_add(struct hci_conn_params *param,
+ struct list_head *list);
struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
bdaddr_t *addr,
u8 addr_type);
@@ -1668,9 +1812,15 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M))
+#define le_2m_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_2M))
+
#define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M))
+#define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \
+ !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \
+ &(dev)->quirks))
+
#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
@@ -1701,6 +1851,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
/* Extended advertising support */
#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
+/* Maximum advertising length */
+#define max_adv_len(dev) \
+ (ext_adv_capable(dev) ? HCI_MAX_EXT_AD_LENGTH : HCI_MAX_AD_LENGTH)
+
/* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789:
*
* C24: Mandatory if the LE Controller supports Connection State and either
@@ -1721,6 +1875,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define cis_peripheral_capable(dev) \
((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL)
#define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER)
+#define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER)
#define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \
(!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks)))
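hci_conn_hash_lookup_cis() now takes CIG and CIS IDs and treats BT_ISO_QOS_CIG_UNSET / BT_ISO_QOS_CIS_UNSET (and a NULL address) as wildcards. A hedged usage sketch:

static struct hci_conn *example_find_cis(struct hci_dev *hdev, u8 cig)
{
	/* any CIS in this CIG, regardless of peer address or CIS ID */
	return hci_conn_hash_lookup_cis(hdev, NULL, 0, cig,
					BT_ISO_QOS_CIS_UNSET);
}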
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 17f5a4c32f36..57eeb07aeb25 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -5,6 +5,9 @@
* Copyright (C) 2021 Intel Corporation
*/
+#define UINT_PTR(_handle) ((void *)((uintptr_t)_handle))
+#define PTR_UINT(_ptr) ((uintptr_t)((void *)_ptr))
+
typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data);
typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data,
int err);
@@ -41,6 +44,8 @@ void hci_cmd_sync_clear(struct hci_dev *hdev);
void hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err);
+int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy);
int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
void *data, hci_cmd_sync_work_destroy_t destroy);
@@ -122,6 +127,8 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason);
int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_le_create_cis_sync(struct hci_dev *hdev);
+
int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle);
int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason);
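UINT_PTR()/PTR_UINT() let an integer such as a connection handle ride in the void *data slot of the cmd_sync machinery without a heap allocation. A hedged sketch (the example_* functions are illustrative):

static int example_work(struct hci_dev *hdev, void *data)
{
	u16 handle = PTR_UINT(data);	/* recover the handle */

	/* ... operate on the connection identified by handle ... */
	return 0;
}

static int example_queue(struct hci_dev *hdev, u16 handle)
{
	return hci_cmd_sync_queue(hdev, example_work, UINT_PTR(handle), NULL);
}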
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 2f766e3437ce..cf393e72d6ed 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -694,7 +694,7 @@ struct l2cap_conn {
struct sk_buff_head pending_rx;
struct work_struct pending_rx_work;
- struct work_struct id_addr_update_work;
+ struct delayed_work id_addr_timer;
__u8 disc_reason;
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index e18a927669c0..d382679efd2b 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -91,26 +91,28 @@ struct mgmt_rp_read_index_list {
#define MGMT_MAX_NAME_LENGTH (HCI_MAX_NAME_LENGTH + 1)
#define MGMT_MAX_SHORT_NAME_LENGTH (HCI_MAX_SHORT_NAME_LENGTH + 1)
-#define MGMT_SETTING_POWERED 0x00000001
-#define MGMT_SETTING_CONNECTABLE 0x00000002
-#define MGMT_SETTING_FAST_CONNECTABLE 0x00000004
-#define MGMT_SETTING_DISCOVERABLE 0x00000008
-#define MGMT_SETTING_BONDABLE 0x00000010
-#define MGMT_SETTING_LINK_SECURITY 0x00000020
-#define MGMT_SETTING_SSP 0x00000040
-#define MGMT_SETTING_BREDR 0x00000080
-#define MGMT_SETTING_HS 0x00000100
-#define MGMT_SETTING_LE 0x00000200
-#define MGMT_SETTING_ADVERTISING 0x00000400
-#define MGMT_SETTING_SECURE_CONN 0x00000800
-#define MGMT_SETTING_DEBUG_KEYS 0x00001000
-#define MGMT_SETTING_PRIVACY 0x00002000
-#define MGMT_SETTING_CONFIGURATION 0x00004000
-#define MGMT_SETTING_STATIC_ADDRESS 0x00008000
-#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000
-#define MGMT_SETTING_WIDEBAND_SPEECH 0x00020000
-#define MGMT_SETTING_CIS_CENTRAL 0x00040000
-#define MGMT_SETTING_CIS_PERIPHERAL 0x00080000
+#define MGMT_SETTING_POWERED BIT(0)
+#define MGMT_SETTING_CONNECTABLE BIT(1)
+#define MGMT_SETTING_FAST_CONNECTABLE BIT(2)
+#define MGMT_SETTING_DISCOVERABLE BIT(3)
+#define MGMT_SETTING_BONDABLE BIT(4)
+#define MGMT_SETTING_LINK_SECURITY BIT(5)
+#define MGMT_SETTING_SSP BIT(6)
+#define MGMT_SETTING_BREDR BIT(7)
+#define MGMT_SETTING_HS BIT(8)
+#define MGMT_SETTING_LE BIT(9)
+#define MGMT_SETTING_ADVERTISING BIT(10)
+#define MGMT_SETTING_SECURE_CONN BIT(11)
+#define MGMT_SETTING_DEBUG_KEYS BIT(12)
+#define MGMT_SETTING_PRIVACY BIT(13)
+#define MGMT_SETTING_CONFIGURATION BIT(14)
+#define MGMT_SETTING_STATIC_ADDRESS BIT(15)
+#define MGMT_SETTING_PHY_CONFIGURATION BIT(16)
+#define MGMT_SETTING_WIDEBAND_SPEECH BIT(17)
+#define MGMT_SETTING_CIS_CENTRAL BIT(18)
+#define MGMT_SETTING_CIS_PERIPHERAL BIT(19)
+#define MGMT_SETTING_ISO_BROADCASTER BIT(20)
+#define MGMT_SETTING_ISO_SYNC_RECEIVER BIT(21)
#define MGMT_OP_READ_INFO 0x0004
#define MGMT_READ_INFO_SIZE 0
@@ -635,21 +637,21 @@ struct mgmt_rp_get_phy_configuration {
} __packed;
#define MGMT_GET_PHY_CONFIGURATION_SIZE 0
-#define MGMT_PHY_BR_1M_1SLOT 0x00000001
-#define MGMT_PHY_BR_1M_3SLOT 0x00000002
-#define MGMT_PHY_BR_1M_5SLOT 0x00000004
-#define MGMT_PHY_EDR_2M_1SLOT 0x00000008
-#define MGMT_PHY_EDR_2M_3SLOT 0x00000010
-#define MGMT_PHY_EDR_2M_5SLOT 0x00000020
-#define MGMT_PHY_EDR_3M_1SLOT 0x00000040
-#define MGMT_PHY_EDR_3M_3SLOT 0x00000080
-#define MGMT_PHY_EDR_3M_5SLOT 0x00000100
-#define MGMT_PHY_LE_1M_TX 0x00000200
-#define MGMT_PHY_LE_1M_RX 0x00000400
-#define MGMT_PHY_LE_2M_TX 0x00000800
-#define MGMT_PHY_LE_2M_RX 0x00001000
-#define MGMT_PHY_LE_CODED_TX 0x00002000
-#define MGMT_PHY_LE_CODED_RX 0x00004000
+#define MGMT_PHY_BR_1M_1SLOT BIT(0)
+#define MGMT_PHY_BR_1M_3SLOT BIT(1)
+#define MGMT_PHY_BR_1M_5SLOT BIT(2)
+#define MGMT_PHY_EDR_2M_1SLOT BIT(3)
+#define MGMT_PHY_EDR_2M_3SLOT BIT(4)
+#define MGMT_PHY_EDR_2M_5SLOT BIT(5)
+#define MGMT_PHY_EDR_3M_1SLOT BIT(6)
+#define MGMT_PHY_EDR_3M_3SLOT BIT(7)
+#define MGMT_PHY_EDR_3M_5SLOT BIT(8)
+#define MGMT_PHY_LE_1M_TX BIT(9)
+#define MGMT_PHY_LE_1M_RX BIT(10)
+#define MGMT_PHY_LE_2M_TX BIT(11)
+#define MGMT_PHY_LE_2M_RX BIT(12)
+#define MGMT_PHY_LE_CODED_TX BIT(13)
+#define MGMT_PHY_LE_CODED_RX BIT(14)
#define MGMT_PHY_BREDR_MASK (MGMT_PHY_BR_1M_1SLOT | MGMT_PHY_BR_1M_3SLOT | \
MGMT_PHY_BR_1M_5SLOT | MGMT_PHY_EDR_2M_1SLOT | \
@@ -974,11 +976,12 @@ struct mgmt_ev_auth_failed {
__u8 status;
} __packed;
-#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01
-#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02
-#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04
-#define MGMT_DEV_FOUND_INITIATED_CONN 0x08
-#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED 0x10
+#define MGMT_DEV_FOUND_CONFIRM_NAME BIT(0)
+#define MGMT_DEV_FOUND_LEGACY_PAIRING BIT(1)
+#define MGMT_DEV_FOUND_NOT_CONNECTABLE BIT(2)
+#define MGMT_DEV_FOUND_INITIATED_CONN BIT(3)
+#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED BIT(4)
+#define MGMT_DEV_FOUND_SCAN_RSP BIT(5)
#define MGMT_EV_DEVICE_FOUND 0x0012
struct mgmt_ev_device_found {
diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h
index 1aa2e14b6c94..f40ddb4264fc 100644
--- a/include/net/bluetooth/sco.h
+++ b/include/net/bluetooth/sco.h
@@ -46,6 +46,4 @@ struct sco_conninfo {
__u8 dev_class[3];
};
-#define SCO_CMSG_PKT_STATUS 0x01
-
#endif /* __SCO_H */
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index a016f275cb01..c5e57c6bd873 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -301,7 +301,6 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave);
int bond_3ad_set_carrier(struct bonding *bond);
-void bond_3ad_update_lacp_active(struct bonding *bond);
void bond_3ad_update_lacp_rate(struct bonding *bond);
void bond_3ad_update_ad_actor_settings(struct bonding *bond);
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index c3843239517d..5b8b1b644a2d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-1.0+ */
/*
* Bond several ethernet interfaces into a Cisco, running 'Etherchannel'.
*
@@ -7,9 +8,6 @@
* BUT, I'm the one who modified it for ethernet, so:
* (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov
*
- * This software may be used and distributed according to the terms
- * of the GNU Public License, incorporated herein by reference.
- *
*/
#ifndef _NET_BONDING_H
@@ -221,6 +219,7 @@ struct bonding {
struct bond_up_slave __rcu *usable_slaves;
struct bond_up_slave __rcu *all_slaves;
bool force_primary;
+ bool notifier_ctx;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
struct slave *);
@@ -233,7 +232,7 @@ struct bonding {
*/
spinlock_t mode_lock;
spinlock_t stats_lock;
- u8 send_peer_notif;
+ u32 send_peer_notif;
u8 igmp_retrans;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_entry;
@@ -278,7 +277,7 @@ struct bond_vlan_tag {
unsigned short vlan_id;
};
-/**
+/*
* Returns NULL if the net_device does not belong to any of the bond's slaves
*
* Caller must hold bond lock for read
@@ -659,6 +658,7 @@ void bond_destroy_sysfs(struct bond_net *net);
void bond_prepare_sysfs_group(struct bonding *bond);
int bond_sysfs_slave_add(struct slave *slave);
void bond_sysfs_slave_del(struct slave *slave);
+void bond_xdp_set_features(struct net_device *bond_dev);
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
struct netlink_ext_ack *extack);
int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
@@ -722,23 +722,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
}
/* Caller must hold rcu_read_lock() for read */
-static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
+static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac)
{
struct list_head *iter;
struct slave *tmp;
- struct netdev_hw_addr *ha;
bond_for_each_slave_rcu(bond, tmp, iter)
if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
return true;
-
- if (netdev_uc_empty(bond->dev))
- return false;
-
- netdev_for_each_uc_addr(ha, bond->dev)
- if (ether_addr_equal_64bits(mac, ha->addr))
- return true;
-
return false;
}
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index f90f0021f5f2..4dabeb6c76d3 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -16,6 +16,7 @@
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>
#include <net/ip.h>
+#include <net/xdp.h>
/* 0 - Reserved to indicate value not set
* 1..NR_CPUS - Reserved for sender_cpu
diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index bd5440977f7f..5ee7b322e18b 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -33,9 +33,6 @@ struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info,
int mtu_size);
struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
-void cfsrvl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid);
-
bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
void cfsrvl_init(struct cfsrvl *service,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f115b2550309..3a4b684f89bf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2021, 2023 Intel Corporation
*/
#include <linux/ethtool.h>
@@ -263,7 +263,7 @@ enum ieee80211_privacy {
* are only for driver use when pointers to this structure are
* passed around.
*
- * @flags: rate-specific flags
+ * @flags: rate-specific flags from &enum ieee80211_rate_flags
* @bitrate: bitrate in units of 100 Kbps
* @hw_value: driver/hardware value for this rate
* @hw_value_short: driver/hardware value for this rate when
@@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
return NULL;
+ if (iftype == NL80211_IFTYPE_AP_VLAN)
+ iftype = NL80211_IFTYPE_AP;
+
for (i = 0; i < sband->n_iftype_data; i++) {
const struct ieee80211_sband_iftype_data *data =
&sband->iftype_data[i];
@@ -808,7 +811,7 @@ struct cfg80211_tid_cfg {
struct cfg80211_tid_config {
const u8 *peer;
u32 n_tid_conf;
- struct cfg80211_tid_cfg tid_conf[];
+ struct cfg80211_tid_cfg tid_conf[] __counted_by(n_tid_conf);
};
/**
@@ -828,6 +831,18 @@ struct cfg80211_fils_aad {
};
/**
+ * struct cfg80211_set_hw_timestamp - enable/disable HW timestamping
+ * @macaddr: peer MAC address. NULL to enable/disable HW timestamping for all
+ * addresses.
+ * @enable: if set, enable HW timestamping for the specified MAC address.
+ * Otherwise disable HW timestamping for the specified MAC address.
+ */
+struct cfg80211_set_hw_timestamp {
+ const u8 *macaddr;
+ bool enable;
+};
+
+/**
* cfg80211_get_chandef_type - return old channel type from chandef
* @chandef: the channel definition
*
@@ -939,6 +954,15 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
enum nl80211_iftype iftype);
/**
+ * nl80211_send_chandef - sends the channel definition.
+ * @msg: the msg in which to send the channel definition
+ * @chandef: the channel definition to send
+ *
+ * Returns: 0 if the channel definition was added to @msg, < 0 on error
+ */
+int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef);
+
+/**
* ieee80211_chanwidth_rate_flags - return rate flags for channel width
* @width: the channel width of the channel
*
@@ -1163,7 +1187,24 @@ struct cfg80211_mbssid_elems {
struct {
const u8 *data;
size_t len;
- } elem[];
+ } elem[] __counted_by(cnt);
+};
+
+/**
+ * struct cfg80211_rnr_elems - Reduced neighbor report (RNR) elements
+ *
+ * @cnt: Number of elements in array %elems.
+ *
+ * @elem: Array of RNR element(s) to be added into Beacon frames.
+ * @elem.data: Data for RNR elements.
+ * @elem.len: Length of data.
+ */
+struct cfg80211_rnr_elems {
+ u8 cnt;
+ struct {
+ const u8 *data;
+ size_t len;
+ } elem[] __counted_by(cnt);
};
/**
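The __counted_by() annotations in these hunks tell the compiler (and FORTIFY_SOURCE) which member bounds each flexible array, so the counter must be set before the array is indexed. An illustrative allocation sketch, not from the patch:

static struct cfg80211_rnr_elems *example_alloc(u8 cnt)
{
	struct cfg80211_rnr_elems *e;

	e = kzalloc(struct_size(e, elem, cnt), GFP_KERNEL);
	if (e)
		e->cnt = cnt;	/* establishes the bounds of e->elem[] */
	return e;
}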
@@ -1186,6 +1227,7 @@ struct cfg80211_mbssid_elems {
* @probe_resp_len: length of probe response template (@probe_resp)
* @probe_resp: probe response template (AP mode only)
* @mbssid_ies: multiple BSSID elements
+ * @rnr_ies: reduced neighbor report elements
* @ftm_responder: enable FTM responder functionality; -1 for no change
* (which also implies no change in LCI/civic location data)
* @lci: Measurement Report element content, starting with Measurement Token
@@ -1209,6 +1251,7 @@ struct cfg80211_beacon_data {
const u8 *lci;
const u8 *civicloc;
struct cfg80211_mbssid_elems *mbssid_ies;
+ struct cfg80211_rnr_elems *rnr_ies;
s8 ftm_responder;
size_t head_len, tail_len;
@@ -1239,7 +1282,7 @@ struct cfg80211_acl_data {
int n_acl_entries;
/* Keep it last */
- struct mac_address mac_addrs[];
+ struct mac_address mac_addrs[] __counted_by(n_acl_entries);
};
/**
@@ -1310,7 +1353,7 @@ struct cfg80211_unsol_bcast_probe_resp {
* @twt_responder: Enable Target Wait Time
* @he_required: stations must support HE
* @sae_h2e_required: stations must support direct H2E technique in SAE
- * @flags: flags, as defined in enum cfg80211_ap_settings_flags
+ * @flags: flags, as defined in &enum nl80211_ap_settings_flags
* @he_obss_pd: OBSS Packet Detection settings
* @he_oper: HE operation IE (or %NULL if HE isn't enabled)
* @fils_discovery: FILS discovery transmission parameters
@@ -1439,7 +1482,6 @@ struct iface_combination_params {
* @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp)
* @STATION_PARAM_APPLY_CAPABILITY: apply new capability
* @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state
- * @STATION_PARAM_APPLY_STA_TXPOWER: apply tx power for STA
*
* Not all station parameters have in-band "no change" signalling,
* for those that don't, these flags will be used.
@@ -1662,6 +1704,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
* @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode
* @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS
* @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information
+ * @RATE_INFO_FLAGS_S1G_MCS: MCS field filled with S1G MCS
*/
enum rate_info_flags {
RATE_INFO_FLAGS_MCS = BIT(0),
@@ -1672,6 +1715,7 @@ enum rate_info_flags {
RATE_INFO_FLAGS_EDMG = BIT(5),
RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6),
RATE_INFO_FLAGS_EHT_MCS = BIT(7),
+ RATE_INFO_FLAGS_S1G_MCS = BIT(8),
};
/**
@@ -1688,6 +1732,11 @@ enum rate_info_flags {
* @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation
* @RATE_INFO_BW_320: 320 MHz bandwidth
* @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation
+ * @RATE_INFO_BW_1: 1 MHz bandwidth
+ * @RATE_INFO_BW_2: 2 MHz bandwidth
+ * @RATE_INFO_BW_4: 4 MHz bandwidth
+ * @RATE_INFO_BW_8: 8 MHz bandwidth
+ * @RATE_INFO_BW_16: 16 MHz bandwidth
*/
enum rate_info_bw {
RATE_INFO_BW_20 = 0,
@@ -1699,6 +1748,11 @@ enum rate_info_bw {
RATE_INFO_BW_HE_RU,
RATE_INFO_BW_320,
RATE_INFO_BW_EHT_RU,
+ RATE_INFO_BW_1,
+ RATE_INFO_BW_2,
+ RATE_INFO_BW_4,
+ RATE_INFO_BW_8,
+ RATE_INFO_BW_16,
};
/**
@@ -1707,8 +1761,8 @@ enum rate_info_bw {
* Information about a receiving or transmitting bitrate
*
* @flags: bitflag of flags from &enum rate_info_flags
- * @mcs: mcs index if struct describes an HT/VHT/HE rate
* @legacy: bitrate in 100kbit/s for 802.11abg
+ * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G rate
* @nss: number of streams (VHT & HE only)
* @bw: bandwidth (from &enum rate_info_bw)
* @he_gi: HE guard interval (from &enum nl80211_he_gi)
@@ -1721,9 +1775,9 @@ enum rate_info_bw {
* only valid if bw is %RATE_INFO_BW_EHT_RU)
*/
struct rate_info {
- u8 flags;
- u8 mcs;
+ u16 flags;
u16 legacy;
+ u8 mcs;
u8 nss;
u8 bw;
u8 he_gi;
@@ -2101,7 +2155,7 @@ enum mpath_info_flags {
* @sn: target sequence number
* @metric: metric (cost) of this mesh path
* @exptime: expiration time for the mesh path from now, in msecs
- * @flags: mesh path flags
+ * @flags: mesh path flags from &enum mesh_path_flags
* @discovery_timeout: total mesh path discovery timeout, in msecs
* @discovery_retries: mesh path discovery retries
* @generation: generation number for nl80211 dumps.
@@ -2414,6 +2468,7 @@ struct cfg80211_scan_info {
* @short_ssid_valid: @short_ssid is valid and can be used
* @psc_no_listen: when set, and the channel is a PSC channel, no need to wait
* 20 TUs before starting to send probe requests.
+ * @psd_20: The AP's 20 MHz PSD value.
*/
struct cfg80211_scan_6ghz_params {
u32 short_ssid;
@@ -2422,6 +2477,7 @@ struct cfg80211_scan_6ghz_params {
bool unsolicited_probe;
bool short_ssid_valid;
bool psc_no_listen;
+ s8 psd_20;
};
/**
@@ -2439,7 +2495,7 @@ struct cfg80211_scan_6ghz_params {
* the actual dwell time may be shorter.
* @duration_mandatory: if set, the scan duration must be as specified by the
* %duration field.
- * @flags: bit field of flags controlling operation
+ * @flags: control flags from &enum nl80211_scan_flags
* @rates: bitmap of rates to advertise for each band
* @wiphy: the wiphy this was for
* @scan_start: time (in jiffies) when the scan started
@@ -2487,7 +2543,7 @@ struct cfg80211_scan_request {
struct cfg80211_scan_6ghz_params *scan_6ghz_params;
/* keep last */
- struct ieee80211_channel *channels[];
+ struct ieee80211_channel *channels[] __counted_by(n_channels);
};
static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
@@ -2559,7 +2615,7 @@ struct cfg80211_bss_select_adjust {
* @scan_width: channel width for scanning
* @ie: optional information element(s) to add into Probe Request or %NULL
* @ie_len: length of ie in octets
- * @flags: bit field of flags controlling operation
+ * @flags: control flags from &enum nl80211_scan_flags
* @match_sets: sets of parameters to be matched for a scan result
* entry to be considered valid and to be passed to the host
* (others are filtered out).
@@ -2668,6 +2724,7 @@ enum cfg80211_signal_type {
* the BSS that requested the scan in which the beacon/probe was received.
* @chains: bitmask for filled values in @chain_signal.
* @chain_signal: per-chain signal strength of last received BSS in dBm.
+ * @drv_data: Data to be passed through to @inform_bss
*/
struct cfg80211_inform_bss {
struct ieee80211_channel *chan;
@@ -2678,6 +2735,8 @@ struct cfg80211_inform_bss {
u8 parent_bssid[ETH_ALEN] __aligned(2);
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+
+ void *drv_data;
};
/**
@@ -2830,11 +2889,14 @@ struct cfg80211_auth_request {
* if this is %NULL for a link, that link is not requested
* @elems: extra elements for the per-STA profile for this link
* @elems_len: length of the elements
+ * @disabled: If set, this link should be included during association etc. but it
+ * should not be used until enabled by the AP MLD.
*/
struct cfg80211_assoc_link {
struct cfg80211_bss *bss;
const u8 *elems;
size_t elems_len;
+ bool disabled;
};
/**
@@ -3885,7 +3947,7 @@ struct cfg80211_pmsr_request {
struct list_head list;
- struct cfg80211_pmsr_request_peer peers[];
+ struct cfg80211_pmsr_request_peer peers[] __counted_by(n_peers);
};
/**
@@ -4046,6 +4108,13 @@ struct mgmt_frame_regs {
*
* @change_bss: Modify parameters for a given BSS.
*
+ * @inform_bss: Called by cfg80211 while being informed about new BSS data
+ * for every BSS found within the reported data or frame. This is called
+ * from within the cfg80211 inform_bss handlers while holding the bss_lock.
+ * The data parameter is passed through from drv_data inside
+ * struct cfg80211_inform_bss.
+ * The new IE data for the BSS is explicitly passed.
+ *
* @set_txq_params: Set TX queue parameters
*
* @libertas_set_mesh_channel: Only for backward compatibility for libertas,
@@ -4330,6 +4399,8 @@ struct mgmt_frame_regs {
* @add_link_station: Add a link to a station.
* @mod_link_station: Modify a link of a station.
* @del_link_station: Remove a link of a station.
+ *
+ * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4431,6 +4502,9 @@ struct cfg80211_ops {
int (*change_bss)(struct wiphy *wiphy, struct net_device *dev,
struct bss_parameters *params);
+ void (*inform_bss)(struct wiphy *wiphy, struct cfg80211_bss *bss,
+ const struct cfg80211_bss_ies *ies, void *data);
+
int (*set_txq_params)(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_txq_params *params);
@@ -4550,9 +4624,10 @@ struct cfg80211_ops {
struct cfg80211_gtk_rekey_data *data);
int (*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev,
- const u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, u32 peer_capability,
- bool initiator, const u8 *buf, size_t len);
+ const u8 *peer, int link_id,
+ u8 action_code, u8 dialog_token, u16 status_code,
+ u32 peer_capability, bool initiator,
+ const u8 *buf, size_t len);
int (*tdls_oper)(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, enum nl80211_tdls_operation oper);
@@ -4683,6 +4758,8 @@ struct cfg80211_ops {
struct link_station_parameters *params);
int (*del_link_station)(struct wiphy *wiphy, struct net_device *dev,
struct link_station_del_parameters *params);
+ int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_set_hw_timestamp *hwts);
};
/*
@@ -5139,6 +5216,8 @@ struct wiphy_iftype_akm_suites {
int n_akm_suites;
};
+#define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff
+
/**
* struct wiphy - wireless hardware description
* @mtx: mutex for the data (structures) of this device
@@ -5348,6 +5427,13 @@ struct wiphy_iftype_akm_suites {
* NL80211_MAX_NR_AKM_SUITES in order to avoid compatibility issues with
* legacy userspace and maximum allowed value is
* CFG80211_MAX_NUM_AKM_SUITES.
+ *
+ * @hw_timestamp_max_peers: maximum number of peers that the driver supports
+ * enabling HW timestamping for concurrently. Setting this field to a
+ * non-zero value indicates that the driver supports HW timestamping.
+ * A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver
+ * supports enabling HW timestamping for all peers (i.e. no need to
+ *	specify a MAC address).
*/
struct wiphy {
struct mutex mtx;
@@ -5496,6 +5582,8 @@ struct wiphy {
u8 ema_max_profile_periodicity;
u16 max_num_akm_suites;
+ u16 hw_timestamp_max_peers;
+
char priv[] __aligned(NETDEV_ALIGN);
};
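As an illustration only (not part of the patch), a driver advertising HW timestamping support might look roughly like this; the mydrv_* names and the firmware helper are hypothetical, and the field access assumes struct cfg80211_set_hw_timestamp carries the peer address and an enable flag.

static int mydrv_set_hw_timestamp(struct wiphy *wiphy, struct net_device *dev,
				  struct cfg80211_set_hw_timestamp *hwts)
{
	/* hwts->macaddr is NULL when timestamping was requested for all
	 * peers, which this driver advertises support for below
	 */
	return mydrv_fw_set_timestamping(dev, hwts->macaddr, hwts->enable);
}

/* in wiphy setup: */
wiphy->hw_timestamp_max_peers = CFG80211_HW_TIMESTAMP_ALL_PEERS;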
@@ -5669,12 +5757,17 @@ struct cfg80211_cqm_config;
* wiphy_lock - lock the wiphy
* @wiphy: the wiphy to lock
*
- * This is mostly exposed so it can be done around registering and
- * unregistering netdevs that aren't created through cfg80211 calls,
- * since that requires locking in cfg80211 when the notifiers is
- * called, but that cannot differentiate which way it's called.
+ * This is needed around registering and unregistering netdevs that
+ * aren't created through cfg80211 calls, since that requires locking
+ * in cfg80211 when the notifier is called, but that cannot
+ * differentiate which way it's called.
+ *
+ * It can also be used by drivers for their own purposes.
*
* When cfg80211 ops are called, the wiphy is already locked.
+ *
+ * Note that this makes sure that no workers that have been queued
+ * with wiphy_work_queue() are running.
*/
static inline void wiphy_lock(struct wiphy *wiphy)
__acquires(&wiphy->mtx)
@@ -5694,6 +5787,88 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
mutex_unlock(&wiphy->mtx);
}
+struct wiphy_work;
+typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *);
+
+struct wiphy_work {
+ struct list_head entry;
+ wiphy_work_func_t func;
+};
+
+static inline void wiphy_work_init(struct wiphy_work *work,
+ wiphy_work_func_t func)
+{
+ INIT_LIST_HEAD(&work->entry);
+ work->func = func;
+}
+
+/**
+ * wiphy_work_queue - queue work for the wiphy
+ * @wiphy: the wiphy to queue for
+ * @work: the work item
+ *
+ * This is useful for work that must be done asynchronously, and work
+ * queued here has the special property that the wiphy mutex will be
+ * held as if wiphy_lock() was called, and that it cannot be running
+ * after wiphy_lock() was called. Therefore, wiphy_work_cancel() can
+ * use just cancel_work() instead of cancel_work_sync(); however, it
+ * requires being in a section protected by wiphy_lock().
+ */
+void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work);
+
+/**
+ * wiphy_work_cancel - cancel previously queued work
+ * @wiphy: the wiphy, for debug purposes
+ * @work: the work to cancel
+ *
+ * Cancel the work *without* waiting for it; this assumes being
+ * called under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work);
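A usage sketch, not part of the patch; the embedding struct and the mydrv_* names are hypothetical.

struct mydrv {
	struct wiphy *wiphy;
	struct wiphy_work scan_done_wk;
};

static void mydrv_scan_done_work(struct wiphy *wiphy, struct wiphy_work *work)
{
	struct mydrv *drv = container_of(work, struct mydrv, scan_done_wk);

	/* the wiphy mutex is held here, as if wiphy_lock() had been called */
	mydrv_report_scan_results(drv);		/* hypothetical helper */
}

/* setup:    wiphy_work_init(&drv->scan_done_wk, mydrv_scan_done_work);
 * queueing: wiphy_work_queue(drv->wiphy, &drv->scan_done_wk);
 * teardown: wiphy_work_cancel(drv->wiphy, &drv->scan_done_wk);
 *           (the cancel must run under wiphy_lock())
 */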
+
+struct wiphy_delayed_work {
+ struct wiphy_work work;
+ struct wiphy *wiphy;
+ struct timer_list timer;
+};
+
+void wiphy_delayed_work_timer(struct timer_list *t);
+
+static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork,
+ wiphy_work_func_t func)
+{
+ timer_setup(&dwork->timer, wiphy_delayed_work_timer, 0);
+ wiphy_work_init(&dwork->work, func);
+}
+
+/**
+ * wiphy_delayed_work_queue - queue delayed work for the wiphy
+ * @wiphy: the wiphy to queue for
+ * @dwork: the delayable worker
+ * @delay: number of jiffies to wait before queueing
+ *
+ * This is useful for work that must be done asynchronously, and work
+ * queued here has the special property that the wiphy mutex will be
+ * held as if wiphy_lock() was called, and that it cannot be running
+ * after wiphy_lock() was called. Therefore, wiphy_delayed_work_cancel()
+ * can use just cancel_work() instead of cancel_work_sync(); however, it
+ * requires being in a section protected by wiphy_lock().
+ */
+void wiphy_delayed_work_queue(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork,
+ unsigned long delay);
+
+/**
+ * wiphy_delayed_work_cancel - cancel previously queued delayed work
+ * @wiphy: the wiphy, for debug purposes
+ * @dwork: the delayed work to cancel
+ *
+ * Cancel the work *without* waiting for it; this assumes being
+ * called under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_delayed_work_cancel(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork);
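A corresponding sketch for the delayed variant (again hypothetical names, not part of the patch); the 5 second delay is just for illustration.

/* setup */
wiphy_delayed_work_init(&drv->assoc_timeout_wk, mydrv_assoc_timeout);

/* arm a 5 second timeout; the handler runs with the wiphy mutex held */
wiphy_delayed_work_queue(drv->wiphy, &drv->assoc_timeout_wk, 5 * HZ);

/* disarm, e.g. when association completed; must hold wiphy_lock() */
wiphy_delayed_work_cancel(drv->wiphy, &drv->assoc_timeout_wk);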
+
/**
* struct wireless_dev - wireless device state
*
@@ -6247,10 +6422,13 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
* mesh control field.
*
* @skb: The input A-MSDU frame without any headers.
- * @mesh_hdr: use standard compliant mesh A-MSDU subframe header
+ * @mesh_hdr: the type of mesh header to test
+ * 0: non-mesh A-MSDU length field
+ * 1: big-endian mesh A-MSDU length field
+ * 2: little-endian mesh A-MSDU length field
* Returns: true if subframe header lengths are valid for the @mesh_hdr mode
*/
-bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr);
+bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr);
/**
* ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
@@ -6267,13 +6445,13 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr);
* @extra_headroom: The hardware extra headroom for SKBs in the @list.
* @check_da: DA to check in the inner ethernet header, or NULL
* @check_sa: SA to check in the inner ethernet header, or NULL
- * @mesh_control: A-MSDU subframe header includes the mesh control field
+ * @mesh_control: see mesh_hdr in ieee80211_is_valid_amsdu
*/
void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
const u8 *addr, enum nl80211_iftype iftype,
const unsigned int extra_headroom,
const u8 *check_da, const u8 *check_sa,
- bool mesh_control);
+ u8 mesh_control);
/**
* ieee80211_get_8023_tunnel_proto - get RFC1042 or bridge tunnel encap protocol
@@ -6503,6 +6681,28 @@ cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
}
/**
+ * cfg80211_defragment_element - Defrag the given element data into a buffer
+ *
+ * @elem: the element to defragment
+ * @ies: elements where @elem is contained
+ * @ieslen: length of @ies
+ * @data: buffer to store element data
+ * @data_len: length of @data
+ * @frag_id: the element ID of fragments
+ *
+ * Return: length of @data, or -EINVAL on error
+ *
+ * Copy the data of an element, which may be fragmented, into @data,
+ * skipping all element headers.
+ *
+ * The function uses memmove() internally. It is acceptable to defragment an
+ * element in-place.
+ */
+ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies,
+ size_t ieslen, u8 *data, size_t data_len,
+ u8 frag_id);
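A hedged usage sketch (not part of the patch), assuming @ies/@ieslen describe association response elements and that a 256-byte buffer is large enough for the defragmented payload:

const struct element *ml_elem;
u8 buf[256];
ssize_t len;

ml_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK, ies, ieslen);
if (ml_elem) {
	/* collects the element and any following WLAN_EID_FRAGMENT
	 * elements into buf, with all element headers stripped
	 */
	len = cfg80211_defragment_element(ml_elem, ies, ieslen,
					  buf, sizeof(buf), WLAN_EID_FRAGMENT);
	if (len < 0)
		; /* handle error */
}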
+
+/**
* cfg80211_send_layer2_update - send layer 2 update frame
*
* @dev: network device
@@ -6814,13 +7014,11 @@ enum cfg80211_bss_frame_type {
* @ie: IEs
* @ielen: length of IEs
* @band: enum nl80211_band of the channel
- * @ftype: frame type
*
* Returns the channel number, or -1 if none could be determined.
*/
int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
- enum nl80211_band band,
- enum cfg80211_bss_frame_type ftype);
+ enum nl80211_band band);
/**
* cfg80211_inform_bss_data - inform cfg80211 of a new BSS
@@ -7919,7 +8117,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
* @link_id: the ID of the link the frame was received on
* @buf: Management frame (header + body)
* @len: length of the frame data
- * @flags: flags, as defined in enum nl80211_rxmgmt_flags
+ * @flags: flags, as defined in &enum nl80211_rxmgmt_flags
* @rx_tstamp: Hardware timestamp of frame RX in nanoseconds
* @ack_tstamp: Hardware timestamp of ack TX in nanoseconds
*/
@@ -8101,6 +8299,7 @@ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
* responsible for any cleanup. The caller must also ensure that
* skb->protocol is set appropriately.
* @unencrypted: Whether the frame was received unencrypted
+ * @link_id: the link the frame was received on, -1 if not applicable or unknown
*
* This function is used to inform userspace about a received control port
* frame. It should only be used if userspace indicated it wants to receive
@@ -8111,8 +8310,8 @@ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
*
* Return: %true if the frame was passed to userspace
*/
-bool cfg80211_rx_control_port(struct net_device *dev,
- struct sk_buff *skb, bool unencrypted);
+bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb,
+ bool unencrypted, int link_id);
/**
* cfg80211_cqm_rssi_notify - connection quality monitoring rssi event
@@ -9010,4 +9209,17 @@ static inline int cfg80211_color_change_notify(struct net_device *dev)
bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
const struct cfg80211_chan_def *chandef);
+/**
+ * cfg80211_links_removed - Notify about removed STA MLD setup links.
+ * @dev: network device.
+ * @link_mask: BIT mask of removed STA MLD setup link IDs.
+ *
+ * Inform cfg80211 and userspace about removed STA MLD setup links due to
+ * the AP MLD removing the corresponding affiliated APs with Multi-Link
+ * reconfiguration. Note that it's not valid to remove all links; in that
+ * case, disconnect instead.
+ * Also note that the wdev mutex must be held.
+ */
+void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
+
#endif /* __NET_CFG80211_H */
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 0c2778a836db..f79ce133e51a 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -170,7 +170,8 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b)
}
/**
- * @WPAN_PHY_FLAG_TRANSMIT_POWER: Indicates that transceiver will support
+ * enum wpan_phy_flags - WPAN PHY state flags
+ * @WPAN_PHY_FLAG_TXPOWER: Indicates that transceiver will support
* transmit power setting.
* @WPAN_PHY_FLAG_CCA_ED_LEVEL: Indicates that transceiver will support cca ed
* level setting.
@@ -178,12 +179,15 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b)
* setting.
* @WPAN_PHY_FLAG_STATE_QUEUE_STOPPED: Indicates that the transmit queue was
* temporarily stopped.
+ * @WPAN_PHY_FLAG_DATAGRAMS_ONLY: Indicates that transceiver is only able to
+ * send/receive datagrams.
*/
enum wpan_phy_flags {
WPAN_PHY_FLAG_TXPOWER = BIT(1),
WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2),
WPAN_PHY_FLAG_CCA_MODE = BIT(3),
WPAN_PHY_FLAG_STATE_QUEUE_STOPPED = BIT(4),
+ WPAN_PHY_FLAG_DATAGRAMS_ONLY = BIT(5),
};
struct wpan_phy {
diff --git a/include/net/codel.h b/include/net/codel.h
index 5fed2f16cb8d..aa80f744826c 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -145,8 +145,8 @@ struct codel_vars {
* @maxpacket: largest packet we've seen so far
* @drop_count: temp count of dropped packets in dequeue()
* @drop_len: bytes of dropped packets in dequeue()
- * ecn_mark: number of packets we ECN marked instead of dropping
- * ce_mark: number of packets CE marked because sojourn time was above ce_threshold
+ * @ecn_mark: number of packets we ECN marked instead of dropping
+ * @ce_mark: number of packets CE marked because sojourn time was above ce_threshold
*/
struct codel_stats {
u32 maxpacket;
diff --git a/include/net/datalink.h b/include/net/datalink.h
index c837ffc7ebf8..6c529a40e00d 100644
--- a/include/net/datalink.h
+++ b/include/net/datalink.h
@@ -23,6 +23,4 @@ struct datalink_proto {
struct list_head node;
};
-struct datalink_proto *make_EII_client(void);
-void destroy_EII_client(struct datalink_proto *dl);
#endif
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6a942e70e451..29fd1b4ee654 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -123,6 +123,7 @@ struct devlink_port {
struct list_head list;
struct list_head region_list;
struct devlink *devlink;
+ const struct devlink_port_ops *ops;
unsigned int index;
spinlock_t type_lock; /* Protects type and type_eth/ib
* structures consistency.
@@ -220,7 +221,7 @@ struct devlink_dpipe_field {
/**
* struct devlink_dpipe_header - dpipe header object
* @name: header name
- * @id: index, global/local detrmined by global bit
+ * @id: index, global/local determined by global bit
* @fields: fields
* @fields_count: number of fields
* @global: indicates if header is shared like most protocol header
@@ -240,7 +241,7 @@ struct devlink_dpipe_header {
* @header_index: header index (packets can have several headers of same
* type like in case of tunnels)
* @header: header
- * @fieled_id: field index
+ * @field_id: field index
*/
struct devlink_dpipe_match {
enum devlink_dpipe_match_type type;
@@ -255,7 +256,7 @@ struct devlink_dpipe_match {
* @header_index: header index (packets can have several headers of same
* type like in case of tunnels)
* @header: header
- * @fieled_id: field index
+ * @field_id: field index
*/
struct devlink_dpipe_action {
enum devlink_dpipe_action_type type;
@@ -291,7 +292,7 @@ struct devlink_dpipe_value {
* struct devlink_dpipe_entry - table entry object
* @index: index of the entry in the table
* @match_values: match values
- * @matche_values_count: count of matches tuples
+ * @match_values_count: count of match value tuples
* @action_values: actions values
* @action_values_count: count of actions values
* @counter: value of counter
@@ -341,7 +342,9 @@ struct devlink_dpipe_table_ops;
*/
struct devlink_dpipe_table {
void *priv;
+ /* private: */
struct list_head list;
+ /* public: */
const char *name;
bool counters_enabled;
bool counter_control_extern;
@@ -354,13 +357,13 @@ struct devlink_dpipe_table {
/**
* struct devlink_dpipe_table_ops - dpipe_table ops
- * @actions_dump - dumps all tables actions
- * @matches_dump - dumps all tables matches
- * @entries_dump - dumps all active entries in the table
- * @counters_set_update - when changing the counter status hardware sync
+ * @actions_dump: dumps all tables actions
+ * @matches_dump: dumps all tables matches
+ * @entries_dump: dumps all active entries in the table
+ * @counters_set_update: when changing the counter status, hardware sync
+ *			  may be needed to allocate/free counter related
+ *			  resources
- * @size_get - get size
+ * @size_get: get size
*/
struct devlink_dpipe_table_ops {
int (*actions_dump)(void *priv, struct sk_buff *skb);
@@ -373,8 +376,8 @@ struct devlink_dpipe_table_ops {
/**
* struct devlink_dpipe_headers - dpipe headers
- * @headers - header array can be shared (global bit) or driver specific
- * @headers_count - count of headers
+ * @headers: header array can be shared (global bit) or driver specific
+ * @headers_count: count of headers
*/
struct devlink_dpipe_headers {
struct devlink_dpipe_header **headers;
@@ -386,7 +389,7 @@ struct devlink_dpipe_headers {
* @size_min: minimum size which can be set
* @size_max: maximum size which can be set
* @size_granularity: size granularity
- * @size_unit: resource's basic unit
+ * @unit: resource's basic unit
*/
struct devlink_resource_size_params {
u64 size_min;
@@ -456,6 +459,7 @@ struct devlink_flash_notify {
/**
* struct devlink_param - devlink configuration parameter data
+ * @id: devlink parameter id number
* @name: name of the parameter
* @generic: indicates if the parameter is generic or driver specific
* @type: parameter type
@@ -631,6 +635,7 @@ enum devlink_param_generic_id {
* struct devlink_flash_update_params - Flash Update parameters
* @fw: pointer to the firmware data to update from
* @component: the flash component to update
+ * @overwrite_mask: which types of flash update are supported (may be %0)
*
* With the exception of fw, drivers must opt-in to parameters by
* setting the appropriate bit in the supported_flash_update_params field in
@@ -1261,7 +1266,7 @@ struct devlink_ops {
/**
* @supported_flash_update_params:
* mask of parameters supported by the driver's .flash_update
- * implemementation.
+ * implementation.
*/
u32 supported_flash_update_params;
unsigned long reload_actions;
@@ -1273,12 +1278,6 @@ struct devlink_ops {
int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action,
enum devlink_reload_limit limit, u32 *actions_performed,
struct netlink_ext_ack *extack);
- int (*port_type_set)(struct devlink_port *devlink_port,
- enum devlink_port_type port_type);
- int (*port_split)(struct devlink *devlink, struct devlink_port *port,
- unsigned int count, struct netlink_ext_ack *extack);
- int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port,
- struct netlink_ext_ack *extack);
int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index,
u16 pool_index,
struct devlink_sb_pool_info *pool_info);
@@ -1435,80 +1434,17 @@ struct devlink_ops {
const struct devlink_trap_policer *policer,
u64 *p_drops);
/**
- * @port_function_hw_addr_get: Port function's hardware address get function.
- *
- * Should be used by device drivers to report the hardware address of a function managed
- * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
- * function handling for a particular port.
- *
- * Note: @extack can be NULL when port notifier queries the port function.
- */
- int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
- int *hw_addr_len,
- struct netlink_ext_ack *extack);
- /**
- * @port_function_hw_addr_set: Port function's hardware address set function.
- *
- * Should be used by device drivers to set the hardware address of a function managed
- * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
- * function handling for a particular port.
- */
- int (*port_function_hw_addr_set)(struct devlink_port *port,
- const u8 *hw_addr, int hw_addr_len,
- struct netlink_ext_ack *extack);
- /**
- * @port_fn_roce_get: Port function's roce get function.
- *
- * Query RoCE state of a function managed by the devlink port.
- * Return -EOPNOTSUPP if port function RoCE handling is not supported.
- */
- int (*port_fn_roce_get)(struct devlink_port *devlink_port,
- bool *is_enable,
- struct netlink_ext_ack *extack);
- /**
- * @port_fn_roce_set: Port function's roce set function.
- *
- * Enable/Disable the RoCE state of a function managed by the devlink
- * port.
- * Return -EOPNOTSUPP if port function RoCE handling is not supported.
- */
- int (*port_fn_roce_set)(struct devlink_port *devlink_port,
- bool enable, struct netlink_ext_ack *extack);
- /**
- * @port_fn_migratable_get: Port function's migratable get function.
- *
- * Query migratable state of a function managed by the devlink port.
- * Return -EOPNOTSUPP if port function migratable handling is not
- * supported.
- */
- int (*port_fn_migratable_get)(struct devlink_port *devlink_port,
- bool *is_enable,
- struct netlink_ext_ack *extack);
- /**
- * @port_fn_migratable_set: Port function's migratable set function.
- *
- * Enable/Disable migratable state of a function managed by the devlink
- * port.
- * Return -EOPNOTSUPP if port function migratable handling is not
- * supported.
- */
- int (*port_fn_migratable_set)(struct devlink_port *devlink_port,
- bool enable,
- struct netlink_ext_ack *extack);
- /**
* port_new() - Add a new port function of a specified flavor
* @devlink: Devlink instance
* @attrs: attributes of the new port
* @extack: extack for reporting error messages
- * @new_port_index: index of the new port
+ * @devlink_port: pointer in which to store the new devlink port
*
* Devlink core will call this device driver function upon user request
* to create a new port function of a specified flavor and optional
* attributes
*
* Notes:
- * - Called without devlink instance lock being held. Drivers must
- * implement own means of synchronization
* - On success, drivers must register a port with devlink core
*
* Return: 0 on success, negative value otherwise.
@@ -1516,56 +1452,7 @@ struct devlink_ops {
int (*port_new)(struct devlink *devlink,
const struct devlink_port_new_attrs *attrs,
struct netlink_ext_ack *extack,
- unsigned int *new_port_index);
- /**
- * port_del() - Delete a port function
- * @devlink: Devlink instance
- * @port_index: port function index to delete
- * @extack: extack for reporting error messages
- *
- * Devlink core will call this device driver function upon user request
- * to delete a previously created port function
- *
- * Notes:
- * - Called without devlink instance lock being held. Drivers must
- * implement own means of synchronization
- * - On success, drivers must unregister the corresponding devlink
- * port
- *
- * Return: 0 on success, negative value otherwise.
- */
- int (*port_del)(struct devlink *devlink, unsigned int port_index,
- struct netlink_ext_ack *extack);
- /**
- * port_fn_state_get() - Get the state of a port function
- * @devlink: Devlink instance
- * @port: The devlink port
- * @state: Admin configured state
- * @opstate: Current operational state
- * @extack: extack for reporting error messages
- *
- * Reports the admin and operational state of a devlink port function
- *
- * Return: 0 on success, negative value otherwise.
- */
- int (*port_fn_state_get)(struct devlink_port *port,
- enum devlink_port_fn_state *state,
- enum devlink_port_fn_opstate *opstate,
- struct netlink_ext_ack *extack);
- /**
- * port_fn_state_set() - Set the admin state of a port function
- * @devlink: Devlink instance
- * @port: The devlink port
- * @state: Admin state
- * @extack: extack for reporting error messages
- *
- * Set the admin state of a devlink port function
- *
- * Return: 0 on success, negative value otherwise.
- */
- int (*port_fn_state_set)(struct devlink_port *port,
- enum devlink_port_fn_state state,
- struct netlink_ext_ack *extack);
+ struct devlink_port **devlink_port);
/**
* Rate control callbacks.
@@ -1655,15 +1542,146 @@ void devl_unregister(struct devlink *devlink);
void devlink_register(struct devlink *devlink);
void devlink_unregister(struct devlink *devlink);
void devlink_free(struct devlink *devlink);
+
+/**
+ * struct devlink_port_ops - Port operations
+ * @port_split: Callback used to split the port into multiple ones.
+ * @port_unsplit: Callback used to unsplit the port group back into
+ * a single port.
+ * @port_type_set: Callback used to set a type of a port.
+ * @port_del: Callback used to delete the selected port along with the related function.
+ * Devlink core calls this upon user request to delete
+ * a port previously created by devlink_ops->port_new().
+ * @port_fn_hw_addr_get: Callback used to get port function's hardware address.
+ * Should be used by device drivers to report
+ * the hardware address of a function managed
+ * by the devlink port.
+ * @port_fn_hw_addr_set: Callback used to set port function's hardware address.
+ * Should be used by device drivers to set the hardware
+ * address of a function managed by the devlink port.
+ * @port_fn_roce_get: Callback used to get port function's RoCE capability.
+ * Should be used by device drivers to report
+ * the current state of RoCE capability of a function
+ * managed by the devlink port.
+ * @port_fn_roce_set: Callback used to set port function's RoCE capability.
+ * Should be used by device drivers to enable/disable
+ * RoCE capability of a function managed
+ * by the devlink port.
+ * @port_fn_migratable_get: Callback used to get port function's migratable
+ * capability. Should be used by device drivers
+ * to report the current state of migratable capability
+ * of a function managed by the devlink port.
+ * @port_fn_migratable_set: Callback used to set port function's migratable
+ * capability. Should be used by device drivers
+ * to enable/disable migratable capability of
+ * a function managed by the devlink port.
+ * @port_fn_state_get: Callback used to get port function's state.
+ * Should be used by device drivers to report
+ * the current admin and operational state of a
+ * function managed by the devlink port.
+ * @port_fn_state_set: Callback used to set port function's state.
+ *			Should be used by device drivers to set
+ * the admin state of a function managed
+ * by the devlink port.
+ * @port_fn_ipsec_crypto_get: Callback used to get port function's ipsec_crypto
+ * capability. Should be used by device drivers
+ * to report the current state of ipsec_crypto
+ * capability of a function managed by the devlink
+ * port.
+ * @port_fn_ipsec_crypto_set: Callback used to set port function's ipsec_crypto
+ * capability. Should be used by device drivers to
+ * enable/disable ipsec_crypto capability of a
+ * function managed by the devlink port.
+ * @port_fn_ipsec_packet_get: Callback used to get port function's ipsec_packet
+ * capability. Should be used by device drivers
+ * to report the current state of ipsec_packet
+ * capability of a function managed by the devlink
+ * port.
+ * @port_fn_ipsec_packet_set: Callback used to set port function's ipsec_packet
+ * capability. Should be used by device drivers to
+ * enable/disable ipsec_packet capability of a
+ * function managed by the devlink port.
+ *
+ * Note: Driver should return -EOPNOTSUPP if it doesn't support
+ * port function (@port_fn_*) handling for a particular port.
+ */
+struct devlink_port_ops {
+ int (*port_split)(struct devlink *devlink, struct devlink_port *port,
+ unsigned int count, struct netlink_ext_ack *extack);
+ int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port,
+ struct netlink_ext_ack *extack);
+ int (*port_type_set)(struct devlink_port *devlink_port,
+ enum devlink_port_type port_type);
+ int (*port_del)(struct devlink *devlink, struct devlink_port *port,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+ int *hw_addr_len,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_hw_addr_set)(struct devlink_port *port,
+ const u8 *hw_addr, int hw_addr_len,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_roce_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_roce_set)(struct devlink_port *devlink_port,
+ bool enable, struct netlink_ext_ack *extack);
+ int (*port_fn_migratable_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_migratable_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_state_get)(struct devlink_port *port,
+ enum devlink_port_fn_state *state,
+ enum devlink_port_fn_opstate *opstate,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_state_set)(struct devlink_port *port,
+ enum devlink_port_fn_state state,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_crypto_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_crypto_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_packet_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
+};
+
void devlink_port_init(struct devlink *devlink,
struct devlink_port *devlink_port);
void devlink_port_fini(struct devlink_port *devlink_port);
-int devl_port_register(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index);
-int devlink_port_register(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index);
+
+int devl_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops);
+
+static inline int devl_port_register(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index)
+{
+ return devl_port_register_with_ops(devlink, devlink_port,
+ port_index, NULL);
+}
+
+int devlink_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops);
+
+static inline int devlink_port_register(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index)
+{
+ return devlink_port_register_with_ops(devlink, devlink_port,
+ port_index, NULL);
+}
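For illustration (not part of the patch), a driver converted to the new scheme attaches its per-port ops at registration time instead of filling struct devlink_ops; the mydrv_* names are hypothetical callbacks with the signatures shown in struct devlink_port_ops above.

static const struct devlink_port_ops mydrv_port_ops = {
	.port_split = mydrv_port_split,
	.port_unsplit = mydrv_port_unsplit,
	.port_fn_hw_addr_get = mydrv_port_fn_hw_addr_get,
};

static int mydrv_port_init(struct devlink *devlink,
			   struct devlink_port *dl_port, unsigned int index)
{
	/* same as devl_port_register(), but with per-port ops attached */
	return devl_port_register_with_ops(devlink, dl_port, index,
					   &mydrv_port_ops);
}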
+
void devl_port_unregister(struct devlink_port *devlink_port);
void devlink_port_unregister(struct devlink_port *devlink_port);
void devlink_port_type_eth_set(struct devlink_port *devlink_port);
@@ -1755,9 +1773,6 @@ int devl_resource_size_get(struct devlink *devlink,
int devl_dpipe_table_resource_set(struct devlink *devlink,
const char *table_name, u64 resource_id,
u64 resource_units);
-int devlink_dpipe_table_resource_set(struct devlink *devlink,
- const char *table_name, u64 resource_id,
- u64 resource_units);
void devl_resource_occ_get_register(struct devlink *devlink,
u64 resource_id,
devlink_resource_occ_get_t *occ_get,
@@ -1802,8 +1817,6 @@ devlink_port_region_create(struct devlink_port *port,
u32 region_max_snapshots, u64 region_size);
void devl_region_destroy(struct devlink_region *region);
void devlink_region_destroy(struct devlink_region *region);
-void devlink_port_region_destroy(struct devlink_region *region);
-
int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id);
void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id);
int devlink_region_snapshot_create(struct devlink_region *region,
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
new file mode 100644
index 000000000000..a587e83fc169
--- /dev/null
+++ b/include/net/dropreason-core.h
@@ -0,0 +1,376 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LINUX_DROPREASON_CORE_H
+#define _LINUX_DROPREASON_CORE_H
+
+#define DEFINE_DROP_REASON(FN, FNe) \
+ FN(NOT_SPECIFIED) \
+ FN(NO_SOCKET) \
+ FN(PKT_TOO_SMALL) \
+ FN(TCP_CSUM) \
+ FN(SOCKET_FILTER) \
+ FN(UDP_CSUM) \
+ FN(NETFILTER_DROP) \
+ FN(OTHERHOST) \
+ FN(IP_CSUM) \
+ FN(IP_INHDR) \
+ FN(IP_RPFILTER) \
+ FN(UNICAST_IN_L2_MULTICAST) \
+ FN(XFRM_POLICY) \
+ FN(IP_NOPROTO) \
+ FN(SOCKET_RCVBUFF) \
+ FN(PROTO_MEM) \
+ FN(TCP_MD5NOTFOUND) \
+ FN(TCP_MD5UNEXPECTED) \
+ FN(TCP_MD5FAILURE) \
+ FN(SOCKET_BACKLOG) \
+ FN(TCP_FLAGS) \
+ FN(TCP_ZEROWINDOW) \
+ FN(TCP_OLD_DATA) \
+ FN(TCP_OVERWINDOW) \
+ FN(TCP_OFOMERGE) \
+ FN(TCP_RFC7323_PAWS) \
+ FN(TCP_OLD_SEQUENCE) \
+ FN(TCP_INVALID_SEQUENCE) \
+ FN(TCP_RESET) \
+ FN(TCP_INVALID_SYN) \
+ FN(TCP_CLOSE) \
+ FN(TCP_FASTOPEN) \
+ FN(TCP_OLD_ACK) \
+ FN(TCP_TOO_OLD_ACK) \
+ FN(TCP_ACK_UNSENT_DATA) \
+ FN(TCP_OFO_QUEUE_PRUNE) \
+ FN(TCP_OFO_DROP) \
+ FN(IP_OUTNOROUTES) \
+ FN(BPF_CGROUP_EGRESS) \
+ FN(IPV6DISABLED) \
+ FN(NEIGH_CREATEFAIL) \
+ FN(NEIGH_FAILED) \
+ FN(NEIGH_QUEUEFULL) \
+ FN(NEIGH_DEAD) \
+ FN(TC_EGRESS) \
+ FN(QDISC_DROP) \
+ FN(CPU_BACKLOG) \
+ FN(XDP) \
+ FN(TC_INGRESS) \
+ FN(UNHANDLED_PROTO) \
+ FN(SKB_CSUM) \
+ FN(SKB_GSO_SEG) \
+ FN(SKB_UCOPY_FAULT) \
+ FN(DEV_HDR) \
+ FN(DEV_READY) \
+ FN(FULL_RING) \
+ FN(NOMEM) \
+ FN(HDR_TRUNC) \
+ FN(TAP_FILTER) \
+ FN(TAP_TXFILTER) \
+ FN(ICMP_CSUM) \
+ FN(INVALID_PROTO) \
+ FN(IP_INADDRERRORS) \
+ FN(IP_INNOROUTES) \
+ FN(PKT_TOO_BIG) \
+ FN(DUP_FRAG) \
+ FN(FRAG_REASM_TIMEOUT) \
+ FN(FRAG_TOO_FAR) \
+ FN(TCP_MINTTL) \
+ FN(IPV6_BAD_EXTHDR) \
+ FN(IPV6_NDISC_FRAG) \
+ FN(IPV6_NDISC_HOP_LIMIT) \
+ FN(IPV6_NDISC_BAD_CODE) \
+ FN(IPV6_NDISC_BAD_OPTIONS) \
+ FN(IPV6_NDISC_NS_OTHERHOST) \
+ FN(QUEUE_PURGE) \
+ FNe(MAX)
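DEFINE_DROP_REASON() is an X-macro: a consumer instantiates FN()/FNe() to generate code for every reason name. As a minimal sketch (the real consumer lives in net/core; the exact form here is assumed), the reason-name string table can be generated like this:

#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
#define FNe(reason) [SKB_DROP_REASON_##reason] = #reason

static const char * const drop_reasons[] = {
	[SKB_CONSUMED] = "CONSUMED",
	DEFINE_DROP_REASON(FN, FNe)
};

#undef FN
#undef FNe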
+
+/**
+ * enum skb_drop_reason - the reasons for skb drops
+ *
+ * The reason an skb is dropped, as used in kfree_skb_reason().
+ */
+enum skb_drop_reason {
+ /**
+ * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case)
+ */
+ SKB_NOT_DROPPED_YET = 0,
+ /** @SKB_CONSUMED: packet has been consumed */
+ SKB_CONSUMED,
+ /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
+ SKB_DROP_REASON_NOT_SPECIFIED,
+ /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
+ SKB_DROP_REASON_NO_SOCKET,
+ /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
+ SKB_DROP_REASON_PKT_TOO_SMALL,
+ /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */
+ SKB_DROP_REASON_TCP_CSUM,
+ /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
+ SKB_DROP_REASON_SOCKET_FILTER,
+ /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */
+ SKB_DROP_REASON_UDP_CSUM,
+ /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */
+ SKB_DROP_REASON_NETFILTER_DROP,
+ /**
+	 * @SKB_DROP_REASON_OTHERHOST: packet doesn't belong to the current host
+ * (interface is in promisc mode)
+ */
+ SKB_DROP_REASON_OTHERHOST,
+ /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */
+ SKB_DROP_REASON_IP_CSUM,
+ /**
+	 * @SKB_DROP_REASON_IP_INHDR: there is something wrong with the IP header
+	 * (see IPSTATS_MIB_INHDRERRORS)
+ */
+ SKB_DROP_REASON_IP_INHDR,
+ /**
+	 * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validation failed. See the
+	 * documentation for rp_filter in ip-sysctl.rst for more information
+ */
+ SKB_DROP_REASON_IP_RPFILTER,
+ /**
+ * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is
+ * multicast, but L3 is unicast.
+ */
+ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,
+ /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */
+ SKB_DROP_REASON_XFRM_POLICY,
+ /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */
+ SKB_DROP_REASON_IP_NOPROTO,
+ /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
+ SKB_DROP_REASON_SOCKET_RCVBUFF,
+ /**
+	 * @SKB_DROP_REASON_PROTO_MEM: protocol memory limitation, such as a UDP
+	 * packet dropped because udp_memory_allocated is exceeded.
+ */
+ SKB_DROP_REASON_PROTO_MEM,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected,
+ * corresponding to LINUX_MIB_TCPMD5NOTFOUND
+ */
+ SKB_DROP_REASON_TCP_MD5NOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting
+ * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED
+ */
+ SKB_DROP_REASON_TCP_MD5UNEXPECTED,
+ /**
+	 * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and it's wrong, corresponding
+ * to LINUX_MIB_TCPMD5FAILURE
+ */
+ SKB_DROP_REASON_TCP_MD5FAILURE,
+ /**
+	 * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog
+	 * (see LINUX_MIB_TCPBACKLOGDROP)
+ */
+ SKB_DROP_REASON_SOCKET_BACKLOG,
+ /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
+ SKB_DROP_REASON_TCP_FLAGS,
+ /**
+ * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
+ * see LINUX_MIB_TCPZEROWINDOWDROP
+ */
+ SKB_DROP_REASON_TCP_ZEROWINDOW,
+ /**
+	 * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received was already
+	 * received before (a spurious retransmission may have happened), see
+	 * LINUX_MIB_DELAYEDACKLOST
+ */
+ SKB_DROP_REASON_TCP_OLD_DATA,
+ /**
+	 * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window;
+	 * the sequence number of the first byte exceeds the right edge of
+	 * the receive window
+ */
+ SKB_DROP_REASON_TCP_OVERWINDOW,
+ /**
+ * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo
+ * queue, corresponding to LINUX_MIB_TCPOFOMERGE
+ */
+ SKB_DROP_REASON_TCP_OFOMERGE,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
+ * LINUX_MIB_PAWSESTABREJECTED
+ */
+ SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
+ SKB_DROP_REASON_TCP_OLD_SEQUENCE,
+ /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
+ SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
+ /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
+ SKB_DROP_REASON_TCP_RESET,
+ /**
+ * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected
+ * SYN flag
+ */
+ SKB_DROP_REASON_TCP_INVALID_SYN,
+ /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */
+ SKB_DROP_REASON_TCP_CLOSE,
+ /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */
+ SKB_DROP_REASON_TCP_FASTOPEN,
+ /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */
+ SKB_DROP_REASON_TCP_OLD_ACK,
+ /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */
+ SKB_DROP_REASON_TCP_TOO_OLD_ACK,
+ /**
+ * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't
+ * sent yet
+ */
+ SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,
+ /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */
+ SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE,
+ /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */
+ SKB_DROP_REASON_TCP_OFO_DROP,
+ /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */
+ SKB_DROP_REASON_IP_OUTNOROUTES,
+ /**
+ * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB
+ * eBPF program
+ */
+ SKB_DROP_REASON_BPF_CGROUP_EGRESS,
+ /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */
+ SKB_DROP_REASON_IPV6DISABLED,
+ /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */
+ SKB_DROP_REASON_NEIGH_CREATEFAIL,
+ /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */
+ SKB_DROP_REASON_NEIGH_FAILED,
+ /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */
+ SKB_DROP_REASON_NEIGH_QUEUEFULL,
+ /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */
+ SKB_DROP_REASON_NEIGH_DEAD,
+ /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
+ SKB_DROP_REASON_TC_EGRESS,
+ /**
+	 * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc on packet output
+	 * (failed to enqueue to the current qdisc)
+ */
+ SKB_DROP_REASON_QDISC_DROP,
+ /**
+	 * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
+	 * backlog queue. This can be caused by a full backlog queue (see
+	 * netdev_max_backlog in net.rst) or the RPS flow limit
+ */
+ SKB_DROP_REASON_CPU_BACKLOG,
+ /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */
+ SKB_DROP_REASON_XDP,
+ /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */
+ SKB_DROP_REASON_TC_INGRESS,
+ /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */
+ SKB_DROP_REASON_UNHANDLED_PROTO,
+ /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */
+ SKB_DROP_REASON_SKB_CSUM,
+ /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */
+ SKB_DROP_REASON_SKB_GSO_SEG,
+ /**
+ * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space,
+ * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx()
+ */
+ SKB_DROP_REASON_SKB_UCOPY_FAULT,
+ /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */
+ SKB_DROP_REASON_DEV_HDR,
+ /**
+	 * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to
+	 * any of its data structures not being up/ready/initialized,
+	 * e.g., IFF_UP is not set, or the driver-specific tun->tfiles[txq]
+	 * is not initialized
+ */
+ SKB_DROP_REASON_DEV_READY,
+ /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */
+ SKB_DROP_REASON_FULL_RING,
+ /** @SKB_DROP_REASON_NOMEM: error due to OOM */
+ SKB_DROP_REASON_NOMEM,
+ /**
+	 * @SKB_DROP_REASON_HDR_TRUNC: failed to truncate/extract the header from
+ * networking data, e.g., failed to pull the protocol header from
+ * frags via pskb_may_pull()
+ */
+ SKB_DROP_REASON_HDR_TRUNC,
+ /**
+ * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached
+ * to tun/tap, e.g., via TUNSETFILTEREBPF
+ */
+ SKB_DROP_REASON_TAP_FILTER,
+ /**
+ * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at
+ * tun/tap, e.g., check_filter()
+ */
+ SKB_DROP_REASON_TAP_TXFILTER,
+ /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */
+ SKB_DROP_REASON_ICMP_CSUM,
+ /**
+	 * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 1122,
+	 * such as a broadcast ICMP_TIMESTAMP
+ */
+ SKB_DROP_REASON_INVALID_PROTO,
+ /**
+ * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to
+ * IPSTATS_MIB_INADDRERRORS
+ */
+ SKB_DROP_REASON_IP_INADDRERRORS,
+ /**
+ * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to
+ * IPSTATS_MIB_INADDRERRORS
+ */
+ SKB_DROP_REASON_IP_INNOROUTES,
+ /**
+	 * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (may exceed the
+	 * MTU)
+ */
+ SKB_DROP_REASON_PKT_TOO_BIG,
+ /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */
+ SKB_DROP_REASON_DUP_FRAG,
+ /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */
+ SKB_DROP_REASON_FRAG_REASM_TIMEOUT,
+ /**
+ * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far.
+ * (/proc/sys/net/ipv4/ipfrag_max_dist)
+ */
+ SKB_DROP_REASON_FRAG_TOO_FAR,
+ /**
+ * @SKB_DROP_REASON_TCP_MINTTL: ipv4 ttl or ipv6 hoplimit below
+ * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT).
+ */
+ SKB_DROP_REASON_TCP_MINTTL,
+ /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */
+ SKB_DROP_REASON_IPV6_BAD_EXTHDR,
+ /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */
+ SKB_DROP_REASON_IPV6_NDISC_FRAG,
+ /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */
+ SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT,
+ /** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. */
+ SKB_DROP_REASON_IPV6_NDISC_BAD_CODE,
+ /** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */
+ SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS,
+ /**
+ * @SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST: NEIGHBOUR SOLICITATION
+ * for another host.
+ */
+ SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
+ /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
+ SKB_DROP_REASON_QUEUE_PURGE,
+ /**
+ * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
+ * shouldn't be used as a real 'reason' - only for tracing code gen
+ */
+ SKB_DROP_REASON_MAX,
+
+ /**
+ * @SKB_DROP_REASON_SUBSYS_MASK: subsystem mask in drop reasons,
+ * see &enum skb_drop_reason_subsys
+ */
+ SKB_DROP_REASON_SUBSYS_MASK = 0xffff0000,
+};
+
+#define SKB_DROP_REASON_SUBSYS_SHIFT 16
+
+#define SKB_DR_INIT(name, reason) \
+ enum skb_drop_reason name = SKB_DROP_REASON_##reason
+#define SKB_DR(name) \
+ SKB_DR_INIT(name, NOT_SPECIFIED)
+#define SKB_DR_SET(name, reason) \
+ (name = SKB_DROP_REASON_##reason)
+#define SKB_DR_OR(name, reason) \
+ do { \
+ if (name == SKB_DROP_REASON_NOT_SPECIFIED || \
+ name == SKB_NOT_DROPPED_YET) \
+ SKB_DR_SET(name, reason); \
+ } while (0)
+
+#endif
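A usage sketch for the SKB_DR helpers above, not part of the patch; the receive path and the checksum check are hypothetical.

static void mydrv_rx_one(struct sk_buff *skb)
{
	SKB_DR(reason);	/* enum skb_drop_reason reason = ..._NOT_SPECIFIED; */

	if (!pskb_may_pull(skb, ETH_HLEN)) {
		SKB_DR_SET(reason, PKT_TOO_SMALL);
		goto drop;
	}

	if (mydrv_csum_bad(skb)) {	/* hypothetical check */
		/* only overwrites reason if nothing more specific was set */
		SKB_DR_OR(reason, SKB_CSUM);
		goto drop;
	}

	netif_receive_skb(skb);
	return;

drop:
	kfree_skb_reason(skb, reason);
}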
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index c0a3ea806cd5..56cb7be92244 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -2,362 +2,48 @@
#ifndef _LINUX_DROPREASON_H
#define _LINUX_DROPREASON_H
-
-#define DEFINE_DROP_REASON(FN, FNe) \
- FN(NOT_SPECIFIED) \
- FN(NO_SOCKET) \
- FN(PKT_TOO_SMALL) \
- FN(TCP_CSUM) \
- FN(SOCKET_FILTER) \
- FN(UDP_CSUM) \
- FN(NETFILTER_DROP) \
- FN(OTHERHOST) \
- FN(IP_CSUM) \
- FN(IP_INHDR) \
- FN(IP_RPFILTER) \
- FN(UNICAST_IN_L2_MULTICAST) \
- FN(XFRM_POLICY) \
- FN(IP_NOPROTO) \
- FN(SOCKET_RCVBUFF) \
- FN(PROTO_MEM) \
- FN(TCP_MD5NOTFOUND) \
- FN(TCP_MD5UNEXPECTED) \
- FN(TCP_MD5FAILURE) \
- FN(SOCKET_BACKLOG) \
- FN(TCP_FLAGS) \
- FN(TCP_ZEROWINDOW) \
- FN(TCP_OLD_DATA) \
- FN(TCP_OVERWINDOW) \
- FN(TCP_OFOMERGE) \
- FN(TCP_RFC7323_PAWS) \
- FN(TCP_INVALID_SEQUENCE) \
- FN(TCP_RESET) \
- FN(TCP_INVALID_SYN) \
- FN(TCP_CLOSE) \
- FN(TCP_FASTOPEN) \
- FN(TCP_OLD_ACK) \
- FN(TCP_TOO_OLD_ACK) \
- FN(TCP_ACK_UNSENT_DATA) \
- FN(TCP_OFO_QUEUE_PRUNE) \
- FN(TCP_OFO_DROP) \
- FN(IP_OUTNOROUTES) \
- FN(BPF_CGROUP_EGRESS) \
- FN(IPV6DISABLED) \
- FN(NEIGH_CREATEFAIL) \
- FN(NEIGH_FAILED) \
- FN(NEIGH_QUEUEFULL) \
- FN(NEIGH_DEAD) \
- FN(TC_EGRESS) \
- FN(QDISC_DROP) \
- FN(CPU_BACKLOG) \
- FN(XDP) \
- FN(TC_INGRESS) \
- FN(UNHANDLED_PROTO) \
- FN(SKB_CSUM) \
- FN(SKB_GSO_SEG) \
- FN(SKB_UCOPY_FAULT) \
- FN(DEV_HDR) \
- FN(DEV_READY) \
- FN(FULL_RING) \
- FN(NOMEM) \
- FN(HDR_TRUNC) \
- FN(TAP_FILTER) \
- FN(TAP_TXFILTER) \
- FN(ICMP_CSUM) \
- FN(INVALID_PROTO) \
- FN(IP_INADDRERRORS) \
- FN(IP_INNOROUTES) \
- FN(PKT_TOO_BIG) \
- FN(DUP_FRAG) \
- FN(FRAG_REASM_TIMEOUT) \
- FN(FRAG_TOO_FAR) \
- FN(TCP_MINTTL) \
- FN(IPV6_BAD_EXTHDR) \
- FN(IPV6_NDISC_FRAG) \
- FN(IPV6_NDISC_HOP_LIMIT) \
- FN(IPV6_NDISC_BAD_CODE) \
- FN(IPV6_NDISC_BAD_OPTIONS) \
- FN(IPV6_NDISC_NS_OTHERHOST) \
- FNe(MAX)
+#include <net/dropreason-core.h>
/**
- * enum skb_drop_reason - the reasons of skb drops
- *
- * The reason of skb drop, which is used in kfree_skb_reason().
+ * enum skb_drop_reason_subsys - subsystem tag for (extended) drop reasons
*/
-enum skb_drop_reason {
- /**
- * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case)
- */
- SKB_NOT_DROPPED_YET = 0,
- /** @SKB_CONSUMED: packet has been consumed */
- SKB_CONSUMED,
- /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
- SKB_DROP_REASON_NOT_SPECIFIED,
- /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
- SKB_DROP_REASON_NO_SOCKET,
- /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
- SKB_DROP_REASON_PKT_TOO_SMALL,
- /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */
- SKB_DROP_REASON_TCP_CSUM,
- /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
- SKB_DROP_REASON_SOCKET_FILTER,
- /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */
- SKB_DROP_REASON_UDP_CSUM,
- /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */
- SKB_DROP_REASON_NETFILTER_DROP,
- /**
- * @SKB_DROP_REASON_OTHERHOST: packet don't belong to current host
- * (interface is in promisc mode)
- */
- SKB_DROP_REASON_OTHERHOST,
- /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */
- SKB_DROP_REASON_IP_CSUM,
- /**
- * @SKB_DROP_REASON_IP_INHDR: there is something wrong with IP header (see
- * IPSTATS_MIB_INHDRERRORS)
- */
- SKB_DROP_REASON_IP_INHDR,
- /**
- * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validate failed. see the
- * document for rp_filter in ip-sysctl.rst for more information
- */
- SKB_DROP_REASON_IP_RPFILTER,
- /**
- * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is
- * multicast, but L3 is unicast.
- */
- SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,
- /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */
- SKB_DROP_REASON_XFRM_POLICY,
- /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */
- SKB_DROP_REASON_IP_NOPROTO,
- /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
- SKB_DROP_REASON_SOCKET_RCVBUFF,
- /**
- * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet
- * drop out of udp_memory_allocated.
- */
- SKB_DROP_REASON_PROTO_MEM,
- /**
- * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected,
- * corresponding to LINUX_MIB_TCPMD5NOTFOUND
- */
- SKB_DROP_REASON_TCP_MD5NOTFOUND,
- /**
- * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting
- * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED
- */
- SKB_DROP_REASON_TCP_MD5UNEXPECTED,
- /**
- * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and its wrong, corresponding
- * to LINUX_MIB_TCPMD5FAILURE
- */
- SKB_DROP_REASON_TCP_MD5FAILURE,
- /**
- * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog (
- * see LINUX_MIB_TCPBACKLOGDROP)
- */
- SKB_DROP_REASON_SOCKET_BACKLOG,
- /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
- SKB_DROP_REASON_TCP_FLAGS,
- /**
- * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
- * see LINUX_MIB_TCPZEROWINDOWDROP
- */
- SKB_DROP_REASON_TCP_ZEROWINDOW,
- /**
- * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already
- * received before (spurious retrans may happened), see
- * LINUX_MIB_DELAYEDACKLOST
- */
- SKB_DROP_REASON_TCP_OLD_DATA,
- /**
- * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window,
- * the seq of the first byte exceed the right edges of receive
- * window
- */
- SKB_DROP_REASON_TCP_OVERWINDOW,
- /**
- * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo
- * queue, corresponding to LINUX_MIB_TCPOFOMERGE
- */
- SKB_DROP_REASON_TCP_OFOMERGE,
- /**
- * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
- * LINUX_MIB_PAWSESTABREJECTED
- */
- SKB_DROP_REASON_TCP_RFC7323_PAWS,
- /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
- SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
- /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
- SKB_DROP_REASON_TCP_RESET,
- /**
- * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected
- * SYN flag
- */
- SKB_DROP_REASON_TCP_INVALID_SYN,
- /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */
- SKB_DROP_REASON_TCP_CLOSE,
- /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */
- SKB_DROP_REASON_TCP_FASTOPEN,
- /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */
- SKB_DROP_REASON_TCP_OLD_ACK,
- /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */
- SKB_DROP_REASON_TCP_TOO_OLD_ACK,
- /**
- * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't
- * sent yet
- */
- SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,
- /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */
- SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE,
- /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */
- SKB_DROP_REASON_TCP_OFO_DROP,
- /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */
- SKB_DROP_REASON_IP_OUTNOROUTES,
- /**
- * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB
- * eBPF program
- */
- SKB_DROP_REASON_BPF_CGROUP_EGRESS,
- /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */
- SKB_DROP_REASON_IPV6DISABLED,
- /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */
- SKB_DROP_REASON_NEIGH_CREATEFAIL,
- /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */
- SKB_DROP_REASON_NEIGH_FAILED,
- /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */
- SKB_DROP_REASON_NEIGH_QUEUEFULL,
- /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */
- SKB_DROP_REASON_NEIGH_DEAD,
- /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
- SKB_DROP_REASON_TC_EGRESS,
- /**
- * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
- * failed to enqueue to current qdisc)
- */
- SKB_DROP_REASON_QDISC_DROP,
- /**
- * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
- * backlog queue. This can be caused by backlog queue full (see
- * netdev_max_backlog in net.rst) or RPS flow limit
- */
- SKB_DROP_REASON_CPU_BACKLOG,
- /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */
- SKB_DROP_REASON_XDP,
- /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */
- SKB_DROP_REASON_TC_INGRESS,
- /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */
- SKB_DROP_REASON_UNHANDLED_PROTO,
- /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */
- SKB_DROP_REASON_SKB_CSUM,
- /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */
- SKB_DROP_REASON_SKB_GSO_SEG,
- /**
- * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space,
- * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx()
- */
- SKB_DROP_REASON_SKB_UCOPY_FAULT,
- /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */
- SKB_DROP_REASON_DEV_HDR,
- /**
- * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to
- * any of its data structure that is not up/ready/initialized,
- * e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq]
- * is not initialized
- */
- SKB_DROP_REASON_DEV_READY,
- /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */
- SKB_DROP_REASON_FULL_RING,
- /** @SKB_DROP_REASON_NOMEM: error due to OOM */
- SKB_DROP_REASON_NOMEM,
- /**
- * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from
- * networking data, e.g., failed to pull the protocol header from
- * frags via pskb_may_pull()
- */
- SKB_DROP_REASON_HDR_TRUNC,
- /**
- * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached
- * to tun/tap, e.g., via TUNSETFILTEREBPF
- */
- SKB_DROP_REASON_TAP_FILTER,
- /**
- * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at
- * tun/tap, e.g., check_filter()
- */
- SKB_DROP_REASON_TAP_TXFILTER,
- /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */
- SKB_DROP_REASON_ICMP_CSUM,
- /**
- * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 2211,
- * such as a broadcasts ICMP_TIMESTAMP
- */
- SKB_DROP_REASON_INVALID_PROTO,
- /**
- * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to
- * IPSTATS_MIB_INADDRERRORS
- */
- SKB_DROP_REASON_IP_INADDRERRORS,
- /**
- * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to
- * IPSTATS_MIB_INADDRERRORS
- */
- SKB_DROP_REASON_IP_INNOROUTES,
- /**
- * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
- * MTU)
- */
- SKB_DROP_REASON_PKT_TOO_BIG,
- /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */
- SKB_DROP_REASON_DUP_FRAG,
- /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */
- SKB_DROP_REASON_FRAG_REASM_TIMEOUT,
+enum skb_drop_reason_subsys {
+ /** @SKB_DROP_REASON_SUBSYS_CORE: core drop reasons defined above */
+ SKB_DROP_REASON_SUBSYS_CORE,
+
/**
- * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far.
- * (/proc/sys/net/ipv4/ipfrag_max_dist)
+ * @SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE: mac80211 drop reasons
+ * for unusable frames, see net/mac80211/drop.h
*/
- SKB_DROP_REASON_FRAG_TOO_FAR,
+ SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE,
+
/**
- * @SKB_DROP_REASON_TCP_MINTTL: ipv4 ttl or ipv6 hoplimit below
- * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT).
- */
- SKB_DROP_REASON_TCP_MINTTL,
- /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */
- SKB_DROP_REASON_IPV6_BAD_EXTHDR,
- /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */
- SKB_DROP_REASON_IPV6_NDISC_FRAG,
- /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */
- SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT,
- /** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. */
- SKB_DROP_REASON_IPV6_NDISC_BAD_CODE,
- /** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */
- SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS,
- /** @SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST: NEIGHBOUR SOLICITATION
- * for another host.
+ * @SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR: mac80211 drop reasons
+ * for frames still going to monitor, see net/mac80211/drop.h
*/
- SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
+ SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR,
+
/**
- * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
- * used as a real 'reason'
+ * @SKB_DROP_REASON_SUBSYS_OPENVSWITCH: openvswitch drop reasons,
+ * see net/openvswitch/drop.h
*/
- SKB_DROP_REASON_MAX,
+ SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+
+ /** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */
+ SKB_DROP_REASON_SUBSYS_NUM
+};
+
+struct drop_reason_list {
+ const char * const *reasons;
+ size_t n_reasons;
};
-#define SKB_DR_INIT(name, reason) \
- enum skb_drop_reason name = SKB_DROP_REASON_##reason
-#define SKB_DR(name) \
- SKB_DR_INIT(name, NOT_SPECIFIED)
-#define SKB_DR_SET(name, reason) \
- (name = SKB_DROP_REASON_##reason)
-#define SKB_DR_OR(name, reason) \
- do { \
- if (name == SKB_DROP_REASON_NOT_SPECIFIED || \
- name == SKB_NOT_DROPPED_YET) \
- SKB_DR_SET(name, reason); \
- } while (0)
+/* Note: due to dynamic registrations, access must be under RCU */
+extern const struct drop_reason_list __rcu *
+drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM];
-extern const char * const drop_reasons[];
+void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys,
+ const struct drop_reason_list *list);
+void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys);
#endif
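A sketch of how a subsystem supplies its extended reason strings, mirroring what mac80211 and openvswitch do; the list contents here are made up for illustration and are not part of the patch.

static const char * const ovs_drop_reasons[] = {
	"OVS_DROP_LAST_ACTION",		/* illustrative strings only */
	"OVS_DROP_APPLY_ACTION_ERROR",
};

static const struct drop_reason_list ovs_drop_reason_list = {
	.reasons = ovs_drop_reasons,
	.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};

/* module init: */
drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
			     &ovs_drop_reason_list);
/* module exit: */
drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);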
diff --git a/include/net/dsa.h b/include/net/dsa.h
index a15f17a38eca..0b9c6aa27047 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -109,16 +109,6 @@ struct dsa_device_ops {
bool promisc_on_master;
};
-/* This structure defines the control interfaces that are overlayed by the
- * DSA layer on top of the DSA CPU/management net_device instance. This is
- * used by the core net_device layer while calling various net_device_ops
- * function pointers.
- */
-struct dsa_netdevice_ops {
- int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr,
- int cmd);
-};
-
struct dsa_lag {
struct net_device *dev;
unsigned int id;
@@ -317,11 +307,6 @@ struct dsa_port {
*/
const struct ethtool_ops *orig_ethtool_ops;
- /*
- * Original copy of the master netdev net_device_ops
- */
- const struct dsa_netdevice_ops *netdev_ops;
-
/* List of MAC addresses that must be forwarded on this port.
* These are only valid on CPU ports and DSA links.
*/
@@ -329,9 +314,17 @@ struct dsa_port {
struct list_head fdbs;
struct list_head mdbs;
- /* List of VLANs that CPU and DSA ports are members of. */
struct mutex vlans_lock;
- struct list_head vlans;
+ union {
+ /* List of VLANs that CPU and DSA ports are members of.
+ * Access to this is serialized by the sleepable @vlans_lock.
+ */
+ struct list_head vlans;
+ /* List of VLANs that user ports are members of.
+ * Access to this is serialized by netif_addr_lock_bh().
+ */
+ struct list_head user_vlans;
+ };
};
/* TODO: ideally DSA ports would have a single dp->link_dp member,
@@ -880,12 +873,15 @@ struct dsa_switch_ops {
struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
int port,
phy_interface_t iface);
- int (*phylink_mac_link_state)(struct dsa_switch *ds, int port,
- struct phylink_link_state *state);
+ int (*phylink_mac_prepare)(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface);
void (*phylink_mac_config)(struct dsa_switch *ds, int port,
unsigned int mode,
const struct phylink_link_state *state);
- void (*phylink_mac_an_restart)(struct dsa_switch *ds, int port);
+ int (*phylink_mac_finish)(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface);
void (*phylink_mac_link_down)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
@@ -974,6 +970,14 @@ struct dsa_switch_ops {
void (*port_disable)(struct dsa_switch *ds, int port);
/*
+ * Compatibility between device trees defining multiple CPU ports and
+ * drivers which are not OK to use by default the numerically smallest
+ * CPU port of a switch for its local ports. This can return NULL,
+ * meaning "don't know/don't care".
+ */
+ struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds);
+
+ /*
* Port's MAC EEE settings
*/
int (*set_mac_eee)(struct dsa_switch *ds, int port,
@@ -1339,42 +1343,6 @@ static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb,
#endif
}
-#if IS_ENABLED(CONFIG_NET_DSA)
-static inline int __dsa_netdevice_ops_check(struct net_device *dev)
-{
- int err = -EOPNOTSUPP;
-
- if (!dev->dsa_ptr)
- return err;
-
- if (!dev->dsa_ptr->netdev_ops)
- return err;
-
- return 0;
-}
-
-static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
- int cmd)
-{
- const struct dsa_netdevice_ops *ops;
- int err;
-
- err = __dsa_netdevice_ops_check(dev);
- if (err)
- return err;
-
- ops = dev->dsa_ptr->netdev_ops;
-
- return ops->ndo_eth_ioctl(dev, ifr, cmd);
-}
-#else
-static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
- int cmd)
-{
- return -EOPNOTSUPP;
-}
-#endif
-
void dsa_unregister_switch(struct dsa_switch *ds);
int dsa_register_switch(struct dsa_switch *ds);
void dsa_switch_shutdown(struct dsa_switch *ds);
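
With phylink_mac_link_state and phylink_mac_an_restart gone, a driver now
brackets its mac_config with the new prepare/finish pair. A minimal sketch of
the ops wiring for a fictional "foo" switch driver (all foo_* helpers are
hypothetical):

    static int foo_phylink_mac_prepare(struct dsa_switch *ds, int port,
                                       unsigned int mode,
                                       phy_interface_t interface)
    {
            /* e.g. quiesce the port MAC before changing its interface mode */
            return foo_port_mac_disable(ds->priv, port);
    }

    static void foo_phylink_mac_config(struct dsa_switch *ds, int port,
                                       unsigned int mode,
                                       const struct phylink_link_state *state)
    {
            foo_port_set_interface(ds->priv, port, state->interface);
    }

    static int foo_phylink_mac_finish(struct dsa_switch *ds, int port,
                                      unsigned int mode,
                                      phy_interface_t interface)
    {
            /* re-enable the MAC once the new mode has been programmed */
            return foo_port_mac_enable(ds->priv, port);
    }

    static const struct dsa_switch_ops foo_switch_ops = {
            .phylink_mac_prepare    = foo_phylink_mac_prepare,
            .phylink_mac_config     = foo_phylink_mac_config,
            .phylink_mac_finish     = foo_phylink_mac_finish,
    };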
diff --git a/include/net/dsa_stubs.h b/include/net/dsa_stubs.h
new file mode 100644
index 000000000000..361811750a54
--- /dev/null
+++ b/include/net/dsa_stubs.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * include/net/dsa_stubs.h - Stubs for the Distributed Switch Architecture framework
+ */
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/net_tstamp.h>
+#include <net/dsa.h>
+
+#if IS_ENABLED(CONFIG_NET_DSA)
+
+extern const struct dsa_stubs *dsa_stubs;
+
+struct dsa_stubs {
+ int (*master_hwtstamp_validate)(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
+};
+
+static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ if (!netdev_uses_dsa(dev))
+ return 0;
+
+ /* rtnl_lock() is a sufficient guarantee, because as long as
+ * netdev_uses_dsa() returns true, the dsa_core module is still
+ * registered, and so, dsa_unregister_stubs() couldn't have run.
+ * For netdev_uses_dsa() to start returning false, it would imply that
+ * dsa_master_teardown() has executed, which requires rtnl_lock().
+ */
+ ASSERT_RTNL();
+
+ return dsa_stubs->master_hwtstamp_validate(dev, config, extack);
+}
+
+#else
+
+static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ return 0;
+}
+
+#endif
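
The stub indirection exists so that built-in networking code can call into
DSA without a hard dependency on the dsa_core module. A minimal sketch of the
intended call site, with the surrounding hwtstamp plumbing elided (the caller
shown is illustrative, not part of this diff):

    static int example_dev_set_hwtstamp(struct net_device *dev,
                                        struct kernel_hwtstamp_config *cfg,
                                        struct netlink_ext_ack *extack)
    {
            int err;

            ASSERT_RTNL();  /* dsa_master_hwtstamp_validate() relies on this */

            /* reject configs that would break DSA switch timestamping */
            err = dsa_master_hwtstamp_validate(dev, cfg, extack);
            if (err)
                    return err;

            /* ... proceed to program the master device itself ... */
            return 0;
    }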
diff --git a/include/net/dst.h b/include/net/dst.h
index d67fda89cd0f..78884429deed 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -16,6 +16,7 @@
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/refcount.h>
+#include <linux/rcuref.h>
#include <net/neighbour.h>
#include <asm/processor.h>
#include <linux/indirect_call_wrapper.h>
@@ -61,23 +62,36 @@ struct dst_entry {
unsigned short trailer_len; /* space to reserve at tail */
/*
- * __refcnt wants to be on a different cache line from
+ * __rcuref wants to be on a different cache line from
* input/output/ops or performance tanks badly
*/
#ifdef CONFIG_64BIT
- atomic_t __refcnt; /* 64-bit offset 64 */
+ rcuref_t __rcuref; /* 64-bit offset 64 */
#endif
int __use;
unsigned long lastuse;
- struct lwtunnel_state *lwtstate;
struct rcu_head rcu_head;
short error;
short __pad;
__u32 tclassid;
#ifndef CONFIG_64BIT
- atomic_t __refcnt; /* 32-bit offset 64 */
+ struct lwtunnel_state *lwtstate;
+ rcuref_t __rcuref; /* 32-bit offset 64 */
#endif
netdevice_tracker dev_tracker;
+
+ /*
+ * Used by rtable and rt6_info. Moves lwtstate into the next cache
+ * line on 64bit so that lwtstate does not cause false sharing with
+ * __rcuref under contention. This also puts the
+ * frequently accessed members of rtable and rt6_info out of the
+ * __rcuref cache line.
+ */
+ struct list_head rt_uncached;
+ struct uncached_list *rt_uncached_list;
+#ifdef CONFIG_64BIT
+ struct lwtunnel_state *lwtstate;
+#endif
};
struct dst_metrics {
@@ -225,10 +239,10 @@ static inline void dst_hold(struct dst_entry *dst)
{
/*
* If your kernel compilation stops here, please check
- * the placement of __refcnt in struct dst_entry
+ * the placement of __rcuref in struct dst_entry
*/
- BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
- WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
+ BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63);
+ WARN_ON(!rcuref_get(&dst->__rcuref));
}
static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
@@ -292,7 +306,7 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
*/
static inline bool dst_hold_safe(struct dst_entry *dst)
{
- return atomic_inc_not_zero(&dst->__refcnt);
+ return rcuref_get(&dst->__rcuref);
}
/**
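
Because __rcuref is an rcuref_t rather than a plain atomic_t, acquiring a
reference can only fail once the entry is already dead. A minimal sketch of
taking and dropping a reference on a cached dst under RCU:

    #include <net/dst.h>

    static struct dst_entry *example_get_cached_dst(struct dst_entry *dst)
    {
            rcu_read_lock();
            /* dst_hold_safe() -> rcuref_get() fails only if the last
             * reference is already gone and the dst is being freed.
             */
            if (dst && !dst_hold_safe(dst))
                    dst = NULL;
            rcu_read_unlock();

            return dst;     /* caller releases with dst_release() */
    }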
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 632086b2f644..6d1c8541183d 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -23,7 +23,7 @@ struct dst_ops {
u32 * (*cow_metrics)(struct dst_entry *, unsigned long);
void (*destroy)(struct dst_entry *);
void (*ifdown)(struct dst_entry *,
- struct net_device *dev, int how);
+ struct net_device *dev);
struct dst_entry * (*negative_advice)(struct dst_entry *);
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
diff --git a/include/net/flow.h b/include/net/flow.h
index bb8651a6eaa7..7f0adda3bf2f 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -116,11 +116,10 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
}
/* Reset some input parameters after previous lookup */
-static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
+static inline void flowi4_update_output(struct flowi4 *fl4, int oif,
__be32 daddr, __be32 saddr)
{
fl4->flowi4_oif = oif;
- fl4->flowi4_tos = tos;
fl4->daddr = daddr;
fl4->saddr = saddr;
}
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 5ccf52ef8809..1a7131d6cb0e 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -14,7 +14,9 @@ struct sk_buff;
/**
* struct flow_dissector_key_control:
- * @thoff: Transport header offset
+ * @thoff: Transport header offset
+ * @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_*
+ * @flags: Key flags. Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION)
*/
struct flow_dissector_key_control {
u16 thoff;
@@ -36,8 +38,9 @@ enum flow_dissect_ret {
/**
* struct flow_dissector_key_basic:
- * @n_proto: Network header protocol (eg. IPv4/IPv6)
+ * @n_proto: Network header protocol (eg. IPv4/IPv6)
* @ip_proto: Transport header protocol (eg. TCP/UDP)
+ * @padding: Unused
*/
struct flow_dissector_key_basic {
__be16 n_proto;
@@ -135,6 +138,7 @@ struct flow_dissector_key_tipc {
* struct flow_dissector_key_addrs:
* @v4addrs: IPv4 addresses
* @v6addrs: IPv6 addresses
+ * @tipckey: TIPC key
*/
struct flow_dissector_key_addrs {
union {
@@ -145,14 +149,12 @@ struct flow_dissector_key_addrs {
};
/**
- * flow_dissector_key_arp:
- * @ports: Operation, source and target addresses for an ARP header
- * for Ethernet hardware addresses and IPv4 protocol addresses
- * sip: Sender IP address
- * tip: Target IP address
- * op: Operation
- * sha: Sender hardware address
- * tpa: Target hardware address
+ * struct flow_dissector_key_arp:
+ * @sip: Sender IP address
+ * @tip: Target IP address
+ * @op: Operation
+ * @sha: Sender hardware address
+ * @tha: Target hardware address
*/
struct flow_dissector_key_arp {
__u32 sip;
@@ -163,10 +165,10 @@ struct flow_dissector_key_arp {
};
/**
- * flow_dissector_key_tp_ports:
- * @ports: port numbers of Transport header
- * src: source port number
- * dst: destination port number
+ * struct flow_dissector_key_ports:
+ * @ports: port numbers of Transport header
+ * @src: source port number
+ * @dst: destination port number
*/
struct flow_dissector_key_ports {
union {
@@ -195,10 +197,10 @@ struct flow_dissector_key_ports_range {
};
/**
- * flow_dissector_key_icmp:
- * type: ICMP type
- * code: ICMP code
- * id: session identifier
+ * struct flow_dissector_key_icmp:
+ * @type: ICMP type
+ * @code: ICMP code
+ * @id: Session identifier
*/
struct flow_dissector_key_icmp {
struct {
@@ -241,10 +243,12 @@ struct flow_dissector_key_ip {
* struct flow_dissector_key_meta:
* @ingress_ifindex: ingress ifindex
* @ingress_iftype: ingress interface type
+ * @l2_miss: packet did not match an L2 entry during forwarding
*/
struct flow_dissector_key_meta {
int ingress_ifindex;
u16 ingress_iftype;
+ u8 l2_miss;
};
/**
@@ -297,6 +301,34 @@ struct flow_dissector_key_l2tpv3 {
__be32 session_id;
};
+/**
+ * struct flow_dissector_key_ipsec:
+ * @spi: identifier for a ipsec connection
+ */
+struct flow_dissector_key_ipsec {
+ __be32 spi;
+};
+
+/**
+ * struct flow_dissector_key_cfm:
+ * @mdl_ver: maintenance domain level (mdl) and cfm protocol version
+ * @opcode: code specifying a type of cfm protocol packet
+ *
+ * See 802.1ag, ITU-T G.8013/Y.1731
+ * 1 2
+ * |7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | mdl | version | opcode |
+ * +-----+---------+-+-+-+-+-+-+-+-+
+ */
+struct flow_dissector_key_cfm {
+ u8 mdl_ver;
+ u8 opcode;
+};
+
+#define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5)
+#define FLOW_DIS_CFM_MDL_MAX 7
+
enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
@@ -329,6 +361,8 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */
FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */
+ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */
+ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */
FLOW_DISSECTOR_KEY_MAX,
};
@@ -345,7 +379,8 @@ struct flow_dissector_key {
};
struct flow_dissector {
- unsigned int used_keys; /* each bit repesents presence of one key id */
+ unsigned long long used_keys;
+ /* each bit represents presence of one key id */
unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
};
@@ -405,7 +440,7 @@ void skb_flow_get_icmp_tci(const struct sk_buff *skb,
static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
{
- return flow_dissector->used_keys & (1 << key_id);
+ return flow_dissector->used_keys & (1ULL << key_id);
}
static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
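
With FLOW_DISSECTOR_KEY_CFM and FLOW_DISSECTOR_KEY_IPSEC, the highest key ids
pass what a 32-bit mask can hold, which is why used_keys grows to 64 bits and
every test must shift with 1ULL. A minimal sketch of a dissector declaring
interest in one of the new keys (the example_* names are hypothetical):

    struct example_flow_keys {
            struct flow_dissector_key_control control;
            struct flow_dissector_key_cfm cfm;
    };

    static const struct flow_dissector_key example_keys[] = {
            {
                    .key_id = FLOW_DISSECTOR_KEY_CONTROL,
                    .offset = offsetof(struct example_flow_keys, control),
            },
            {
                    .key_id = FLOW_DISSECTOR_KEY_CFM,
                    .offset = offsetof(struct example_flow_keys, cfm),
            },
    };

    static struct flow_dissector example_dissector;

    static void example_init(void)
    {
            /* sets the 1ULL << FLOW_DISSECTOR_KEY_* bits in used_keys */
            skb_flow_dissector_init(&example_dissector, example_keys,
                                    ARRAY_SIZE(example_keys));
    }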
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 118082eae48c..9efa9a59e81f 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -64,6 +64,10 @@ struct flow_match_tcp {
struct flow_dissector_key_tcp *key, *mask;
};
+struct flow_match_ipsec {
+ struct flow_dissector_key_ipsec *key, *mask;
+};
+
struct flow_match_mpls {
struct flow_dissector_key_mpls *key, *mask;
};
@@ -116,6 +120,8 @@ void flow_rule_match_ports_range(const struct flow_rule *rule,
struct flow_match_ports_range *out);
void flow_rule_match_tcp(const struct flow_rule *rule,
struct flow_match_tcp *out);
+void flow_rule_match_ipsec(const struct flow_rule *rule,
+ struct flow_match_ipsec *out);
void flow_rule_match_icmp(const struct flow_rule *rule,
struct flow_match_icmp *out);
void flow_rule_match_mpls(const struct flow_rule *rule,
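
Drivers offloading tc flower rules consume the new IPsec match the same way
as the existing ones: check for the key, then read the key/mask pair. A
minimal sketch of the driver side (the parse function is hypothetical):

    static int example_parse_ipsec(const struct flow_rule *rule,
                                   u32 *spi, u32 *spi_mask)
    {
            struct flow_match_ipsec match;

            if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPSEC))
                    return 0;

            flow_rule_match_ipsec(rule, &match);
            *spi = be32_to_cpu(match.key->spi);
            *spi_mask = be32_to_cpu(match.mask->spi);
            return 1;       /* rule matches on the ESP/AH SPI */
    }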
diff --git a/include/net/fou.h b/include/net/fou.h
index 80f56e275b08..824eb4b231fd 100644
--- a/include/net/fou.h
+++ b/include/net/fou.h
@@ -17,4 +17,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, __be16 *sport, int type);
+int register_fou_bpf(void);
+
#endif
diff --git a/include/net/fq.h b/include/net/fq.h
index 07b5aff6ec58..99fbe4127b95 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -98,9 +98,4 @@ typedef bool fq_skb_filter_t(struct fq *,
struct sk_buff *,
void *);
-typedef struct fq_flow *fq_flow_get_default_t(struct fq *,
- struct fq_tin *,
- int idx,
- struct sk_buff *);
-
#endif
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index ed4622dd4828..e18a4c0d69ee 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -93,9 +93,9 @@ struct genl_family {
* struct genl_info - receiving information
* @snd_seq: sending sequence number
* @snd_portid: netlink portid of sender
+ * @family: generic netlink family
* @nlhdr: netlink message header
* @genlhdr: generic netlink message header
- * @userhdr: user specific header
* @attrs: netlink attributes
* @_net: network namespace
* @user_ptr: user pointers
@@ -104,16 +104,16 @@ struct genl_family {
struct genl_info {
u32 snd_seq;
u32 snd_portid;
- struct nlmsghdr * nlhdr;
+ const struct genl_family *family;
+ const struct nlmsghdr * nlhdr;
struct genlmsghdr * genlhdr;
- void * userhdr;
struct nlattr ** attrs;
possible_net_t _net;
void * user_ptr[2];
struct netlink_ext_ack *extack;
};
-static inline struct net *genl_info_net(struct genl_info *info)
+static inline struct net *genl_info_net(const struct genl_info *info)
{
return read_pnet(&info->_net);
}
@@ -123,6 +123,11 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
write_pnet(&info->_net, net);
}
+static inline void *genl_info_userhdr(const struct genl_info *info)
+{
+ return (u8 *)info->genlhdr + GENL_HDRLEN;
+}
+
#define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
#define GENL_SET_ERR_MSG_FMT(info, msg, args...) \
@@ -244,14 +249,13 @@ struct genl_split_ops {
/**
* struct genl_dumpit_info - info that is available during dumpit op call
- * @family: generic netlink family - for internal genl code usage
* @op: generic netlink ops - for internal genl code usage
* @attrs: netlink attributes
+ * @info: struct genl_info describing the request
*/
struct genl_dumpit_info {
- const struct genl_family *family;
struct genl_split_ops op;
- struct nlattr **attrs;
+ struct genl_info info;
};
static inline const struct genl_dumpit_info *
@@ -260,6 +264,38 @@ genl_dumpit_info(struct netlink_callback *cb)
return cb->data;
}
+static inline const struct genl_info *
+genl_info_dump(struct netlink_callback *cb)
+{
+ return &genl_dumpit_info(cb)->info;
+}
+
+/**
+ * genl_info_init_ntf() - initialize genl_info for notifications
+ * @info: genl_info struct to set up
+ * @family: pointer to the genetlink family
+ * @cmd: command to be used in the notification
+ *
+ * Initialize a locally declared struct genl_info to pass to various APIs.
+ * Intended to be used when creating notifications.
+ */
+static inline void
+genl_info_init_ntf(struct genl_info *info, const struct genl_family *family,
+ u8 cmd)
+{
+ struct genlmsghdr *hdr = (void *) &info->user_ptr[0];
+
+ memset(info, 0, sizeof(*info));
+ info->family = family;
+ info->genlhdr = hdr;
+ hdr->cmd = cmd;
+}
+
+static inline bool genl_info_is_ntf(const struct genl_info *info)
+{
+ return !info->nlhdr;
+}
+
int genl_register_family(struct genl_family *family);
int genl_unregister_family(const struct genl_family *family);
void genl_notify(const struct genl_family *family, struct sk_buff *skb,
@@ -268,6 +304,32 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb,
void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
const struct genl_family *family, int flags, u8 cmd);
+static inline void *
+__genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags)
+{
+ return genlmsg_put(skb, info->snd_portid, info->snd_seq, info->family,
+ flags, info->genlhdr->cmd);
+}
+
+/**
+ * genlmsg_iput - start genetlink message based on genl_info
+ * @skb: skb in which message header will be placed
+ * @info: genl_info as provided to do/dump handlers
+ *
+ * Convenience wrapper which starts a genetlink message based on
+ * information in user request. @info should be either the struct passed
+ * by genetlink core to do/dump handlers (when constructing replies to
+ * such requests) or a struct initialized by genl_info_init_ntf()
+ * when constructing notifications.
+ *
+ * Returns pointer to new genetlink header.
+ */
+static inline void *
+genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
+{
+ return __genlmsg_iput(skb, info, 0);
+}
+
/**
* genlmsg_nlhdr - Obtain netlink header from user specified header
* @user_hdr: user header as returned from genlmsg_put()
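
Together, genl_info_init_ntf() and genlmsg_iput() let notification code reuse
the same message-building path as request handlers. A minimal sketch of
emitting a multicast notification (family, command and group index are
hypothetical):

    static int example_notify(const struct genl_family *family, u8 cmd)
    {
            struct genl_info info;
            struct sk_buff *skb;
            void *hdr;

            skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
            if (!skb)
                    return -ENOMEM;

            genl_info_init_ntf(&info, family, cmd);
            hdr = genlmsg_iput(skb, &info); /* portid/seq are 0 for ntf */
            if (!hdr) {
                    nlmsg_free(skb);
                    return -EMSGSIZE;
            }

            /* ... nla_put() attributes here ... */

            genlmsg_end(skb, hdr);
            return genlmsg_multicast(family, skb, 0, 0, GFP_KERNEL);
    }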
diff --git a/include/net/gro.h b/include/net/gro.h
index a4fab706240d..88644b3ca660 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -11,11 +11,23 @@
#include <net/udp.h>
struct napi_gro_cb {
- /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
- void *frag0;
+ union {
+ struct {
+ /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
+ void *frag0;
- /* Length of frag0. */
- unsigned int frag0_len;
+ /* Length of frag0. */
+ unsigned int frag0_len;
+ };
+
+ struct {
+ /* used in skb_gro_receive() slow path */
+ struct sk_buff *last;
+
+ /* jiffies when first packet was created/queued */
+ unsigned long age;
+ };
+ };
/* This indicates where we are processing relative to skb->data. */
int data_offset;
@@ -32,9 +44,6 @@ struct napi_gro_cb {
/* Used in ipv6_gro_receive() and foo-over-udp */
u16 proto;
- /* jiffies when first packet was created/queued */
- unsigned long age;
-
/* Used in napi_gro_cb::free */
#define NAPI_GRO_FREE 1
#define NAPI_GRO_FREE_STOLEN_HEAD 2
@@ -77,9 +86,6 @@ struct napi_gro_cb {
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
-
- /* used in skb_gro_receive() slow path */
- struct sk_buff *last;
};
#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
@@ -446,5 +452,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
gro_normal_list(napi);
}
+/* This function is an alternative to the 'inet_iif' and 'inet_sdif'
+ * helpers, for use when the fields of IPCB cannot be relied upon.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ *iif = inet_iif(skb) ?: skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
+/* This function is an alternative to the 'inet6_iif' and 'inet6_sdif'
+ * helpers, for use when the fields of IP6CB cannot be relied upon.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ /* using skb->dev->ifindex because skb_dst(skb) is not initialized */
+ *iif = skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
+extern struct list_head offload_base;
#endif /* _NET_IPV6_GRO_H */
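
A minimal sketch of how a GRO completion path would use the new helper for a
socket lookup, where IPCB is not valid and skb_dst() is not yet set (the
wrapper below is illustrative):

    static struct sock *example_udp4_lookup(struct net *net,
                                            const struct sk_buff *skb,
                                            __be16 sport, __be16 dport)
    {
            const struct iphdr *iph = ip_hdr(skb);
            int iif, sdif;

            /* caller holds rcu_read_lock(); skb->dev is initialized */
            inet_get_iif_sdif(skb, &iif, &sdif);

            return __udp4_lib_lookup(net, iph->saddr, sport,
                                     iph->daddr, dport, iif, sdif,
                                     net->ipv4.udp_table, NULL);
    }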
diff --git a/include/net/gso.h b/include/net/gso.h
new file mode 100644
index 000000000000..29975440cad5
--- /dev/null
+++ b/include/net/gso.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _NET_GSO_H
+#define _NET_GSO_H
+
+#include <linux/skbuff.h>
+
+/* Keeps track of mac header offset relative to skb->head.
+ * It is useful for TSO of tunneling protocols, e.g. GRE.
+ * For non-tunnel skb it points to skb_mac_header() and for
+ * tunnel skb it points to outer mac header.
+ * Keeps track of level of encapsulation of network headers.
+ */
+struct skb_gso_cb {
+ union {
+ int mac_offset;
+ int data_offset;
+ };
+ int encap_level;
+ __wsum csum;
+ __u16 csum_start;
+};
+#define SKB_GSO_CB_OFFSET 32
+#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET))
+
+static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
+{
+ return (skb_mac_header(inner_skb) - inner_skb->head) -
+ SKB_GSO_CB(inner_skb)->mac_offset;
+}
+
+static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
+{
+ int new_headroom, headroom;
+ int ret;
+
+ headroom = skb_headroom(skb);
+ ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC);
+ if (ret)
+ return ret;
+
+ new_headroom = skb_headroom(skb);
+ SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom);
+ return 0;
+}
+
+static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res)
+{
+ /* Do not update partial checksums if remote checksum is enabled. */
+ if (skb->remcsum_offload)
+ return;
+
+ SKB_GSO_CB(skb)->csum = res;
+ SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head;
+}
+
+/* Compute the checksum for a gso segment. First compute the checksum value
+ * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
+ * then add in skb->csum (checksum from csum_start to end of packet).
+ * skb->csum and csum_start are then updated to reflect the checksum of the
+ * resultant packet starting from the transport header; the resultant checksum
+ * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo
+ * header).
+ */
+static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
+{
+ unsigned char *csum_start = skb_transport_header(skb);
+ int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start;
+ __wsum partial = SKB_GSO_CB(skb)->csum;
+
+ SKB_GSO_CB(skb)->csum = res;
+ SKB_GSO_CB(skb)->csum_start = csum_start - skb->head;
+
+ return csum_fold(csum_partial(csum_start, plen, partial));
+}
+
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, bool tx_path);
+
+static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ return __skb_gso_segment(skb, features, true);
+}
+
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, __be16 type);
+
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+ netdev_features_t features);
+
+bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
+
+bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
+
+static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
+ int pulled_hlen, u16 mac_offset,
+ int mac_len)
+{
+ skb->protocol = protocol;
+ skb->encapsulation = 1;
+ skb_push(skb, pulled_hlen);
+ skb_reset_transport_header(skb);
+ skb->mac_header = mac_offset;
+ skb->network_header = skb->mac_header + mac_len;
+ skb->mac_len = mac_len;
+}
+
+#endif /* _NET_GSO_H */
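
The header is new but the API is the long-standing GSO interface moved out of
other headers, so callers only change their #include. A minimal sketch of the
classic segment-then-transmit loop against the relocated API (the xmit
callback is illustrative):

    #include <net/gso.h>

    static int example_segment_and_xmit(struct sk_buff *skb,
                                        netdev_features_t features,
                                        int (*xmit)(struct sk_buff *skb))
    {
            struct sk_buff *segs, *seg, *next;

            segs = skb_gso_segment(skb, features);
            if (IS_ERR(segs))
                    return PTR_ERR(segs);
            if (!segs)
                    return xmit(skb);       /* no segmentation was needed */

            consume_skb(skb);               /* chain replaces the original */

            skb_list_walk_safe(segs, seg, next) {
                    skb_mark_not_on_list(seg);
                    xmit(seg);
            }
            return 0;
    }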
diff --git a/include/net/handshake.h b/include/net/handshake.h
new file mode 100644
index 000000000000..8ebd4f9ed26e
--- /dev/null
+++ b/include/net/handshake.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic netlink HANDSHAKE service.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef _NET_HANDSHAKE_H
+#define _NET_HANDSHAKE_H
+
+enum {
+ TLS_NO_KEYRING = 0,
+ TLS_NO_PEERID = 0,
+ TLS_NO_CERT = 0,
+ TLS_NO_PRIVKEY = 0,
+};
+
+typedef void (*tls_done_func_t)(void *data, int status,
+ key_serial_t peerid);
+
+struct tls_handshake_args {
+ struct socket *ta_sock;
+ tls_done_func_t ta_done;
+ void *ta_data;
+ const char *ta_peername;
+ unsigned int ta_timeout_ms;
+ key_serial_t ta_keyring;
+ key_serial_t ta_my_cert;
+ key_serial_t ta_my_privkey;
+ unsigned int ta_num_peerids;
+ key_serial_t ta_my_peerids[5];
+};
+
+int tls_client_hello_anon(const struct tls_handshake_args *args, gfp_t flags);
+int tls_client_hello_x509(const struct tls_handshake_args *args, gfp_t flags);
+int tls_client_hello_psk(const struct tls_handshake_args *args, gfp_t flags);
+int tls_server_hello_x509(const struct tls_handshake_args *args, gfp_t flags);
+int tls_server_hello_psk(const struct tls_handshake_args *args, gfp_t flags);
+
+bool tls_handshake_cancel(struct sock *sk);
+void tls_handshake_close(struct socket *sock);
+
+u8 tls_get_record_type(const struct sock *sk, const struct cmsghdr *msg);
+void tls_alert_recv(const struct sock *sk, const struct msghdr *msg,
+ u8 *level, u8 *description);
+
+#endif /* _NET_HANDSHAKE_H */
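
A consumer hands an open socket to the handshake service and is called back
when user space completes the TLS exchange. A minimal sketch of kicking off
an x.509 client handshake and waiting for it (the completion plumbing is
illustrative; the TLS_NO_* values mean "none provided"):

    #include <linux/completion.h>
    #include <net/handshake.h>

    static void example_handshake_done(void *data, int status,
                                       key_serial_t peerid)
    {
            /* status is 0 on success; peerid names the remote peer's key */
            complete(data);
    }

    static int example_start_tls(struct socket *sock)
    {
            DECLARE_COMPLETION_ONSTACK(done);
            struct tls_handshake_args args = {
                    .ta_sock        = sock,
                    .ta_done        = example_handshake_done,
                    .ta_data        = &done,
                    .ta_timeout_ms  = 3000,
                    .ta_keyring     = TLS_NO_KEYRING,
                    .ta_my_cert     = TLS_NO_CERT,
                    .ta_my_privkey  = TLS_NO_PRIVKEY,
            };
            int err;

            err = tls_client_hello_x509(&args, GFP_KERNEL);
            if (err)
                    return err;

            wait_for_completion(&done);
            return 0;
    }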
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 598f53d2a3a0..2338f8d2a8b3 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2017 Intel Deutschland GmbH
- * Copyright (c) 2018-2019, 2021 Intel Corporation
+ * Copyright (c) 2018-2019, 2021-2022 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -21,7 +21,7 @@
#include <asm/unaligned.h>
/**
- * struct ieee82011_radiotap_header - base radiotap header
+ * struct ieee80211_radiotap_header - base radiotap header
*/
struct ieee80211_radiotap_header {
/**
@@ -82,11 +82,14 @@ enum ieee80211_radiotap_presence {
IEEE80211_RADIOTAP_HE_MU = 24,
IEEE80211_RADIOTAP_ZERO_LEN_PSDU = 26,
IEEE80211_RADIOTAP_LSIG = 27,
+ IEEE80211_RADIOTAP_TLV = 28,
/* valid in every it_present bitmap, even vendor namespaces */
IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
IEEE80211_RADIOTAP_VENDOR_NAMESPACE = 30,
- IEEE80211_RADIOTAP_EXT = 31
+ IEEE80211_RADIOTAP_EXT = 31,
+ IEEE80211_RADIOTAP_EHT_USIG = 33,
+ IEEE80211_RADIOTAP_EHT = 34,
};
/* for IEEE80211_RADIOTAP_FLAGS */
@@ -360,8 +363,219 @@ enum ieee80211_radiotap_zero_len_psdu_type {
IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR = 0xff,
};
+struct ieee80211_radiotap_tlv {
+ __le16 type;
+ __le16 len;
+ u8 data[];
+} __packed;
+
+/**
+ * struct ieee80211_radiotap_vendor_content - radiotap vendor data content
+ * @oui: radiotap vendor namespace OUI
+ * @oui_subtype: radiotap vendor sub namespace
+ * @vendor_type: radiotap vendor type
+ * @reserved: should always be set to zero (to avoid leaking memory)
+ * @data: the actual vendor namespace data
+ */
+struct ieee80211_radiotap_vendor_content {
+ u8 oui[3];
+ u8 oui_subtype;
+ __le16 vendor_type;
+ __le16 reserved;
+ u8 data[];
+} __packed;
+
+/**
+ * struct ieee80211_radiotap_vendor_tlv - vendor radiotap data information
+ * @type: should always be set to IEEE80211_RADIOTAP_VENDOR_NAMESPACE
+ * @len: length of data
+ * @content: vendor content, see &struct ieee80211_radiotap_vendor_content
+ */
+struct ieee80211_radiotap_vendor_tlv {
+ __le16 type; /* IEEE80211_RADIOTAP_VENDOR_NAMESPACE */
+ __le16 len;
+ struct ieee80211_radiotap_vendor_content content;
+};
+
+/* ieee80211_radiotap_eht_usig - content of U-SIG tlv (type 33)
+ * see www.radiotap.org/fields/U-SIG.html for details
+ */
+struct ieee80211_radiotap_eht_usig {
+ __le32 common;
+ __le32 value;
+ __le32 mask;
+} __packed;
+
+/* ieee80211_radiotap_eht - content of EHT tlv (type 34)
+ * see www.radiotap.org/fields/EHT.html for details
+ */
+struct ieee80211_radiotap_eht {
+ __le32 known;
+ __le32 data[9];
+ __le32 user_info[];
+} __packed;
+
+/* Known field for EHT TLV
+ * The suffix indicates which PPDU types a field applies to, as follows:
+ * O - OFDMA (including TB), M - MU-MIMO, S - EHT sounding.
+ */
+enum ieee80211_radiotap_eht_known {
+ IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_KNOWN_GI = 0x00000004,
+ IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF = 0x00000010,
+ IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM = 0x00000020,
+ IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM = 0x00000040,
+ IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM = 0x00000080,
+ IEEE80211_RADIOTAP_EHT_KNOWN_DISREGARD_O = 0x00000100,
+ IEEE80211_RADIOTAP_EHT_KNOWN_DISREGARD_S = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_KNOWN_CRC1 = 0x00002000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_TAIL1 = 0x00004000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_CRC2_O = 0x00008000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_TAIL2_O = 0x00010000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_NSS_S = 0x00020000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_BEAMFORMED_S = 0x00040000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_NR_NON_OFDMA_USERS_M = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_ENCODING_BLOCK_CRC_M = 0x00100000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_ENCODING_BLOCK_TAIL_M = 0x00200000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_RU_MRU_SIZE_OM = 0x00400000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_RU_MRU_INDEX_OM = 0x00800000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT = 0x01000000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80 = 0x02000000,
+};
+
+enum ieee80211_radiotap_eht_data {
+ /* Data 0 */
+ IEEE80211_RADIOTAP_EHT_DATA0_SPATIAL_REUSE = 0x00000078,
+ IEEE80211_RADIOTAP_EHT_DATA0_GI = 0x00000180,
+ IEEE80211_RADIOTAP_EHT_DATA0_LTF = 0x00000600,
+ IEEE80211_RADIOTAP_EHT_DATA0_EHT_LTF = 0x00003800,
+ IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM = 0x00004000,
+ IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM = 0x00018000,
+ IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM = 0x00020000,
+ IEEE80211_RADIOTAP_EHT_DATA0_DISREGARD_S = 0x000c0000,
+ IEEE80211_RADIOTAP_EHT_DATA0_DISREGARD_O = 0x003c0000,
+ IEEE80211_RADIOTAP_EHT_DATA0_CRC1_O = 0x03c00000,
+ IEEE80211_RADIOTAP_EHT_DATA0_TAIL1_O = 0xfc000000,
+ /* Data 1 */
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_SIZE = 0x0000001f,
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_INDEX = 0x00001fe0,
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_ALLOC_CC_1_1_1 = 0x003fe000,
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_ALLOC_CC_1_1_1_KNOWN = 0x00400000,
+ IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80 = 0xc0000000,
+ /* Data 2 */
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_1 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_1_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_1_1_2 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_1_1_2_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_2 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_2_KNOWN = 0x20000000,
+ /* Data 3 */
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_1 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_1_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_2_2_1 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_2_2_1_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_2 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_2_KNOWN = 0x20000000,
+ /* Data 4 */
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_2 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_2_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_1_2_3 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_1_2_3_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_3 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_3_KNOWN = 0x20000000,
+ /* Data 5 */
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_4 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_4_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_2_2_4 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_2_2_4_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_5 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_5_KNOWN = 0x20000000,
+ /* Data 6 */
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_5 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_5_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_1_2_6 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_1_2_6_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_6 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_6_KNOWN = 0x20000000,
+ /* Data 7 */
+ IEEE80211_RADIOTAP_EHT_DATA7_CRC2_O = 0x0000000f,
+ IEEE80211_RADIOTAP_EHT_DATA7_TAIL_2_O = 0x000003f0,
+ IEEE80211_RADIOTAP_EHT_DATA7_NSS_S = 0x0000f000,
+ IEEE80211_RADIOTAP_EHT_DATA7_BEAMFORMED_S = 0x00010000,
+ IEEE80211_RADIOTAP_EHT_DATA7_NUM_OF_NON_OFDMA_USERS = 0x000e0000,
+ IEEE80211_RADIOTAP_EHT_DATA7_USER_ENCODING_BLOCK_CRC = 0x00f00000,
+ IEEE80211_RADIOTAP_EHT_DATA7_USER_ENCODING_BLOCK_TAIL = 0x3f000000,
+ /* Data 8 */
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_PS_160 = 0x00000001,
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0 = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1 = 0x000001fc,
+};
+
+enum ieee80211_radiotap_eht_user_info {
+ IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID_KNOWN = 0x00000001,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_MCS_KNOWN = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_CODING_KNOWN = 0x00000004,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_KNOWN_O = 0x00000010,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_KNOWN_O = 0x00000020,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_SPATIAL_CONFIG_KNOWN_M = 0x00000040,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_DATA_FOR_USER = 0x00000080,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID = 0x0007ff00,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_CODING = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_MCS = 0x00f00000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O = 0x0f000000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_O = 0x20000000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_SPATIAL_CONFIG_M = 0x3f000000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_RESEVED_c0000000 = 0xc0000000,
+};
+
+enum ieee80211_radiotap_eht_usig_common {
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN = 0x00000001,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_KNOWN = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN = 0x00000004,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN = 0x00000008,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN = 0x00000010,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC = 0x00000020,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_CHECKED = 0x00000040,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK = 0x00000080,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR = 0x01f80000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP = 0xfe000000,
+};
+
+enum ieee80211_radiotap_eht_usig_mu {
+ /* MU-USIG-1 */
+ IEEE80211_RADIOTAP_EHT_USIG1_MU_B20_B24_DISREGARD = 0x0000001f,
+ IEEE80211_RADIOTAP_EHT_USIG1_MU_B25_VALIDATE = 0x00000020,
+ /* MU-USIG-2 */
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE = 0x000000c0,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B2_VALIDATE = 0x00000100,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO = 0x00003e00,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B8_VALIDATE = 0x00004000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS = 0x00018000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS = 0x003e0000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B16_B19_CRC = 0x03c00000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B20_B25_TAIL = 0xfc000000,
+};
+
+enum ieee80211_radiotap_eht_usig_tb {
+ /* TB-USIG-1 */
+ IEEE80211_RADIOTAP_EHT_USIG1_TB_B20_B25_DISREGARD = 0x0000001f,
+
+ /* TB-USIG-2 */
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE = 0x000000c0,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B2_VALIDATE = 0x00000100,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1 = 0x00001e00,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2 = 0x0001e000,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B11_B15_DISREGARD = 0x003e0000,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B16_B19_CRC = 0x03c00000,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B20_B25_TAIL = 0xfc000000,
+};
+
/**
* ieee80211_get_radiotap_len - get radiotap header length
+ * @data: pointer to the header
*/
static inline u16 ieee80211_get_radiotap_len(const char *data)
{
diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index da8a3e648c7a..063313df447d 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -74,6 +74,10 @@ struct ieee802154_beacon_hdr {
#endif
} __packed;
+struct ieee802154_mac_cmd_pl {
+ u8 cmd_id;
+} __packed;
+
struct ieee802154_sechdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
u8 level:3,
@@ -149,6 +153,16 @@ struct ieee802154_beacon_frame {
struct ieee802154_beacon_hdr mac_pl;
};
+struct ieee802154_mac_cmd_frame {
+ struct ieee802154_hdr mhr;
+ struct ieee802154_mac_cmd_pl mac_pl;
+};
+
+struct ieee802154_beacon_req_frame {
+ struct ieee802154_hdr mhr;
+ struct ieee802154_mac_cmd_pl mac_pl;
+};
+
/* pushes hdr onto the skb. fields of hdr->fc that can be calculated from
* the contents of hdr will be, and the actual value of those bits in
* hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame
@@ -174,9 +188,13 @@ int ieee802154_hdr_peek_addrs(const struct sk_buff *skb,
*/
int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr);
-/* pushes a beacon frame into an skb */
+/* pushes/pulls various frame types into/from an skb */
int ieee802154_beacon_push(struct sk_buff *skb,
struct ieee802154_beacon_frame *beacon);
+int ieee802154_mac_cmd_push(struct sk_buff *skb, void *frame,
+ const void *pl, unsigned int pl_len);
+int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb,
+ struct ieee802154_mac_cmd_pl *mac_pl);
int ieee802154_max_payload(const struct ieee802154_hdr *hdr);
diff --git a/include/net/ila.h b/include/net/ila.h
deleted file mode 100644
index 73ebe5eab272..000000000000
--- a/include/net/ila.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * ILA kernel interface
- *
- * Copyright (c) 2015 Tom Herbert <tom@herbertland.com>
- */
-
-#ifndef _NET_ILA_H
-#define _NET_ILA_H
-
-struct sk_buff;
-
-int ila_xlat_outgoing(struct sk_buff *skb);
-int ila_xlat_incoming(struct sk_buff *skb);
-
-#endif /* _NET_ILA_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 56f1286583d3..533a7337865a 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -48,6 +48,22 @@ struct sock *__inet6_lookup_established(struct net *net,
const u16 hnum, const int dif,
const int sdif);
+typedef u32 (inet6_ehashfn_t)(const struct net *net,
+ const struct in6_addr *laddr, const u16 lport,
+ const struct in6_addr *faddr, const __be16 fport);
+
+inet6_ehashfn_t inet6_ehashfn;
+
+INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn);
+
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum,
+ inet6_ehashfn_t *ehashfn);
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -57,6 +73,15 @@ struct sock *inet6_lookup_listener(struct net *net,
const unsigned short hnum,
const int dif, const int sdif);
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum, const int dif,
+ inet6_ehashfn_t *ehashfn);
+
static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -78,6 +103,46 @@ static inline struct sock *__inet6_lookup(struct net *net,
daddr, hnum, dif, sdif);
}
+static inline
+struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr, const __be16 sport,
+ const struct in6_addr *daddr, const __be16 dport,
+ bool *refcounted, inet6_ehashfn_t *ehashfn)
+{
+ struct sock *sk, *reuse_sk;
+ bool prefetched;
+
+ sk = skb_steal_sock(skb, refcounted, &prefetched);
+ if (!sk)
+ return NULL;
+
+ if (!prefetched || !sk_fullsock(sk))
+ return sk;
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_state != TCP_LISTEN)
+ return sk;
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ if (sk->sk_state != TCP_CLOSE)
+ return sk;
+ } else {
+ return sk;
+ }
+
+ reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, ntohs(dport),
+ ehashfn);
+ if (!reuse_sk)
+ return sk;
+
+ /* We've chosen a new reuseport sock which is never refcounted. This
+ * implies that sk also isn't refcounted.
+ */
+ WARN_ON_ONCE(*refcounted);
+
+ return reuse_sk;
+}
+
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be16 sport,
@@ -85,14 +150,20 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
int iif, int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb, refcounted);
-
+ struct net *net = dev_net(skb_dst(skb)->dev);
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct sock *sk;
+
+ sk = inet6_steal_sock(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, dport,
+ refcounted, inet6_ehashfn);
+ if (IS_ERR(sk))
+ return NULL;
if (sk)
return sk;
- return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
- doff, &ipv6_hdr(skb)->saddr, sport,
- &ipv6_hdr(skb)->daddr, ntohs(dport),
+ return __inet6_lookup(net, hashinfo, skb,
+ doff, &ip6h->saddr, sport,
+ &ip6h->daddr, ntohs(dport),
iif, sdif, refcounted);
}
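
UDPv6 gets the same treatment, but must re-resolve reuseport with the UDP
hash function rather than the TCP one, which is what the ehashfn parameter is
for. A minimal sketch of the UDP-side call (the wrapper is illustrative):

    static struct sock *example_udp6_steal(struct net *net,
                                           struct sk_buff *skb,
                                           __be16 sport, __be16 dport,
                                           bool *refcounted)
    {
            const struct ipv6hdr *ip6h = ipv6_hdr(skb);

            /* doff is the header offset used for reuseport BPF access */
            return inet6_steal_sock(net, skb, sizeof(struct udphdr),
                                    &ip6h->saddr, sport,
                                    &ip6h->daddr, dport,
                                    refcounted, udp6_ehashfn);
    }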
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index cec453c18f1d..f50a644d87a9 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -31,16 +31,19 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);
int inet_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern);
+void __inet_accept(struct socket *sock, struct socket *newsock,
+ struct sock *newsk);
int inet_send_prepare(struct sock *sk);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
-ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
- size_t size, int flags);
+void inet_splice_eof(struct socket *sock);
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
+int __inet_listen_sk(struct sock *sk, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
/* Don't allocate port at this moment, defer to connect. */
#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
/* Grab and release socket lock. */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c2b15f7e5516..5d2fcc137b88 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -164,7 +164,8 @@ enum inet_csk_ack_state_t {
ICSK_ACK_TIMER = 2,
ICSK_ACK_PUSHED = 4,
ICSK_ACK_PUSHED2 = 8,
- ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
+ ICSK_ACK_NOW = 16, /* Send the next ACK immediately (once) */
+ ICSK_ACK_NOMEM = 32,
};
void inet_csk_init_xmit_timers(struct sock *sk,
@@ -341,9 +342,9 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline bool inet_csk_has_ulp(struct sock *sk)
+static inline bool inet_csk_has_ulp(const struct sock *sk)
{
- return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+ return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops;
}
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index b23ddec3cd5c..153960663ce4 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -7,7 +7,7 @@
#include <linux/in6.h>
#include <linux/rbtree_types.h>
#include <linux/refcount.h>
-#include <net/dropreason.h>
+#include <net/dropreason-core.h>
/* Per netns frag queues directory */
struct fqdir {
@@ -29,7 +29,7 @@ struct fqdir {
};
/**
- * fragment queue flags
+ * enum: fragment queue flags
*
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 99bd823e97f6..3ecfeadbfa06 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -177,7 +177,7 @@ struct inet_hashinfo {
struct inet_listen_hashbucket *lhash2;
bool pernet;
-};
+} ____cacheline_aligned_in_smp;
static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
{
@@ -379,6 +379,27 @@ struct sock *__inet_lookup_established(struct net *net,
const __be32 daddr, const u16 hnum,
const int dif, const int sdif);
+typedef u32 (inet_ehashfn_t)(const struct net *net,
+ const __be32 laddr, const __u16 lport,
+ const __be32 faddr, const __be16 fport);
+
+inet_ehashfn_t inet_ehashfn;
+
+INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn);
+
+struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum,
+ inet_ehashfn_t *ehashfn);
+
+struct sock *inet_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum, const int dif,
+ inet_ehashfn_t *ehashfn);
+
static inline struct sock *
inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
@@ -428,6 +449,46 @@ static inline struct sock *inet_lookup(struct net *net,
return sk;
}
+static inline
+struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+ bool *refcounted, inet_ehashfn_t *ehashfn)
+{
+ struct sock *sk, *reuse_sk;
+ bool prefetched;
+
+ sk = skb_steal_sock(skb, refcounted, &prefetched);
+ if (!sk)
+ return NULL;
+
+ if (!prefetched || !sk_fullsock(sk))
+ return sk;
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_state != TCP_LISTEN)
+ return sk;
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ if (sk->sk_state != TCP_CLOSE)
+ return sk;
+ } else {
+ return sk;
+ }
+
+ reuse_sk = inet_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, ntohs(dport),
+ ehashfn);
+ if (!reuse_sk)
+ return sk;
+
+ /* We've chosen a new reuseport sock which is never refcounted. This
+ * implies that sk also isn't refcounted.
+ */
+ WARN_ON_ONCE(*refcounted);
+
+ return reuse_sk;
+}
+
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
int doff,
@@ -436,22 +497,23 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
const int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb, refcounted);
+ struct net *net = dev_net(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
+ struct sock *sk;
+ sk = inet_steal_sock(net, skb, doff, iph->saddr, sport, iph->daddr, dport,
+ refcounted, inet_ehashfn);
+ if (IS_ERR(sk))
+ return NULL;
if (sk)
return sk;
- return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ return __inet_lookup(net, hashinfo, skb,
doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb), sdif,
refcounted);
}
-u32 inet6_ehashfn(const struct net *net,
- const struct in6_addr *laddr, const u16 lport,
- const struct in6_addr *faddr, const __be16 fport);
-
static inline void sk_daddr_set(struct sock *sk, __be32 addr)
{
sk->sk_daddr = addr; /* alias of inet_daddr */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 51857117ac09..2de0e4d4a027 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark &&
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+ u32 mark = READ_ONCE(sk->sk_mark);
+
+ if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
- return sk->sk_mark;
+ return mark;
}
static inline int inet_request_bound_dev_if(const struct sock *sk,
@@ -193,13 +194,13 @@ struct rtable;
* @inet_rcv_saddr - Bound local IPv4 addr
* @inet_dport - Destination port
* @inet_num - Local port
+ * @inet_flags - various atomic flags
* @inet_saddr - Sending source
* @uc_ttl - Unicast TTL
* @inet_sport - Source port
* @inet_id - ID counter for DF pkts
* @tos - TOS
* @mc_ttl - Multicasting TTL
- * @is_icsk - is this an inet_connection_sock?
* @uc_index - Unicast outgoing device index
* @mc_index - Multicast device index
* @mc_list - Group array
@@ -217,57 +218,88 @@ struct inet_sock {
#define inet_dport sk.__sk_common.skc_dport
#define inet_num sk.__sk_common.skc_num
+ unsigned long inet_flags;
__be32 inet_saddr;
__s16 uc_ttl;
- __u16 cmsg_flags;
- struct ip_options_rcu __rcu *inet_opt;
__be16 inet_sport;
- __u16 inet_id;
+ struct ip_options_rcu __rcu *inet_opt;
+ atomic_t inet_id;
__u8 tos;
__u8 min_ttl;
__u8 mc_ttl;
__u8 pmtudisc;
- __u8 recverr:1,
- is_icsk:1,
- freebind:1,
- hdrincl:1,
- mc_loop:1,
- transparent:1,
- mc_all:1,
- nodefrag:1;
- __u8 bind_address_no_port:1,
- recverr_rfc4884:1,
- defer_connect:1; /* Indicates that fastopen_connect is set
- * and cookie exists so we defer connect
- * until first data frame is written
- */
__u8 rcv_tos;
__u8 convert_csum;
int uc_index;
int mc_index;
__be32 mc_addr;
- struct ip_mc_socklist __rcu *mc_list;
- struct inet_cork_full cork;
struct {
__u16 lo;
__u16 hi;
} local_port_range;
+
+ struct ip_mc_socklist __rcu *mc_list;
+ struct inet_cork_full cork;
};
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */
+enum {
+ INET_FLAGS_PKTINFO = 0,
+ INET_FLAGS_TTL = 1,
+ INET_FLAGS_TOS = 2,
+ INET_FLAGS_RECVOPTS = 3,
+ INET_FLAGS_RETOPTS = 4,
+ INET_FLAGS_PASSSEC = 5,
+ INET_FLAGS_ORIGDSTADDR = 6,
+ INET_FLAGS_CHECKSUM = 7,
+ INET_FLAGS_RECVFRAGSIZE = 8,
+
+ INET_FLAGS_RECVERR = 9,
+ INET_FLAGS_RECVERR_RFC4884 = 10,
+ INET_FLAGS_FREEBIND = 11,
+ INET_FLAGS_HDRINCL = 12,
+ INET_FLAGS_MC_LOOP = 13,
+ INET_FLAGS_MC_ALL = 14,
+ INET_FLAGS_TRANSPARENT = 15,
+ INET_FLAGS_IS_ICSK = 16,
+ INET_FLAGS_NODEFRAG = 17,
+ INET_FLAGS_BIND_ADDRESS_NO_PORT = 18,
+ INET_FLAGS_DEFER_CONNECT = 19,
+};
+
/* cmsg flags for inet */
-#define IP_CMSG_PKTINFO BIT(0)
-#define IP_CMSG_TTL BIT(1)
-#define IP_CMSG_TOS BIT(2)
-#define IP_CMSG_RECVOPTS BIT(3)
-#define IP_CMSG_RETOPTS BIT(4)
-#define IP_CMSG_PASSSEC BIT(5)
-#define IP_CMSG_ORIGDSTADDR BIT(6)
-#define IP_CMSG_CHECKSUM BIT(7)
-#define IP_CMSG_RECVFRAGSIZE BIT(8)
+#define IP_CMSG_PKTINFO BIT(INET_FLAGS_PKTINFO)
+#define IP_CMSG_TTL BIT(INET_FLAGS_TTL)
+#define IP_CMSG_TOS BIT(INET_FLAGS_TOS)
+#define IP_CMSG_RECVOPTS BIT(INET_FLAGS_RECVOPTS)
+#define IP_CMSG_RETOPTS BIT(INET_FLAGS_RETOPTS)
+#define IP_CMSG_PASSSEC BIT(INET_FLAGS_PASSSEC)
+#define IP_CMSG_ORIGDSTADDR BIT(INET_FLAGS_ORIGDSTADDR)
+#define IP_CMSG_CHECKSUM BIT(INET_FLAGS_CHECKSUM)
+#define IP_CMSG_RECVFRAGSIZE BIT(INET_FLAGS_RECVFRAGSIZE)
+
+#define IP_CMSG_ALL (IP_CMSG_PKTINFO | IP_CMSG_TTL | \
+ IP_CMSG_TOS | IP_CMSG_RECVOPTS | \
+ IP_CMSG_RETOPTS | IP_CMSG_PASSSEC | \
+ IP_CMSG_ORIGDSTADDR | IP_CMSG_CHECKSUM | \
+ IP_CMSG_RECVFRAGSIZE)
+
+static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
+{
+ return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL;
+}
+
+#define inet_test_bit(nr, sk) \
+ test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_set_bit(nr, sk) \
+ set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_clear_bit(nr, sk) \
+ clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_assign_bit(nr, sk, val) \
+ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
static inline bool sk_is_inet(struct sock *sk)
{
@@ -305,10 +337,7 @@ static inline struct sock *skb_to_full_sk(const struct sk_buff *skb)
return sk_to_full_sk(skb->sk);
}
-static inline struct inet_sock *inet_sk(const struct sock *sk)
-{
- return (struct inet_sock *)sk;
-}
+#define inet_sk(ptr) container_of_const(ptr, struct inet_sock, sk)
static inline void __inet_sk_copy_descendant(struct sock *sk_to,
const struct sock *sk_from,
@@ -365,7 +394,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
__u8 flags = 0;
- if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
+ if (inet_test_bit(TRANSPARENT, sk) || inet_test_bit(HDRINCL, sk))
flags |= FLOWI_FLAG_ANYSRC;
return flags;
}
@@ -391,7 +420,8 @@ static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
- inet->freebind || inet->transparent;
+ test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) ||
+ test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags);
}
static inline bool inet_addr_valid_or_nonlocal(struct net *net,
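
The conversion pattern for the rest of the tree follows directly from the
macros above: reads and writes of the former bitfields become atomic bit
operations, which is what makes lockless setsockopt paths safe. A minimal
sketch:

    /* was: inet_sk(sk)->transparent = !!val; (under the socket lock) */
    static void example_set_transparent(struct sock *sk, bool on)
    {
            inet_assign_bit(TRANSPARENT, sk, on);
    }

    /* was: return inet_sk(sk)->transparent; */
    static bool example_is_transparent(const struct sock *sk)
    {
            return inet_test_bit(TRANSPARENT, sk);
    }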
diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..19adacd5ece0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@ struct ipcm_cookie {
__be32 addr;
int oif;
struct ip_options_rcu *opt;
+ __u8 protocol;
__u8 ttl;
__s16 tos;
char priority;
@@ -92,10 +93,11 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
{
ipcm_init(ipcm);
- ipcm->sockc.mark = inet->sk.sk_mark;
+ ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
+ ipcm->protocol = inet->inet_num;
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
@@ -220,8 +222,6 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4,
unsigned int flags);
int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd,
struct sk_buff *skb);
-ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
- int offset, size_t size, int flags);
struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4,
struct sk_buff_head *queue,
struct inet_cork *cork);
@@ -242,14 +242,22 @@ static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
}
-static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
+/* Get the route scope that should be used when sending a packet. */
+static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
+ const struct ipcm_cookie *ipc,
+ const struct msghdr *msg)
{
- return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
+ if (sock_flag(&inet->sk, SOCK_LOCALROUTE) ||
+ msg->msg_flags & MSG_DONTROUTE ||
+ (ipc->opt && ipc->opt->opt.is_strictroute))
+ return RT_SCOPE_LINK;
+
+ return RT_SCOPE_UNIVERSE;
}
-static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk)
+static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
{
- return (ipc->tos != -1) ? RT_CONN_FLAGS_TOS(sk, ipc->tos) : RT_CONN_FLAGS(sk);
+ return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
}
/* datagram.c */
@@ -280,7 +288,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
- unsigned int len, u64 transmit_time);
+ unsigned int len, u64 transmit_time, u32 txhash);
#define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field)
#define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field)
@@ -530,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
* generator as much as we can.
*/
if (sk && inet_sk(sk)->inet_daddr) {
- iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += segs;
+ int val;
+
+ /* avoid atomic operations for TCP,
+ * as we hold socket lock at this point.
+ */
+ if (sk_is_tcp(sk)) {
+ sock_owned_by_me(sk);
+ val = atomic_read(&inet_sk(sk)->inet_id);
+ atomic_set(&inet_sk(sk)->inet_id, val + segs);
+ } else {
+ val = atomic_add_return(segs, &inet_sk(sk)->inet_id);
+ }
+ iph->id = htons(val);
return;
}
if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
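
On the sendmsg side, ip_sendmsg_scope() replaces the old get_rtconn_flags()
packing when initializing the flow. A minimal sketch of a sendmsg path
filling in a flowi4 with the new helper (the argument plumbing is
illustrative):

    static void example_init_flow(struct flowi4 *fl4, struct inet_sock *inet,
                                  struct ipcm_cookie *ipc, struct msghdr *msg,
                                  __be32 daddr, __be32 saddr, int oif,
                                  __be16 dport, __be16 sport)
    {
            flowi4_init_output(fl4, oif, ipc->sockc.mark,
                               get_rttos(ipc, inet),
                               ip_sendmsg_scope(inet, ipc, msg),
                               inet->sk.sk_protocol,
                               inet_sk_flowi_flags(&inet->sk),
                               daddr, saddr, dport, sport,
                               inet->sk.sk_uid);
    }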
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6268963d9599..c9ff23cf313e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -179,6 +179,9 @@ struct fib6_info {
refcount_t fib6_ref;
unsigned long expires;
+
+ struct hlist_node gc_link;
+
struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
@@ -217,9 +220,6 @@ struct rt6_info {
struct inet6_dev *rt6i_idev;
u32 rt6i_flags;
- struct list_head rt6i_uncached;
- struct uncached_list *rt6i_uncached_list;
-
/* more non-fragment space at head required */
unsigned short rt6i_nfheader_len;
};
@@ -250,19 +250,6 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
return rt->fib6_src.plen > 0;
}
-static inline void fib6_clean_expires(struct fib6_info *f6i)
-{
- f6i->fib6_flags &= ~RTF_EXPIRES;
- f6i->expires = 0;
-}
-
-static inline void fib6_set_expires(struct fib6_info *f6i,
- unsigned long expires)
-{
- f6i->expires = expires;
- f6i->fib6_flags |= RTF_EXPIRES;
-}
-
static inline bool fib6_check_expired(const struct fib6_info *f6i)
{
if (f6i->fib6_flags & RTF_EXPIRES)
@@ -270,6 +257,11 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i)
return false;
}
+static inline bool fib6_has_expires(const struct fib6_info *f6i)
+{
+ return f6i->fib6_flags & RTF_EXPIRES;
+}
+
/* Function to safely get fn->fn_sernum for passed in rt
* and store result in passed in cookie.
* Return true if we can get cookie safely
@@ -391,6 +383,7 @@ struct fib6_table {
struct inet_peer_base tb6_peers;
unsigned int flags;
unsigned int fib_seq;
+ struct hlist_head tb6_gc_hlist; /* GC candidates */
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
@@ -472,13 +465,10 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
rcu_read_lock();
from = rcu_dereference(rt->from);
- if (from) {
+ if (from)
*addr = from->fib6_prefsrc.addr;
- } else {
- struct in6_addr in6_zero = {};
-
- *addr = in6_zero;
- }
+ else
+ *addr = in6addr_any;
rcu_read_unlock();
}
@@ -510,6 +500,48 @@ void fib6_gc_cleanup(void);
int fib6_init(void);
+/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be
+ * NULL.
+ */
+static inline void fib6_set_expires_locked(struct fib6_info *f6i,
+ unsigned long expires)
+{
+ struct fib6_table *tb6;
+
+ tb6 = f6i->fib6_table;
+ f6i->expires = expires;
+ if (tb6 && !fib6_has_expires(f6i))
+ hlist_add_head(&f6i->gc_link, &tb6->tb6_gc_hlist);
+ f6i->fib6_flags |= RTF_EXPIRES;
+}
+
+/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be
+ * NULL. If fib6_table is NULL, the fib6_info will not be inserted into the
+ * list of GC candidates until it is inserted into a table.
+ */
+static inline void fib6_set_expires(struct fib6_info *f6i,
+ unsigned long expires)
+{
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_set_expires_locked(f6i, expires);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
+static inline void fib6_clean_expires_locked(struct fib6_info *f6i)
+{
+ if (fib6_has_expires(f6i))
+ hlist_del_init(&f6i->gc_link);
+ f6i->fib6_flags &= ~RTF_EXPIRES;
+ f6i->expires = 0;
+}
+
+static inline void fib6_clean_expires(struct fib6_info *f6i)
+{
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_clean_expires_locked(f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
struct ipv6_route_iter {
struct seq_net_private p;
struct fib6_walker w;
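A hedged sketch of how the two expiry setters divide work (fib6_refresh_route() is an invented helper, not a kernel API): callers already holding tb6_lock use the _locked variant, everyone else lets the wrapper take the lock.

	static void fib6_refresh_route(struct fib6_info *f6i,
				       unsigned long lifetime_jiffies,
				       bool tb6_locked)
	{
		unsigned long expires = jiffies + lifetime_jiffies;

		if (tb6_locked)
			/* tb6_lock held: also links f6i into tb6_gc_hlist */
			fib6_set_expires_locked(f6i, expires);
		else
			/* wrapper takes fib6_table->tb6_lock for us */
			fib6_set_expires(f6i, expires);
	}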
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 81ee387a1fc4..b32539bb0fb0 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -100,7 +100,7 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags)
{
if (!(flags & RT6_LOOKUP_F_DST_NOREF) ||
- !list_empty(&rt->rt6i_uncached))
+ !list_empty(&rt->dst.rt_uncached))
ip6_rt_put(rt);
}
@@ -156,7 +156,7 @@ void fib6_force_start_gc(struct net *net);
struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev,
const struct in6_addr *addr, bool anycast,
- gfp_t gfp_flags);
+ gfp_t gfp_flags, struct netlink_ext_ack *extack);
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index fca357679816..e8750b4ef7e1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -52,11 +52,19 @@ struct ip_tunnel_key {
u8 tos; /* TOS for IPv4, TC for IPv6 */
u8 ttl; /* TTL for IPv4, HL for IPv6 */
__be32 label; /* Flow Label for IPv6 */
+ u32 nhid;
__be16 tp_src;
__be16 tp_dst;
__u8 flow_flags;
};
+struct ip_tunnel_encap {
+ u16 type;
+ u16 flags;
+ __be16 sport;
+ __be16 dport;
+};
+
/* Flags for ip_tunnel_info mode. */
#define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */
#define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */
@@ -67,8 +75,15 @@ struct ip_tunnel_key {
GENMASK((sizeof_field(struct ip_tunnel_info, \
options_len) * BITS_PER_BYTE) - 1, 0)
+#define ip_tunnel_info_opts(info) \
+ _Generic(info, \
+ const struct ip_tunnel_info * : ((const void *)((info) + 1)),\
+ struct ip_tunnel_info * : ((void *)((info) + 1))\
+ )
+
struct ip_tunnel_info {
struct ip_tunnel_key key;
+ struct ip_tunnel_encap encap;
#ifdef CONFIG_DST_CACHE
struct dst_cache dst_cache;
#endif
@@ -86,13 +101,6 @@ struct ip_tunnel_6rd_parm {
};
#endif
-struct ip_tunnel_encap {
- u16 type;
- u16 flags;
- __be16 sport;
- __be16 dport;
-};
-
struct ip_tunnel_prl_entry {
struct ip_tunnel_prl_entry __rcu *next;
__be32 addr;
@@ -293,6 +301,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 remote, __be32 local,
__be32 key);
+void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info);
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error);
@@ -371,22 +380,23 @@ static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
return hlen;
}
-static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+static inline int ip_tunnel_encap(struct sk_buff *skb,
+ struct ip_tunnel_encap *e,
u8 *protocol, struct flowi4 *fl4)
{
const struct ip_tunnel_encap_ops *ops;
int ret = -EINVAL;
- if (t->encap.type == TUNNEL_ENCAP_NONE)
+ if (e->type == TUNNEL_ENCAP_NONE)
return 0;
- if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+ if (e->type >= MAX_IPTUN_ENCAP_OPS)
return -EINVAL;
rcu_read_lock();
- ops = rcu_dereference(iptun_encaps[t->encap.type]);
+ ops = rcu_dereference(iptun_encaps[e->type]);
if (likely(ops && ops->build_header))
- ret = ops->build_header(skb, &t->encap, protocol, fl4);
+ ret = ops->build_header(skb, e, protocol, fl4);
rcu_read_unlock();
return ret;
@@ -485,11 +495,6 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
}
}
-static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info)
-{
- return info + 1;
-}
-
static inline void ip_tunnel_info_opts_get(void *to,
const struct ip_tunnel_info *info)
{
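The _Generic form of ip_tunnel_info_opts() exists so that const-ness propagates from the info pointer to the returned options pointer; the old inline helper always returned a mutable void *. A hedged illustration (dump_tun_opts() is an invented function):

	static void dump_tun_opts(const struct ip_tunnel_info *info)
	{
		/* with the macro this stays 'const void *'; the old inline
		 * silently dropped the qualifier
		 */
		const void *opts = ip_tunnel_info_opts(info);

		print_hex_dump_bytes("tun opts: ", DUMP_PREFIX_OFFSET,
				     opts, info->options_len);
	}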
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 6d71a5ff52df..ff406ef4fd4a 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -265,26 +265,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
pr_err(msg, ##__VA_ARGS__); \
} while (0)
-#ifdef CONFIG_IP_VS_DEBUG
-#define EnterFunction(level) \
- do { \
- if (level <= ip_vs_get_debug_level()) \
- printk(KERN_DEBUG \
- pr_fmt("Enter: %s, %s line %i\n"), \
- __func__, __FILE__, __LINE__); \
- } while (0)
-#define LeaveFunction(level) \
- do { \
- if (level <= ip_vs_get_debug_level()) \
- printk(KERN_DEBUG \
- pr_fmt("Leave: %s, %s line %i\n"), \
- __func__, __FILE__, __LINE__); \
- } while (0)
-#else
-#define EnterFunction(level) do {} while (0)
-#define LeaveFunction(level) do {} while (0)
-#endif
-
/* The port number of FTP service (in network order). */
#define FTPPORT cpu_to_be16(21)
#define FTPDATA cpu_to_be16(20)
@@ -604,7 +584,7 @@ struct ip_vs_conn {
spinlock_t lock; /* lock for state transition */
volatile __u16 state; /* state info */
volatile __u16 old_state; /* old state, to be used for
- * state transition triggerd
+ * state transition triggered
* synchronization
*/
__u32 fwmark; /* Fire wall mark from skb */
@@ -630,8 +610,10 @@ struct ip_vs_conn {
*/
struct ip_vs_app *app; /* bound ip_vs_app object */
void *app_data; /* Application private data */
- struct ip_vs_seq in_seq; /* incoming seq. struct */
- struct ip_vs_seq out_seq; /* outgoing seq. struct */
+ struct_group(sync_conn_opt,
+ struct ip_vs_seq in_seq; /* incoming seq. struct */
+ struct ip_vs_seq out_seq; /* outgoing seq. struct */
+ );
const struct ip_vs_pe *pe;
char *pe_data;
@@ -653,7 +635,7 @@ struct ip_vs_service_user_kern {
u16 protocol;
union nf_inet_addr addr; /* virtual ip address */
__be16 port;
- u32 fwmark; /* firwall mark of service */
+ u32 fwmark; /* firewall mark of service */
/* virtual service options */
char *sched_name;
@@ -1054,7 +1036,7 @@ struct netns_ipvs {
struct ipvs_sync_daemon_cfg bcfg; /* Backup Configuration */
/* net name space ptr */
struct net *net; /* Needed by timer routines */
- /* Number of heterogeneous destinations, needed becaus heterogeneous
+ /* Number of heterogeneous destinations, needed because heterogeneous
* are not supported when synchronization is enabled.
*/
unsigned int mixed_address_family_dests;
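The struct_group() above lets sync code copy both sequence structs as a single region without tripping FORTIFY memcpy bounds checks. A minimal sketch, assuming the on-wire struct ip_vs_sync_conn_options mirrors the group layout (as it does elsewhere in this header):

	static void ip_vs_copy_seq_opts(struct ip_vs_conn *cp,
					const struct ip_vs_sync_conn_options *opt)
	{
		/* one bounded memcpy covers in_seq and out_seq via the group */
		memcpy(&cp->sync_conn_opt, opt, sizeof(cp->sync_conn_opt));
	}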
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7332296eca44..0675be0f3fa0 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -752,12 +752,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
/* more secured version of ipv6_addr_hash() */
static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
{
- u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
-
- return jhash_3words(v,
- (__force u32)a->s6_addr32[2],
- (__force u32)a->s6_addr32[3],
- initval);
+ return jhash2((__force const u32 *)a->s6_addr32,
+ ARRAY_SIZE(a->s6_addr32), initval);
}
static inline bool ipv6_addr_loopback(const struct in6_addr *a)
@@ -941,7 +937,8 @@ static inline bool ipv6_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
return net->ipv6.sysctl.ip_nonlocal_bind ||
- inet->freebind || inet->transparent;
+ test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) ||
+ test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags);
}
/* Sysctl settings for net ipv6.auto_flowlabels */
@@ -1220,6 +1217,7 @@ void inet6_cleanup_sock(struct sock *sk);
void inet6_sock_destruct(struct sock *sk);
int inet6_release(struct socket *sock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
@@ -1274,7 +1272,9 @@ static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; }
#ifdef CONFIG_SYSCTL
struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
+size_t ipv6_icmp_sysctl_table_size(void);
struct ctl_table *ipv6_route_sysctl_init(struct net *net);
+size_t ipv6_route_sysctl_table_size(struct net *net);
int ipv6_sysctl_register(void);
void ipv6_sysctl_unregister(void);
#endif
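The jhash2() form mixes all four 32-bit words of the address instead of folding the first two together before hashing. A toy bucketing sketch (addr_bucket() is illustrative only and assumes a power-of-two table size):

	static u32 addr_bucket(const struct in6_addr *a, u32 seed,
			       u32 nr_buckets /* power of two */)
	{
		return __ipv6_addr_jhash(a, seed) & (nr_buckets - 1);
	}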
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index d2ea5863eedc..b2cf243ebe44 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -426,17 +426,10 @@ struct iw_public_data {
/**************************** PROTOTYPES ****************************/
/*
- * Functions part of the Wireless Extensions (defined in net/core/wireless.c).
- * Those may be called only within the kernel.
+ * Functions part of the Wireless Extensions (defined in net/wireless/wext-core.c).
+ * Those may be called by driver modules.
*/
-/* First : function strictly used inside the kernel */
-
-/* Handle /proc/net/wireless, called in net/code/dev.c */
-int dev_get_wireless_info(char *buffer, char **start, off_t offset, int length);
-
-/* Second : functions that may be called by driver modules */
-
/* Send a single event to user space */
void wireless_send_event(struct net_device *dev, unsigned int cmd,
union iwreq_data *wrqu, const char *extra);
diff --git a/include/net/kcm.h b/include/net/kcm.h
index 2d704f8f4905..90279e5e09a5 100644
--- a/include/net/kcm.h
+++ b/include/net/kcm.h
@@ -47,9 +47,9 @@ struct kcm_stats {
struct kcm_tx_msg {
unsigned int sent;
- unsigned int fragidx;
unsigned int frag_offset;
unsigned int msg_flags;
+ bool started_tx;
struct sk_buff *frag_skb;
struct sk_buff *last_skb;
};
diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h
index 3e1f76786d7b..7620a9196922 100644
--- a/include/net/llc_c_ac.h
+++ b/include/net/llc_c_ac.h
@@ -175,7 +175,6 @@ int llc_conn_ac_send_ack_if_needed(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_adjust_npta_by_rr(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_adjust_npta_by_rnr(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb);
-int llc_conn_ac_send_i_rsp_as_ack(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb);
void llc_conn_busy_tmr_cb(struct timer_list *t);
diff --git a/include/net/llc_c_ev.h b/include/net/llc_c_ev.h
index 3948cf111dd0..241889955157 100644
--- a/include/net/llc_c_ev.h
+++ b/include/net/llc_c_ev.h
@@ -158,7 +158,6 @@ int llc_conn_ev_p_tmr_exp(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_ack_tmr_exp(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_rej_tmr_exp(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_busy_tmr_exp(struct sock *sk, struct sk_buff *skb);
-int llc_conn_ev_sendack_tmr_exp(struct sock *sk, struct sk_buff *skb);
/* NOT_USED functions and their variations */
int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index 2c1ea3414640..374411b3066c 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -111,7 +111,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, u16 *how_many_unacked);
struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr,
- struct llc_addr *laddr);
+ struct llc_addr *laddr, const struct net *net);
void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk);
void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk);
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 49aa79c7b278..7e73f8e5e497 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -269,7 +269,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
/**
* llc_pdu_decode_da - extracts dest address of input frame
* @skb: input skb that destination address must be extracted from it
- * @sa: pointer to destination address (6 byte array).
+ * @da: pointer to destination address (6 byte array).
*
* This function extracts destination address(MAC) of input frame.
*/
@@ -321,7 +321,7 @@ static inline void llc_pdu_init_as_ui_cmd(struct sk_buff *skb)
/**
* llc_pdu_init_as_test_cmd - sets PDU as TEST
- * @skb - Address of the skb to build
+ * @skb: Address of the skb to build
*
* Sets a PDU as TEST
*/
@@ -369,6 +369,8 @@ struct llc_xid_info {
/**
* llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID
* @skb: input skb that header must be set into it.
+ * @svcs_supported: The class of the LLC (I or II)
+ * @rx_window: The size of the receive window of the LLC
*
* This function sets third,fourth,fifth and sixth bytes of LLC header as
* a XID PDU.
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 6f15e6fa154e..53bd2d02a4f0 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -16,9 +16,12 @@
#define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1)
#define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2)
+/* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return
+ * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety.
+ */
enum {
LWTUNNEL_XMIT_DONE,
- LWTUNNEL_XMIT_CONTINUE,
+ LWTUNNEL_XMIT_CONTINUE = 0x100,
};
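A hedged sketch of the call-site pattern this protects (demo_output() and normal_output() are invented stand-ins); the constants LWTUNNEL_XMIT_CONTINUE must not alias include NET_XMIT_DROP (0x01) and NETDEV_TX_BUSY (0x10):

	static int demo_output(struct net *net, struct sock *sk,
			       struct sk_buff *skb)
	{
		int ret = lwtunnel_xmit(skb);

		if (ret == LWTUNNEL_XMIT_DONE)
			return 0;	/* the tunnel consumed the packet */
		if (ret != LWTUNNEL_XMIT_CONTINUE)
			return ret;	/* a real NET_XMIT_xxx/NETDEV_TX_xxx code */

		return normal_output(net, sk, skb);	/* invented continuation */
	}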
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 219fd15893b0..7c707358d15c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2022 Intel Corporation
+ * Copyright (C) 2018 - 2023 Intel Corporation
*/
#ifndef MAC80211_H
@@ -534,6 +534,7 @@ struct ieee80211_fils_discovery {
* This structure keeps information about a BSS (and an association
* to that BSS) that can change during the lifetime of the BSS.
*
+ * @vif: reference to owning VIF
* @addr: (link) address used locally
* @link_id: link ID, or 0 for non-MLO
* @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
@@ -656,6 +657,9 @@ struct ieee80211_fils_discovery {
* write-protected by sdata_lock and local->mtx so holding either is fine
* for read access.
* @color_change_color: the bss color that will be used after the change.
+ * @ht_ldpc: in AP mode, indicates interface has HT LDPC capability.
+ * @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability.
+ * @he_ldpc: in AP mode, indicates interface has HE LDPC capability.
* @vht_su_beamformer: in AP mode, does this BSS support operation as a VHT SU
* beamformer
* @vht_su_beamformee: in AP mode, does this BSS support operation as a VHT SU
@@ -673,8 +677,16 @@ struct ieee80211_fils_discovery {
* @he_full_ul_mumimo: does this BSS support the reception (AP) or transmission
* (non-AP STA) of an HE TB PPDU on an RU that spans the entire PPDU
* bandwidth
+ * @eht_su_beamformer: in AP mode, does this BSS enable operation as an EHT SU
+ * beamformer
+ * @eht_su_beamformee: in AP mode, does this BSS enable operation as an EHT SU
+ * beamformee
+ * @eht_mu_beamformer: in AP mode, does this BSS enable operation as an EHT MU
+ * beamformer
*/
struct ieee80211_bss_conf {
+ struct ieee80211_vif *vif;
+
const u8 *bssid;
unsigned int link_id;
u8 addr[ETH_ALEN] __aligned(2);
@@ -750,6 +762,9 @@ struct ieee80211_bss_conf {
bool color_change_active;
u8 color_change_color;
+ bool ht_ldpc;
+ bool vht_ldpc;
+ bool he_ldpc;
bool vht_su_beamformer;
bool vht_su_beamformee;
bool vht_mu_beamformer;
@@ -758,6 +773,9 @@ struct ieee80211_bss_conf {
bool he_su_beamformee;
bool he_mu_beamformer;
bool he_full_ul_mumimo;
+ bool eht_su_beamformer;
+ bool eht_su_beamformee;
+ bool eht_mu_beamformer;
};
/**
@@ -1174,9 +1192,11 @@ struct ieee80211_tx_info {
u8 ampdu_ack_len;
u8 ampdu_len;
u8 antenna;
+ u8 pad;
u16 tx_time;
u8 flags;
- void *status_driver_data[18 / sizeof(void *)];
+ u8 pad2;
+ void *status_driver_data[16 / sizeof(void *)];
} status;
struct {
struct ieee80211_tx_rate driver_rates[
@@ -1372,9 +1392,12 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* subframes share the same sequence number. Reported subframes can be
* either regular MSDU or singly A-MSDUs. Subframes must not be
* interleaved with other frames.
- * @RX_FLAG_RADIOTAP_VENDOR_DATA: This frame contains vendor-specific
- * radiotap data in the skb->data (before the frame) as described by
- * the &struct ieee80211_vendor_radiotap.
+ * @RX_FLAG_RADIOTAP_TLV_AT_END: This frame contains radiotap TLVs in the
+ * skb->data (before the 802.11 header).
+ * If used, the SKB's mac_header pointer must be set to point
+ * to the 802.11 header after the TLVs, and any padding added after TLV
+ * data to align to 4 must be cleared by the driver putting the TLVs
+ * in the skb.
* @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before.
* This is used for AMSDU subframes which can have the same PN as
* the first subframe.
@@ -1426,7 +1449,7 @@ enum mac80211_rx_flags {
RX_FLAG_ONLY_MONITOR = BIT(17),
RX_FLAG_SKIP_MONITOR = BIT(18),
RX_FLAG_AMSDU_MORE = BIT(19),
- RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(20),
+ RX_FLAG_RADIOTAP_TLV_AT_END = BIT(20),
RX_FLAG_MIC_STRIPPED = BIT(21),
RX_FLAG_ALLOW_SAME_PN = BIT(22),
RX_FLAG_ICV_STRIPPED = BIT(23),
@@ -1567,39 +1590,6 @@ ieee80211_rx_status_to_khz(struct ieee80211_rx_status *rx_status)
}
/**
- * struct ieee80211_vendor_radiotap - vendor radiotap data information
- * @present: presence bitmap for this vendor namespace
- * (this could be extended in the future if any vendor needs more
- * bits, the radiotap spec does allow for that)
- * @align: radiotap vendor namespace alignment. This defines the needed
- * alignment for the @data field below, not for the vendor namespace
- * description itself (which has a fixed 2-byte alignment)
- * Must be a power of two, and be set to at least 1!
- * @oui: radiotap vendor namespace OUI
- * @subns: radiotap vendor sub namespace
- * @len: radiotap vendor sub namespace skip length, if alignment is done
- * then that's added to this, i.e. this is only the length of the
- * @data field.
- * @pad: number of bytes of padding after the @data, this exists so that
- * the skb data alignment can be preserved even if the data has odd
- * length
- * @data: the actual vendor namespace data
- *
- * This struct, including the vendor data, goes into the skb->data before
- * the 802.11 header. It's split up in mac80211 using the align/oui/subns
- * data.
- */
-struct ieee80211_vendor_radiotap {
- u32 present;
- u8 align;
- u8 oui[3];
- u8 subns;
- u8 pad;
- u16 len;
- u8 data[];
-} __packed;
-
-/**
* enum ieee80211_conf_flags - configuration flags
*
* Flags to define PHY configuration options
@@ -1767,12 +1757,15 @@ struct ieee80211_channel_switch {
* @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes
* and send P2P_PS notification to the driver if NOA changed, even
* this is not pure P2P vif.
+ * @IEEE80211_VIF_DISABLE_SMPS_OVERRIDE: disable user configuration of
+ * SMPS mode via debugfs.
*/
enum ieee80211_vif_flags {
IEEE80211_VIF_BEACON_FILTER = BIT(0),
IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1),
IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2),
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
+ IEEE80211_VIF_DISABLE_SMPS_OVERRIDE = BIT(4),
};
@@ -1802,6 +1795,9 @@ enum ieee80211_offload_flags {
* @ps: power-save mode (STA only). This flag is NOT affected by
* offchannel/dynamic_ps operations.
* @aid: association ID number, valid only when @assoc is true
+ * @eml_cap: EML capabilities as described in P802.11be_D2.2 Figure 9-1002k.
+ * @eml_med_sync_delay: Medium Synchronization delay as described in
+ * P802.11be_D2.2 Figure 9-1002j.
* @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The
* hardware may filter ARP queries targeted for other addresses than listed here.
* The driver must allow ARP queries targeted for all addresses listed here
@@ -1824,6 +1820,8 @@ struct ieee80211_vif_cfg {
bool ibss_creator;
bool ps;
u16 aid;
+ u16 eml_cap;
+ u16 eml_med_sync_delay;
__be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
int arp_addr_cnt;
@@ -1850,6 +1848,8 @@ struct ieee80211_vif_cfg {
* @active_links: The bitmap of active links, or 0 for non-MLO.
* The driver shouldn't change this directly, but use the
* API calls meant for that purpose.
+ * @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO.
+ * Must be a subset of valid_links.
* @addr: address of this interface
* @p2p: indicates whether this AP or STA interface is a p2p
* interface, i.e. a GO or p2p-sta respectively
@@ -1887,7 +1887,7 @@ struct ieee80211_vif {
struct ieee80211_vif_cfg cfg;
struct ieee80211_bss_conf bss_conf;
struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS];
- u16 valid_links, active_links;
+ u16 valid_links, active_links, dormant_links;
u8 addr[ETH_ALEN] __aligned(2);
bool p2p;
@@ -1913,6 +1913,27 @@ struct ieee80211_vif {
u8 drv_priv[] __aligned(sizeof(void *));
};
+/**
+ * ieee80211_vif_usable_links - Return the usable links for the vif
+ * @vif: the vif for which the usable links are requested
+ * Return: the usable link bitmap
+ */
+static inline u16 ieee80211_vif_usable_links(const struct ieee80211_vif *vif)
+{
+ return vif->valid_links & ~vif->dormant_links;
+}
+
+/**
+ * ieee80211_vif_is_mld - Returns true iff the vif is an MLD one
+ * @vif: the vif
+ * Return: %true if the vif is an MLD, %false otherwise.
+ */
+static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif)
+{
+ /* valid_links != 0 indicates this vif is an MLD */
+ return vif->valid_links != 0;
+}
+
#define for_each_vif_active_link(vif, link, link_id) \
for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \
if ((!(vif)->active_links || \
@@ -2240,6 +2261,7 @@ struct ieee80211_sta_aggregates {
* @he_cap: HE capabilities of this STA
* @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities
* @eht_cap: EHT capabilities of this STA
+ * @agg: per-link data for multi-link aggregation
* @bandwidth: current bandwidth the station can receive with
* @rx_nss: in HT/VHT, the maximum number of spatial streams the
* station can receive at the moment, changed by operating mode
@@ -3841,6 +3863,12 @@ struct ieee80211_prep_tx_info {
* the station. See @sta_pre_rcu_remove if needed.
* This callback can sleep.
*
+ * @link_add_debugfs: Drivers can use this callback to add debugfs files
+ * when a link is added to a mac80211 vif. This callback should be within
+ * a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep.
+ * For non-MLO the callback will be called once for the default bss_conf
+ * with the vif's directory rather than a separate subdirectory.
+ *
* @sta_add_debugfs: Drivers can use this callback to add debugfs files
* when a station is added to mac80211's station list. This callback
* should be within a CONFIG_MAC80211_DEBUGFS conditional. This
@@ -3848,7 +3876,7 @@ struct ieee80211_prep_tx_info {
*
* @link_sta_add_debugfs: Drivers can use this callback to add debugfs files
* when a link is added to a mac80211 station. This callback
- * should be within a CPTCFG_MAC80211_DEBUGFS conditional. This
+ * should be within a CONFIG_MAC80211_DEBUGFS conditional. This
* callback can sleep.
* For non-MLO the callback will be called once for the deflink with the
* station's directory rather than a separate subdirectory.
@@ -3956,6 +3984,10 @@ struct ieee80211_prep_tx_info {
* Note that vif can be NULL.
* The callback can sleep.
*
+ * @flush_sta: Flush or drop all pending frames from the hardware queue(s) for
+ * the given station, as it's about to be removed.
+ * The callback can sleep.
+ *
* @channel_switch: Drivers that need (or want) to offload the channel
* switch operation for CSAs received from the AP may implement this
* callback. They must then call ieee80211_chswitch_done() to indicate
@@ -4230,6 +4262,13 @@ struct ieee80211_prep_tx_info {
* Note that a sta can also be inserted or removed with valid links,
* i.e. passed to @sta_add/@sta_state with sta->valid_links not zero.
* In fact, a sta cannot switch between having valid_links and not having them.
+ * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. This is
+ * not restored at HW reset by mac80211 so drivers need to take care of
+ * that.
+ * @net_setup_tc: Called from .ndo_setup_tc in order to prepare hardware
+ * flow offloading for flows originating from the vif.
+ * Note that the driver must not assume that the vif driver_data is valid
+ * at this point, since the callback can be called during netdev teardown.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
@@ -4319,6 +4358,10 @@ struct ieee80211_ops {
int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_sta *sta);
#ifdef CONFIG_MAC80211_DEBUGFS
+ void (*link_add_debugfs)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf,
+ struct dentry *dir);
void (*sta_add_debugfs)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
@@ -4410,6 +4453,8 @@ struct ieee80211_ops {
#endif
void (*flush)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
u32 queues, bool drop);
+ void (*flush_sta)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta);
void (*channel_switch)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_channel_switch *ch_switch);
@@ -4589,6 +4634,14 @@ struct ieee80211_ops {
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
u16 old_links, u16 new_links);
+ int (*set_hw_timestamp)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct cfg80211_set_hw_timestamp *hwts);
+ int (*net_setup_tc)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct net_device *dev,
+ enum tc_setup_type type,
+ void *type_data);
};
/**
@@ -5197,26 +5250,6 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
struct sk_buff *skb);
/**
- * ieee80211_tx_status_8023 - transmit status callback for 802.3 frame format
- *
- * Call this function for all transmitted data frames after their transmit
- * completion. This callback should only be called for data frames which
- * are using driver's (or hardware's) offload capability of encap/decap
- * 802.11 frames.
- *
- * This function may not be called in IRQ context. Calls to this function
- * for a single hardware must be synchronized against each other and all
- * calls in the same tx status family.
- *
- * @hw: the hardware the frame was transmitted by
- * @vif: the interface for which the frame was transmitted
- * @skb: the frame that was transmitted, owned by mac80211 after this call
- */
-void ieee80211_tx_status_8023(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- struct sk_buff *skb);
-
-/**
* ieee80211_report_low_ack - report non-responding station
*
* When operating in AP-mode, call this function to report a non-responding
@@ -5252,7 +5285,8 @@ struct ieee80211_mutable_offsets {
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @offs: &struct ieee80211_mutable_offsets pointer to struct that will
* receive the offsets that may be updated by the driver.
- * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP)
+ * @link_id: the link id to which the beacon belongs (or 0 for an AP STA
+ * that is not associated with AP MLD).
*
* If the driver implements beaconing modes, it must use this function to
* obtain the beacon template.
@@ -5273,6 +5307,74 @@ ieee80211_beacon_get_template(struct ieee80211_hw *hw,
unsigned int link_id);
/**
+ * ieee80211_beacon_get_template_ema_index - EMA beacon template generation
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @offs: &struct ieee80211_mutable_offsets pointer to struct that will
+ * receive the offsets that may be updated by the driver.
+ * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP).
+ * @ema_index: index of the beacon in the EMA set.
+ *
+ * This function follows the same rules as ieee80211_beacon_get_template()
+ * but returns a beacon template which includes the multiple BSSID element at the
+ * requested index.
+ *
+ * Return: The beacon template. %NULL indicates the end of EMA templates.
+ */
+struct sk_buff *
+ieee80211_beacon_get_template_ema_index(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_mutable_offsets *offs,
+ unsigned int link_id, u8 ema_index);
+
+/**
+ * struct ieee80211_ema_beacons - List of EMA beacons
+ * @cnt: count of EMA beacons.
+ *
+ * @bcn: array of EMA beacons.
+ * @bcn.skb: the skb containing this specific beacon
+ * @bcn.offs: &struct ieee80211_mutable_offsets pointer to struct that will
+ * receive the offsets that may be updated by the driver.
+ */
+struct ieee80211_ema_beacons {
+ u8 cnt;
+ struct {
+ struct sk_buff *skb;
+ struct ieee80211_mutable_offsets offs;
+ } bcn[];
+};
+
+/**
+ * ieee80211_beacon_get_template_ema_list - EMA beacon template generation
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP)
+ *
+ * This function follows the same rules as ieee80211_beacon_get_template()
+ * but allocates and returns a pointer to list of all beacon templates required
+ * to cover all profiles in the multiple BSSID set. Each template includes only
+ * one multiple BSSID element.
+ *
+ * Driver must call ieee80211_beacon_free_ema_list() to free the memory.
+ *
+ * Return: EMA beacon templates of type struct ieee80211_ema_beacons *.
+ * %NULL on error.
+ */
+struct ieee80211_ema_beacons *
+ieee80211_beacon_get_template_ema_list(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ unsigned int link_id);
+
+/**
+ * ieee80211_beacon_free_ema_list - free an EMA beacon template list
+ * @ema_beacons: list of EMA beacons, as a &struct ieee80211_ema_beacons pointer.
+ *
+ * This function will free a list previously acquired by calling
+ * ieee80211_beacon_get_template_ema_list()
+ */
+void ieee80211_beacon_free_ema_list(struct ieee80211_ema_beacons *ema_beacons);
+
+/**
* ieee80211_beacon_get_tim - beacon generation function
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
@@ -5281,7 +5383,8 @@ ieee80211_beacon_get_template(struct ieee80211_hw *hw,
* @tim_length: pointer to variable that will receive the TIM IE length,
* (including the ID and length bytes!).
* Set to 0 if invalid (in non-AP modes).
- * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP)
+ * @link_id: the link id to which the beacon belongs (or 0 for an AP STA
+ * that is not associated with AP MLD).
*
* If the driver implements beaconing modes, it must use this function to
* obtain the beacon frame.
@@ -5304,7 +5407,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
* ieee80211_beacon_get - beacon generation function
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
- * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP)
+ * @link_id: the link id to which the beacon belongs (or 0 for an AP STA
+ * that is not associated with AP MLD).
*
* See ieee80211_beacon_get_tim().
*
@@ -5985,6 +6089,20 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
unsigned long delay);
/**
+ * ieee80211_refresh_tx_agg_session_timer - Refresh a tx agg session timer.
+ * @sta: the station for which to start a BA session
+ * @tid: the TID to BA on.
+ *
+ * This function allows a low-level driver to refresh the tx agg session
+ * timer to maintain the BA session; the session itself is still managed by
+ * mac80211.
+ *
+ * Note: must be called in an RCU critical section.
+ */
+void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *sta,
+ u16 tid);
+
+/**
* ieee80211_start_tx_ba_session - Start a tx Block Ack session.
* @sta: the station for which to start a BA session
* @tid: the TID to BA on.
@@ -6497,6 +6615,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
* marks frames marked in the bitmap as having been filtered. Afterwards, it
* checks if any frames in the window starting from @ssn can now be released
* (in case they were only waiting for frames that were filtered.)
+ * (Only works correctly if @max_rx_aggregation_subframes <= 64 frames)
*/
void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
u16 ssn, u64 filtered,
@@ -6781,6 +6900,48 @@ ieee80211_vif_type_p2p(struct ieee80211_vif *vif)
}
/**
+ * ieee80211_get_he_iftype_cap_vif - return HE capabilities for sband/vif
+ * @sband: the sband to search for the iftype on
+ * @vif: the vif to get the iftype from
+ *
+ * Return: pointer to the struct ieee80211_sta_he_cap, or %NULL if none found
+ */
+static inline const struct ieee80211_sta_he_cap *
+ieee80211_get_he_iftype_cap_vif(const struct ieee80211_supported_band *sband,
+ struct ieee80211_vif *vif)
+{
+ return ieee80211_get_he_iftype_cap(sband, ieee80211_vif_type_p2p(vif));
+}
+
+/**
+ * ieee80211_get_he_6ghz_capa_vif - return HE 6 GHz capabilities
+ * @sband: the sband to search for the STA on
+ * @vif: the vif to get the iftype from
+ *
+ * Return: the 6 GHz capabilities
+ */
+static inline __le16
+ieee80211_get_he_6ghz_capa_vif(const struct ieee80211_supported_band *sband,
+ struct ieee80211_vif *vif)
+{
+ return ieee80211_get_he_6ghz_capa(sband, ieee80211_vif_type_p2p(vif));
+}
+
+/**
+ * ieee80211_get_eht_iftype_cap_vif - return EHT capabilities for sband/vif
+ * @sband: the sband to search for the iftype on
+ * @vif: the vif to get the iftype from
+ *
+ * Return: pointer to the struct ieee80211_sta_eht_cap, or %NULL if none found
+ */
+static inline const struct ieee80211_sta_eht_cap *
+ieee80211_get_eht_iftype_cap_vif(const struct ieee80211_supported_band *sband,
+ struct ieee80211_vif *vif)
+{
+ return ieee80211_get_eht_iftype_cap(sband, ieee80211_vif_type_p2p(vif));
+}
+
+/**
* ieee80211_update_mu_groups - set the VHT MU-MIMO group data
*
* @vif: the specified virtual interface
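A hedged driver-side sketch of the two new vif helpers (drv_setup_link() is an invented driver callback): iterate only links that are valid and not dormant, falling back to the single default link for non-MLO.

	static void demo_setup_links(struct ieee80211_vif *vif)
	{
		unsigned long usable = ieee80211_vif_usable_links(vif);
		unsigned int link_id;

		if (!ieee80211_vif_is_mld(vif)) {
			drv_setup_link(vif, 0);	/* non-MLO: default link only */
			return;
		}

		for_each_set_bit(link_id, &usable, IEEE80211_MLD_MAX_NUM_LINKS)
			drv_setup_link(vif, link_id);
	}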
diff --git a/include/net/macsec.h b/include/net/macsec.h
index 5b9c61c4d3a6..75a6f4863c83 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -8,6 +8,7 @@
#define _NET_MACSEC_H_
#include <linux/u64_stats_sync.h>
+#include <linux/if_vlan.h>
#include <uapi/linux/if_link.h>
#include <uapi/linux/if_macsec.h>
@@ -311,5 +312,16 @@ static inline bool macsec_send_sci(const struct macsec_secy *secy)
return tx_sc->send_sci ||
(secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb);
}
+struct net_device *macsec_get_real_dev(const struct net_device *dev);
+bool macsec_netdev_is_offloaded(struct net_device *dev);
+
+static inline void *macsec_netdev_priv(const struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (is_vlan_dev(dev))
+ return netdev_priv(vlan_dev_priv(dev)->real_dev);
+#endif
+ return netdev_priv(dev);
+}
#endif /* _NET_MACSEC_H_ */
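A minimal sketch of the intended use in offloading drivers (demo name invented): resolve the MACsec private data even when an 802.1Q VLAN device is stacked on top of the MACsec device; macsec_get_real_dev() can then reach the underlying lower device.

	static void *demo_get_macsec_priv(struct net_device *dev)
	{
		/* handles both a macsec netdev and a VLAN stacked on one */
		return macsec_netdev_priv(dev);
	}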
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 56189e4252da..88b6ef7ce1a6 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -33,6 +33,7 @@ enum gdma_request_type {
GDMA_DESTROY_PD = 30,
GDMA_CREATE_MR = 31,
GDMA_DESTROY_MR = 32,
+ GDMA_QUERY_HWC_TIMEOUT = 84, /* 0x54 */
};
#define GDMA_RESOURCE_DOORBELL_PAGE 27
@@ -57,6 +58,8 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
GDMA_EQE_HWC_INIT_DATA = 130,
GDMA_EQE_HWC_INIT_DONE = 131,
+ GDMA_EQE_HWC_SOC_RECONFIG = 132,
+ GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
};
enum {
@@ -145,6 +148,7 @@ struct gdma_general_req {
}; /* HW DATA */
#define GDMA_MESSAGE_V1 1
+#define GDMA_MESSAGE_V2 2
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
@@ -354,6 +358,9 @@ struct gdma_context {
struct gdma_resource msix_resource;
struct gdma_irq_context *irq_contexts;
+ /* L2 MTU */
+ u16 adapter_mtu;
+
/* This maps a CQ index to the queue structure. */
unsigned int max_num_cqs;
struct gdma_queue **cq_table;
@@ -527,10 +534,12 @@ enum {
* so the driver is able to reliably support features like busy_poll.
*/
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
+#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
- GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX)
+ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
#define GDMA_DRV_CAP_FLAGS2 0
@@ -660,6 +669,19 @@ struct gdma_disable_queue_req {
u32 alloc_res_id_on_creation;
}; /* HW DATA */
+/* GDMA_QUERY_HWC_TIMEOUT */
+struct gdma_query_hwc_timeout_req {
+ struct gdma_req_hdr hdr;
+ u32 timeout_ms;
+ u32 reserved;
+};
+
+struct gdma_query_hwc_timeout_resp {
+ struct gdma_resp_hdr hdr;
+ u32 timeout_ms;
+ u32 reserved;
+};
+
enum atb_page_size {
ATB_PAGE_SIZE_4K,
ATB_PAGE_SIZE_8K,
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 6a757a6e2732..3d3b5c881bc1 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -23,6 +23,10 @@
#define HWC_INIT_DATA_PF_DEST_RQ_ID 10
#define HWC_INIT_DATA_PF_DEST_CQ_ID 11
+#define HWC_DATA_CFG_HWC_TIMEOUT 1
+
+#define HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS 30000
+
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
* them are naturally aligned and hence don't need __packed.
*/
@@ -182,6 +186,7 @@ struct hw_channel_context {
u32 pf_dest_vrq_id;
u32 pf_dest_vrcq_id;
+ u32 hwc_timeout;
struct hwc_caller_ctx *caller_ctx;
};
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 3bb579962a14..9f70b4332238 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -4,6 +4,8 @@
#ifndef _MANA_H
#define _MANA_H
+#include <net/xdp.h>
+
#include "gdma.h"
#include "hw_channel.h"
@@ -36,10 +38,8 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
-#define ADAPTER_MTU_SIZE 1500
-#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14)
-
#define RX_BUFFERS_PER_QUEUE 512
+#define MANA_RX_DATA_ALIGN 64
#define MAX_SEND_BUFFERS_PER_QUEUE 256
@@ -48,6 +48,10 @@ enum TRI_STATE {
#define MAX_PORTS_IN_MANA_DEV 256
+/* Update this count whenever the respective structures are changed */
+#define MANA_STATS_RX_COUNT 5
+#define MANA_STATS_TX_COUNT 11
+
struct mana_stats_rx {
u64 packets;
u64 bytes;
@@ -61,6 +65,14 @@ struct mana_stats_tx {
u64 packets;
u64 bytes;
u64 xdp_xmit;
+ u64 tso_packets;
+ u64 tso_bytes;
+ u64 tso_inner_packets;
+ u64 tso_inner_bytes;
+ u64 short_pkt_fmt;
+ u64 long_pkt_fmt;
+ u64 csum_partial;
+ u64 mana_map_err;
struct u64_stats_sync syncp;
};
@@ -270,7 +282,7 @@ struct mana_recv_buf_oob {
struct gdma_wqe_request wqe_req;
void *buf_va;
- dma_addr_t buf_dma_addr;
+ bool from_pool; /* allocated from a page pool */
/* SGL of the buffer going to be sent has part of the work request. */
u32 num_sge;
@@ -283,6 +295,11 @@ struct mana_recv_buf_oob {
struct gdma_posted_wqe_info wqe_inf;
};
+#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+ + ETH_HLEN)
+
+#define MANA_XDP_MTU_MAX (PAGE_SIZE - MANA_RXBUF_PAD - XDP_PACKET_HEADROOM)
+
struct mana_rxq {
struct gdma_queue *gdma_rq;
/* Cache the gdma receive queue id */
@@ -292,6 +309,8 @@ struct mana_rxq {
u32 rxq_idx;
u32 datasize;
+ u32 alloc_size;
+ u32 headroom;
mana_handle_t rxobj;
@@ -310,10 +329,12 @@ struct mana_rxq {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
- struct page *xdp_save_page;
+ void *xdp_save_va; /* saved VA for reuse */
bool xdp_flush;
int xdp_rc; /* XDP redirect return code */
+ struct page_pool *page_pool;
+
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
*/
@@ -331,6 +352,17 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
+ u64 hc_tx_bytes;
+ u64 hc_tx_ucast_pkts;
+ u64 hc_tx_ucast_bytes;
+ u64 hc_tx_bcast_pkts;
+ u64 hc_tx_bcast_bytes;
+ u64 hc_tx_mcast_pkts;
+ u64 hc_tx_mcast_bytes;
+ u64 tx_cqe_err;
+ u64 tx_cqe_unknown_type;
+ u64 rx_coalesced_err;
+ u64 rx_cqe_unknown_type;
};
struct mana_context {
@@ -369,6 +401,14 @@ struct mana_port_context {
/* This points to an array of num_queues of RQ pointers. */
struct mana_rxq **rxqs;
+ /* pre-allocated rx buffer array */
+ void **rxbufs_pre;
+ dma_addr_t *das_pre;
+ int rxbpre_total;
+ u32 rxbpre_datasize;
+ u32 rxbpre_alloc_size;
+ u32 rxbpre_headroom;
+
struct bpf_prog *bpf_prog;
/* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
@@ -409,6 +449,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
+void mana_query_gf_stats(struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
@@ -468,6 +509,11 @@ struct mana_query_device_cfg_resp {
u16 max_num_vports;
u16 reserved;
u32 max_num_eqs;
+
+ /* response v2: */
+ u16 adapter_mtu;
+ u16 reserved2;
+ u32 reserved3;
}; /* HW DATA */
/* Query vPort Configuration */
@@ -545,8 +591,51 @@ struct mana_fence_rq_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
+/* Query stats RQ */
+struct mana_query_gf_stat_req {
+ struct gdma_req_hdr hdr;
+ u64 req_stats;
+}; /* HW DATA */
+
+struct mana_query_gf_stat_resp {
+ struct gdma_resp_hdr hdr;
+ u64 reported_stats;
+ /* rx errors/discards */
+ u64 discard_rx_nowqe;
+ u64 err_rx_vport_disabled;
+ /* rx bytes/packets */
+ u64 hc_rx_bytes;
+ u64 hc_rx_ucast_pkts;
+ u64 hc_rx_ucast_bytes;
+ u64 hc_rx_bcast_pkts;
+ u64 hc_rx_bcast_bytes;
+ u64 hc_rx_mcast_pkts;
+ u64 hc_rx_mcast_bytes;
+ /* tx errors */
+ u64 err_tx_gf_disabled;
+ u64 err_tx_vport_disabled;
+ u64 err_tx_inval_vport_offset_pkt;
+ u64 err_tx_vlan_enforcement;
+ u64 err_tx_ethtype_enforcement;
+ u64 err_tx_SA_enforecement;
+ u64 err_tx_SQPDID_enforcement;
+ u64 err_tx_CQPDID_enforcement;
+ u64 err_tx_mtu_violation;
+ u64 err_tx_inval_oob;
+ /* tx bytes/packets */
+ u64 hc_tx_bytes;
+ u64 hc_tx_ucast_pkts;
+ u64 hc_tx_ucast_bytes;
+ u64 hc_tx_bcast_pkts;
+ u64 hc_tx_bcast_bytes;
+ u64 hc_tx_mcast_pkts;
+ u64 hc_tx_mcast_bytes;
+ /* tx error */
+ u64 err_tx_gdma;
+}; /* HW DATA */
+
/* Configure vPort Rx Steering */
-struct mana_cfg_rx_steer_req {
+struct mana_cfg_rx_steer_req_v2 {
struct gdma_req_hdr hdr;
mana_handle_t vport;
u16 num_indir_entries;
@@ -559,6 +648,8 @@ struct mana_cfg_rx_steer_req {
u8 reserved;
mana_handle_t default_rxobj;
u8 hashkey[MANA_HASH_KEY_SIZE];
+ u8 cqe_coalescing_enable;
+ u8 reserved2[7];
}; /* HW DATA */
struct mana_cfg_rx_steer_resp {
@@ -622,6 +713,42 @@ struct mana_deregister_filter_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
+/* Requested GF stats Flags */
+/* Rx discards/Errors */
+#define STATISTICS_FLAGS_RX_DISCARDS_NO_WQE 0x0000000000000001
+#define STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED 0x0000000000000002
+/* Rx bytes/pkts */
+#define STATISTICS_FLAGS_HC_RX_BYTES 0x0000000000000004
+#define STATISTICS_FLAGS_HC_RX_UCAST_PACKETS 0x0000000000000008
+#define STATISTICS_FLAGS_HC_RX_UCAST_BYTES 0x0000000000000010
+#define STATISTICS_FLAGS_HC_RX_MCAST_PACKETS 0x0000000000000020
+#define STATISTICS_FLAGS_HC_RX_MCAST_BYTES 0x0000000000000040
+#define STATISTICS_FLAGS_HC_RX_BCAST_PACKETS 0x0000000000000080
+#define STATISTICS_FLAGS_HC_RX_BCAST_BYTES 0x0000000000000100
+/* Tx errors */
+#define STATISTICS_FLAGS_TX_ERRORS_GF_DISABLED 0x0000000000000200
+#define STATISTICS_FLAGS_TX_ERRORS_VPORT_DISABLED 0x0000000000000400
+#define STATISTICS_FLAGS_TX_ERRORS_INVAL_VPORT_OFFSET_PACKETS \
+ 0x0000000000000800
+#define STATISTICS_FLAGS_TX_ERRORS_VLAN_ENFORCEMENT 0x0000000000001000
+#define STATISTICS_FLAGS_TX_ERRORS_ETH_TYPE_ENFORCEMENT \
+ 0x0000000000002000
+#define STATISTICS_FLAGS_TX_ERRORS_SA_ENFORCEMENT 0x0000000000004000
+#define STATISTICS_FLAGS_TX_ERRORS_SQPDID_ENFORCEMENT 0x0000000000008000
+#define STATISTICS_FLAGS_TX_ERRORS_CQPDID_ENFORCEMENT 0x0000000000010000
+#define STATISTICS_FLAGS_TX_ERRORS_MTU_VIOLATION 0x0000000000020000
+#define STATISTICS_FLAGS_TX_ERRORS_INVALID_OOB 0x0000000000040000
+/* Tx bytes/pkts */
+#define STATISTICS_FLAGS_HC_TX_BYTES 0x0000000000080000
+#define STATISTICS_FLAGS_HC_TX_UCAST_PACKETS 0x0000000000100000
+#define STATISTICS_FLAGS_HC_TX_UCAST_BYTES 0x0000000000200000
+#define STATISTICS_FLAGS_HC_TX_MCAST_PACKETS 0x0000000000400000
+#define STATISTICS_FLAGS_HC_TX_MCAST_BYTES 0x0000000000800000
+#define STATISTICS_FLAGS_HC_TX_BCAST_PACKETS 0x0000000001000000
+#define STATISTICS_FLAGS_HC_TX_BCAST_BYTES 0x0000000002000000
+/* Tx error */
+#define STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR 0x0000000004000000
+
#define MANA_MAX_NUM_QUEUES 64
#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
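The XDP MTU bound above is plain page-size arithmetic: headroom, pad, and MTU must fit in one page. A compile-time restatement, illustrative only:

	/* MANA_XDP_MTU_MAX is defined so this sums to exactly PAGE_SIZE */
	static_assert(XDP_PACKET_HEADROOM + MANA_RXBUF_PAD + MANA_XDP_MTU_MAX
		      <= PAGE_SIZE);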
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 82800d521c3d..da86e106c91d 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -234,9 +234,9 @@ struct mctp_flow {
struct mctp_route {
mctp_eid_t min, max;
- struct mctp_dev *dev;
- unsigned int mtu;
unsigned char type;
+ unsigned int mtu;
+ struct mctp_dev *dev;
int (*output)(struct mctp_route *route,
struct sk_buff *skb);
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 3c5c68618fcc..fb996124b3d5 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -96,6 +96,27 @@ struct mptcp_out_options {
#endif
};
+#define MPTCP_SCHED_NAME_MAX 16
+#define MPTCP_SUBFLOWS_MAX 8
+
+struct mptcp_sched_data {
+ bool reinject;
+ u8 subflows;
+ struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
+};
+
+struct mptcp_sched_ops {
+ int (*get_subflow)(struct mptcp_sock *msk,
+ struct mptcp_sched_data *data);
+
+ char name[MPTCP_SCHED_NAME_MAX];
+ struct module *owner;
+ struct list_head list;
+
+ void (*init)(struct mptcp_sock *msk);
+ void (*release)(struct mptcp_sock *msk);
+} ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_MPTCP
void mptcp_init(void);
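A skeletal scheduler built on the new ops table; a minimal sketch only. How a pick is reported back to the core, and the registration call itself, are MPTCP-core internals and assumed here.

	static int demo_get_subflow(struct mptcp_sock *msk,
				    struct mptcp_sched_data *data)
	{
		if (!data->subflows)
			return -EINVAL;

		/* a real scheduler would mark data->contexts[0] as its pick
		 * via a core-internal helper (assumed, not shown)
		 */
		return 0;
	}

	static struct mptcp_sched_ops demo_sched = {
		.name		= "demo_first",
		.owner		= THIS_MODULE,
		.get_subflow	= demo_get_subflow,
	};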
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 07e5168cdaf9..9bbdf6eaa942 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -395,11 +395,11 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
{
struct neighbour *n;
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv6_neigh_lookup_noref(dev, pkey);
if (n && !refcount_inc_not_zero(&n->refcnt))
n = NULL;
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return n;
}
@@ -409,10 +409,10 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
{
struct neighbour *n;
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv6_neigh_lookup_noref(dev, pkey);
neigh_confirm(n);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
@@ -420,10 +420,10 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
{
struct neighbour *n;
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
neigh_confirm(n);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
/* uses ipv6_stub and is meant for use outside of IPv6 core */
@@ -488,9 +488,6 @@ void igmp6_event_report(struct sk_buff *skb);
#ifdef CONFIG_SYSCTL
int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-int ndisc_ifinfo_sysctl_strategy(struct ctl_table *ctl,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen);
#endif
void inet6_ifinfo_notify(int event, struct inet6_dev *idev);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 2f2a6023fb0e..6da68886fabb 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -180,7 +180,7 @@ struct pneigh_entry {
netdevice_tracker dev_tracker;
u32 flags;
u8 protocol;
- u8 key[];
+ u32 key[];
};
/*
@@ -299,14 +299,14 @@ static inline struct neighbour *___neigh_lookup_noref(
const void *pkey,
struct net_device *dev)
{
- struct neigh_hash_table *nht = rcu_dereference_bh(tbl->nht);
+ struct neigh_hash_table *nht = rcu_dereference(tbl->nht);
struct neighbour *n;
u32 hash_val;
hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ for (n = rcu_dereference(nht->hash_buckets[hash_val]);
n != NULL;
- n = rcu_dereference_bh(n->next)) {
+ n = rcu_dereference(n->next)) {
if (n->dev == dev && key_eq(n, pkey))
return n;
}
@@ -336,8 +336,6 @@ void neigh_table_init(int index, struct neigh_table *tbl);
int neigh_table_clear(int index, struct neigh_table *tbl);
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev);
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
- const void *pkey);
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref);
static inline struct neighbour *neigh_create(struct neigh_table *tbl,
@@ -396,8 +394,6 @@ void neigh_for_each(struct neigh_table *tbl,
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *));
int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *);
-void pneigh_for_each(struct neigh_table *tbl,
- void (*cb)(struct pneigh_entry *));
struct neigh_seq_state {
struct seq_net_private p;
@@ -466,7 +462,7 @@ static __always_inline int neigh_event_send_probe(struct neighbour *neigh,
if (READ_ONCE(neigh->used) != now)
WRITE_ONCE(neigh->used, now);
- if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
+ if (!(READ_ONCE(neigh->nud_state) & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
return __neigh_event_send(neigh, skb, immediate_ok);
return 0;
}
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 78beaa765c73..eb6cd43b1746 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -42,6 +42,7 @@
#include <linux/idr.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
+#include <linux/xarray.h>
struct user_namespace;
struct proc_dir_entry;
@@ -69,7 +70,7 @@ struct net {
atomic_t dev_unreg_count;
unsigned int dev_base_seq; /* protected by rtnl_mutex */
- int ifindex;
+ u32 ifindex;
spinlock_t nsid_lock;
atomic_t fnhe_genid;
@@ -110,6 +111,7 @@ struct net {
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
+ struct xarray dev_by_index;
struct raw_notifier_head netdev_chain;
/* Note that @hash_mix can be read millions times per second,
@@ -469,15 +471,17 @@ void unregister_pernet_device(struct pernet_operations *);
struct ctl_table;
+#define register_net_sysctl(net, path, table) \
+ register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table))
#ifdef CONFIG_SYSCTL
int net_sysctl_init(void);
-struct ctl_table_header *register_net_sysctl(struct net *net, const char *path,
- struct ctl_table *table);
+struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path,
+ struct ctl_table *table, size_t table_size);
void unregister_net_sysctl_table(struct ctl_table_header *header);
#else
static inline int net_sysctl_init(void) { return 0; }
-static inline struct ctl_table_header *register_net_sysctl(struct net *net,
- const char *path, struct ctl_table *table)
+static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net,
+ const char *path, struct ctl_table *table, size_t table_size)
{
return NULL;
}
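The macro keeps every existing register_net_sysctl() caller source-compatible while the table size is now computed at the call site via ARRAY_SIZE(). A hedged sketch (demo names invented; note the sentinel entry counts toward the size):

	static int demo_value;

	static struct ctl_table demo_table[] = {
		{
			.procname	= "demo_value",
			.data		= &demo_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ }	/* sentinel */
	};

	static struct ctl_table_header *demo_register(struct net *net)
	{
		/* expands to:
		 * register_net_sysctl_sz(net, "net/demo", demo_table, 2)
		 */
		return register_net_sysctl(net, "net/demo", demo_table);
	}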
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
new file mode 100644
index 000000000000..d68b0a483431
--- /dev/null
+++ b/include/net/netdev_queues.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NET_QUEUES_H
+#define _LINUX_NET_QUEUES_H
+
+#include <linux/netdevice.h>
+
+/**
+ * DOC: Lockless queue stopping / waking helpers.
+ *
+ * The netif_txq_maybe_stop() and __netif_txq_completed_wake()
+ * macros are designed to safely implement stopping
+ * and waking netdev queues without full lock protection.
+ *
+ * We assume that there can be no concurrent stop attempts and no concurrent
+ * wake attempts. The try-stop should happen from the xmit handler,
+ * while wake up should be triggered from NAPI poll context.
+ * The two may run concurrently (single producer, single consumer).
+ *
+ * The try-stop side is expected to run from the xmit handler and therefore
+ * it does not reschedule Tx (netif_tx_start_queue() instead of
+ * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
+ * handler may lead to the xmit queue being enabled but not run.
+ * The waking side does not have similar context restrictions.
+ *
+ * The macros guarantee that rings will not remain stopped if there's
+ * space available, but they do *not* prevent false wake ups when
+ * the ring is full! Drivers should check for ring full at the start
+ * of the xmit handler.
+ *
+ * All descriptor ring indexes (and other relevant shared state) must
+ * be updated before invoking the macros.
+ */
+
+#define netif_txq_try_stop(txq, get_desc, start_thrs) \
+ ({ \
+ int _res; \
+ \
+ netif_tx_stop_queue(txq); \
+ /* Producer index and stop bit must be visible \
+ * to consumer before we recheck. \
+ * Pairs with a barrier in __netif_txq_completed_wake(). \
+ */ \
+ smp_mb__after_atomic(); \
+ \
+ /* We need to check again in case another \
+ * CPU has just made room available. \
+ */ \
+ _res = 0; \
+ if (unlikely(get_desc >= start_thrs)) { \
+ netif_tx_start_queue(txq); \
+ _res = -1; \
+ } \
+ _res; \
+ }) \
+
+/**
+ * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
+ * @txq: struct netdev_queue to stop/start
+ * @get_desc: get current number of free descriptors (see requirements below!)
+ * @stop_thrs: minimal number of available descriptors for queue to be left
+ * enabled
+ * @start_thrs: minimal number of descriptors to re-enable the queue, can be
+ * equal to @stop_thrs or higher to avoid frequent waking
+ *
+ * All arguments may be evaluated multiple times, beware of side effects.
+ * @get_desc must be a formula or a function call; it must always
+ * return up-to-date information when evaluated!
+ * Expected to be used from ndo_start_xmit, see the comment on top of the file.
+ *
+ * Returns:
+ * 0 if the queue was stopped
+ * 1 if the queue was left enabled
+ * -1 if the queue was re-enabled (raced with waking)
+ */
+#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs) \
+ ({ \
+ int _res; \
+ \
+ _res = 1; \
+ if (unlikely(get_desc < stop_thrs)) \
+ _res = netif_txq_try_stop(txq, get_desc, start_thrs); \
+ _res; \
+ }) \
+
+/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
+ * @bytes != 0, regardless of kernel config.
+ */
+static inline void
+netdev_txq_completed_mb(struct netdev_queue *dev_queue,
+ unsigned int pkts, unsigned int bytes)
+{
+ if (IS_ENABLED(CONFIG_BQL))
+ netdev_tx_completed_queue(dev_queue, pkts, bytes);
+ else if (bytes)
+ smp_mb();
+}
+
+/**
+ * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
+ * @txq: struct netdev_queue to stop/start
+ * @pkts: number of packets completed
+ * @bytes: number of bytes completed
+ * @get_desc: get current number of free descriptors (see requirements below!)
+ * @start_thrs: minimal number of descriptors to re-enable the queue
+ * @down_cond: down condition, predicate indicating that the queue should
+ * not be woken up even if descriptors are available
+ *
+ * All arguments may be evaluated multiple times.
+ * @get_desc must be a formula or a function call; it must always
+ * return up-to-date information when evaluated!
+ * Reports completed pkts/bytes to BQL.
+ *
+ * Returns:
+ * 0 if the queue was woken up
+ * 1 if the queue was already enabled (or disabled but @down_cond is true)
+ * -1 if the queue was left unchanged (@start_thrs not reached)
+ */
+#define __netif_txq_completed_wake(txq, pkts, bytes, \
+ get_desc, start_thrs, down_cond) \
+ ({ \
+ int _res; \
+ \
+ /* Report to BQL and piggy back on its barrier. \
+ * Barrier makes sure that anybody stopping the queue \
+ * after this point sees the new consumer index. \
+ * Pairs with barrier in netif_txq_try_stop(). \
+ */ \
+ netdev_txq_completed_mb(txq, pkts, bytes); \
+ \
+ _res = -1; \
+ if (pkts && likely(get_desc > start_thrs)) { \
+ _res = 1; \
+ if (unlikely(netif_tx_queue_stopped(txq)) && \
+ !(down_cond)) { \
+ netif_tx_wake_queue(txq); \
+ _res = 0; \
+ } \
+ } \
+ _res; \
+ })
+
+#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
+ __netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)
+
+/* subqueue variants follow */
+
+#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \
+ ({ \
+ struct netdev_queue *txq; \
+ \
+ txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_try_stop(txq, get_desc, start_thrs); \
+ })
+
+#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
+ ({ \
+ struct netdev_queue *txq; \
+ \
+ txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
+ })
+
+#define netif_subqueue_completed_wake(dev, idx, pkts, bytes, \
+ get_desc, start_thrs) \
+ ({ \
+ struct netdev_queue *txq; \
+ \
+ txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_completed_wake(txq, pkts, bytes, \
+ get_desc, start_thrs); \
+ })
+
+#endif
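[Editor's note: as a worked illustration of the contract above, here is a minimal sketch of how a driver might pair the two sides. Everything prefixed my_ (the priv/ring structures, my_ring_free_descs(), the thresholds) is hypothetical; only the netif_txq_*() helpers come from this header, and the ring indexes are assumed to be updated before the macros run, as the DOC comment requires.]

	/* Xmit side: stop when fewer than MY_STOP_THRS free descriptors
	 * remain; the macro re-enables the queue itself if the completion
	 * path raced with us. my_* names are hypothetical.
	 */
	static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		struct my_priv *priv = netdev_priv(dev);
		struct my_ring *ring = &priv->tx_ring[skb_get_queue_mapping(skb)];
		struct netdev_queue *txq = netdev_get_tx_queue(dev, ring->idx);

		/* ... post skb to the ring and publish the producer index ... */

		netif_txq_maybe_stop(txq, my_ring_free_descs(ring),
				     MY_STOP_THRS, MY_START_THRS);
		return NETDEV_TX_OK;
	}

	/* Completion side, called from NAPI poll after reaping descriptors:
	 * advance the consumer index first, then report to BQL and maybe wake.
	 */
	static void my_clean_tx(struct my_ring *ring, struct netdev_queue *txq,
				unsigned int pkts, unsigned int bytes)
	{
		netif_txq_completed_wake(txq, pkts, bytes,
					 my_ring_free_descs(ring), MY_START_THRS);
	}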
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
new file mode 100644
index 000000000000..cdcafb30d437
--- /dev/null
+++ b/include/net/netdev_rx_queue.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NETDEV_RX_QUEUE_H
+#define _LINUX_NETDEV_RX_QUEUE_H
+
+#include <linux/kobject.h>
+#include <linux/netdevice.h>
+#include <linux/sysfs.h>
+#include <net/xdp.h>
+
+/* This structure contains an instance of an RX queue. */
+struct netdev_rx_queue {
+ struct xdp_rxq_info xdp_rxq;
+#ifdef CONFIG_RPS
+ struct rps_map __rcu *rps_map;
+ struct rps_dev_flow_table __rcu *rps_flow_table;
+#endif
+ struct kobject kobj;
+ struct net_device *dev;
+ netdevice_tracker dev_tracker;
+
+#ifdef CONFIG_XDP_SOCKETS
+ struct xsk_buff_pool *pool;
+#endif
+} ____cacheline_aligned_in_smp;
+
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
+ ssize_t (*store)(struct netdev_rx_queue *queue,
+ const char *buf, size_t len);
+};
+
+static inline struct netdev_rx_queue *
+__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
+{
+ return dev->_rx + rxq;
+}
+
+#ifdef CONFIG_SYSFS
+static inline unsigned int
+get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ int index = queue - dev->_rx;
+
+ BUG_ON(index >= dev->num_rx_queues);
+ return index;
+}
+#endif
+#endif
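[Editor's note: a short, hypothetical sketch of the accessor in use. dev->real_num_rx_queues comes from struct net_device; my_for_each_rxq() and the per-queue work are invented for the example.]

	static void my_for_each_rxq(struct net_device *dev)
	{
		unsigned int i;

		for (i = 0; i < dev->real_num_rx_queues; i++) {
			struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, i);

			/* e.g. inspect rxq->xdp_rxq, or rxq->pool when
			 * CONFIG_XDP_SOCKETS is enabled
			 */
			(void)rxq;
		}
	}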
diff --git a/include/net/netfilter/nf_bpf_link.h b/include/net/netfilter/nf_bpf_link.h
new file mode 100644
index 000000000000..6c984b0ea838
--- /dev/null
+++ b/include/net/netfilter/nf_bpf_link.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+struct bpf_nf_ctx {
+ const struct nf_hook_state *state;
+ struct sk_buff *skb;
+};
+
+#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)
+int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+#else
+static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
+#endif
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a72028dbef0c..4085765c3370 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -190,10 +190,6 @@ static inline void nf_ct_put(struct nf_conn *ct)
nf_ct_destroy(&ct->ct_general);
}
-/* Protocol module loading */
-int nf_ct_l3proto_try_module_get(unsigned short l3proto);
-void nf_ct_l3proto_module_put(unsigned short l3proto);
-
/* load module; enable/disable conntrack in this namespace */
int nf_ct_netns_get(struct net *net, u8 nfproto);
void nf_ct_netns_put(struct net *net, u8 nfproto);
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 4b2b7f8914ea..a120685cac93 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -78,6 +78,4 @@ static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
void nf_conntrack_acct_pernet_init(struct net *net);
-void nf_conntrack_acct_fini(void);
-
#endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71d1269fe4d4..3384859a8921 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -89,7 +89,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
{
if (timeout > INT_MAX)
timeout = INT_MAX;
- WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+
+ if (nf_ct_is_confirmed(ct))
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+ else
+ ct->timeout = (u32)timeout;
}
int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 0855b60fba17..165e7a03b8e9 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -26,6 +26,15 @@ struct nf_conntrack_expect {
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
+ /* Usage count. */
+ refcount_t use;
+
+ /* Flags */
+ unsigned int flags;
+
+ /* Expectation class */
+ unsigned int class;
+
/* Function to call after setup and insertion */
void (*expectfn)(struct nf_conn *new,
struct nf_conntrack_expect *this);
@@ -39,15 +48,6 @@ struct nf_conntrack_expect {
/* Timer function; deletes the expectation. */
struct timer_list timeout;
- /* Usage count. */
- refcount_t use;
-
- /* Flags */
- unsigned int flags;
-
- /* Expectation class */
- unsigned int class;
-
#if IS_ENABLED(CONFIG_NF_NAT)
union nf_inet_addr saved_addr;
/* This is the original per-proto part, used to map the
@@ -100,7 +100,7 @@ nf_ct_expect_find_get(struct net *net,
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple);
+ const struct nf_conntrack_tuple *tuple, bool unlink);
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report);
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index f30b1694b690..de2f956abf34 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -136,8 +136,6 @@ static inline void *nfct_help_data(const struct nf_conn *ct)
return (void *)help->data;
}
-void nf_conntrack_helper_pernet_init(struct net *net);
-
int nf_conntrack_helper_init(void);
void nf_conntrack_helper_fini(void);
@@ -182,5 +180,4 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat);
int nf_nat_helper_try_module_get(const char *name, u16 l3num,
u8 protonum);
void nf_nat_helper_put(struct nf_conntrack_helper *helper);
-void nf_ct_set_auto_assign_helper_warned(struct net *net);
#endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index 66bab6c60d12..fcb19a4e8f2b 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -52,7 +52,6 @@ int nf_connlabels_replace(struct nf_conn *ct,
const u32 *data, const u32 *mask, unsigned int words);
#ifdef CONFIG_NF_CONNTRACK_LABELS
-int nf_conntrack_labels_init(void);
int nf_connlabels_get(struct net *net, unsigned int bit);
void nf_connlabels_put(struct net *net);
#else
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 9334371c94e2..f7dd950ff250 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -67,6 +67,9 @@ struct nf_conntrack_tuple {
/* The protocol. */
u_int8_t protonum;
+ /* The direction must be ignored for the tuplehash */
+ struct { } __nfct_hash_offsetend;
+
/* The direction (for tuplehash) */
u_int8_t dir;
} dst;
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ebb28ec5b6fa..d466e1a3b0b1 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -263,12 +263,12 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
up_write(&flow_table->flow_block_lock);
}
-int flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route);
+void flow_offload_route_init(struct flow_offload *flow,
+ const struct nf_flow_route *route);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
- struct flow_offload *flow);
+ struct flow_offload *flow, bool force);
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h
index 2418653a66db..279380de904c 100644
--- a/include/net/netfilter/nf_nat_redirect.h
+++ b/include/net/netfilter/nf_nat_redirect.h
@@ -6,8 +6,7 @@
#include <uapi/linux/netfilter/nf_nat.h>
unsigned int
-nf_nat_redirect_ipv4(struct sk_buff *skb,
- const struct nf_nat_ipv4_multi_range_compat *mr,
+nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range,
unsigned int hooknum);
unsigned int
nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1b8e305bb54a..dd40c75011d2 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -462,7 +462,8 @@ struct nft_set_ops {
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
-
+ void (*commit)(const struct nft_set *set);
+ void (*abort)(const struct nft_set *set);
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
bool (*estimate)(const struct nft_set_desc *desc,
@@ -471,7 +472,8 @@ struct nft_set_ops {
int (*init)(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
- void (*destroy)(const struct nft_set *set);
+ void (*destroy)(const struct nft_ctx *ctx,
+ const struct nft_set *set);
void (*gc_init)(const struct nft_set *set);
unsigned int elemsize;
@@ -510,6 +512,7 @@ struct nft_set_elem_expr {
*
* @list: table set list node
* @bindings: list of set bindings
+ * @refs: internal refcounting for async set destruction
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set
@@ -531,6 +534,7 @@ struct nft_set_elem_expr {
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
+ * @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
@@ -539,6 +543,7 @@ struct nft_set_elem_expr {
struct nft_set {
struct list_head list;
struct list_head bindings;
+ refcount_t refs;
struct nft_table *table;
possible_net_t net;
char *name;
@@ -557,9 +562,11 @@ struct nft_set {
u16 policy;
u16 udlen;
unsigned char *udata;
+ struct list_head pending_update;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- u16 flags:14,
+ u16 flags:13,
+ dead:1,
genmask:2;
u8 klen;
u8 dlen;
@@ -580,6 +587,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
return (void *)set->data;
}
+static inline bool nft_set_gc_is_pending(const struct nft_set *s)
+{
+ return refcount_read(&s->refs) != 1;
+}
+
static inline struct nft_set *nft_set_container_of(const void *priv)
{
return (void *)priv - offsetof(struct nft_set, data);
@@ -593,7 +605,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
@@ -619,6 +630,7 @@ struct nft_set_binding {
};
enum nft_trans_phase;
+void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set);
void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding,
enum nft_trans_phase phase);
@@ -806,62 +818,8 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[]);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr);
-
-/**
- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- * @rcu: rcu head
- * @set: set the elements belong to
- * @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
- struct rcu_head rcu;
- const struct nft_set *set;
- unsigned int cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
- sizeof(struct nft_set_gc_batch_head)) / \
- sizeof(void *))
-
-/**
- * struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- * @head: GC batch head
- * @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
- struct nft_set_gc_batch_head head;
- void *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
- if (gcb != NULL)
- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
- gfp_t gfp)
-{
- if (gcb != NULL) {
- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
- return gcb;
- nft_set_gc_batch_complete(gcb);
- }
- return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
- void *elem)
-{
- gcb->elems[gcb->head.cnt++] = elem;
-}
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem);
struct nft_expr_ops;
/**
@@ -898,6 +856,7 @@ struct nft_expr_type {
enum nft_trans_phase {
NFT_TRANS_PREPARE,
+ NFT_TRANS_PREPARE_ERROR,
NFT_TRANS_ABORT,
NFT_TRANS_COMMIT,
NFT_TRANS_RELEASE
@@ -1006,7 +965,10 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
return (void *)&rule->data[rule->dlen];
}
-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule);
+void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule);
+void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
+ enum nft_trans_phase phase);
+void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule);
static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
struct nft_regs *regs,
@@ -1046,6 +1008,18 @@ struct nft_rule_dp {
__attribute__((aligned(__alignof__(struct nft_expr))));
};
+struct nft_rule_dp_last {
+ struct nft_rule_dp end; /* end of nft_rule_blob marker */
+ struct rcu_head h; /* call_rcu head */
+ struct nft_rule_blob *blob; /* ptr to free via call_rcu */
+ const struct nft_chain *chain; /* for nftables tracing */
+};
+
+static inline const struct nft_rule_dp *nft_rule_next(const struct nft_rule_dp *rule)
+{
+ return (void *)rule + sizeof(*rule) + rule->dlen;
+}
+
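[Editor's note: a hedged sketch of how the dataplane might walk a rule blob with this helper. It relies on nft_rule_dp's is_last bit being set on the trailing nft_rule_dp_last marker; my_walk_rules() itself is invented.]

	static void my_walk_rules(const struct nft_rule_blob *blob)
	{
		const struct nft_rule_dp *rule =
			(const struct nft_rule_dp *)blob->data;

		while (!rule->is_last) {
			/* ... evaluate this rule's expressions ... */
			rule = nft_rule_next(rule);
		}
	}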
struct nft_rule_blob {
unsigned long size;
unsigned char data[]
@@ -1089,6 +1063,8 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem);
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
+int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
+void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
@@ -1125,11 +1101,17 @@ int nft_chain_validate_dependency(const struct nft_chain *chain,
int nft_chain_validate_hooks(const struct nft_chain *chain,
unsigned int hook_flags);
+static inline bool nft_chain_binding(const struct nft_chain *chain)
+{
+ return chain->flags & NFT_CHAIN_BINDING;
+}
+
static inline bool nft_chain_is_bound(struct nft_chain *chain)
{
return (chain->flags & NFT_CHAIN_BINDING) && chain->bound;
}
+int nft_chain_add(struct nft_table *table, struct nft_chain *chain);
void nft_chain_del(struct nft_chain *chain);
void nf_tables_chain_destroy(struct nft_ctx *ctx);
@@ -1181,6 +1163,29 @@ int __nft_release_basechain(struct nft_ctx *ctx);
unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
+static inline bool nft_use_inc(u32 *use)
+{
+ if (*use == UINT_MAX)
+ return false;
+
+ (*use)++;
+
+ return true;
+}
+
+static inline void nft_use_dec(u32 *use)
+{
+ WARN_ON_ONCE((*use)-- == 0);
+}
+
+/* For error and abort path: restore use counter to previous state. */
+static inline void nft_use_inc_restore(u32 *use)
+{
+ WARN_ON_ONCE(!nft_use_inc(use));
+}
+
+#define nft_use_dec_restore nft_use_dec
+
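[Editor's note: a minimal sketch of the intended calling pattern. my_bind_chain() and my_setup() are hypothetical; the -EMFILE-on-saturation convention follows how nf_tables call sites typically treat a failed nft_use_inc().]

	static int my_bind_chain(struct nft_chain *chain)
	{
		if (!nft_use_inc(&chain->use))
			return -EMFILE;		/* counter saturated */

		if (my_setup(chain) < 0) {
			/* error path: restore the previous counter value */
			nft_use_dec_restore(&chain->use);
			return -ENOMEM;
		}
		return 0;
	}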
/**
* struct nft_table - nf_tables table
*
@@ -1197,6 +1202,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* @genmask: generation mask
* @afinfo: address family info
* @name: name of the table
+ * @validate_state: internal, set when transaction adds jumps
*/
struct nft_table {
struct list_head list;
@@ -1215,6 +1221,7 @@ struct nft_table {
char *name;
u16 udlen;
u8 *udata;
+ u8 validate_state;
};
static inline bool nft_table_has_owner(const struct nft_table *table)
@@ -1264,8 +1271,8 @@ struct nft_object {
struct list_head list;
struct rhlist_head rhlhead;
struct nft_object_hash_key key;
- u32 genmask:2,
- use:30;
+ u32 genmask:2;
+ u32 use;
u64 handle;
u16 udlen;
u8 *udata;
@@ -1367,8 +1374,8 @@ struct nft_flowtable {
char *name;
int hooknum;
int ops_len;
- u32 genmask:2,
- use:30;
+ u32 genmask:2;
+ u32 use;
u64 handle;
/* runtime data below here */
struct list_head hook_list ____cacheline_aligned;
@@ -1394,11 +1401,7 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
* @type: event type (enum nft_trace_types)
* @skbid: hash of skb to be used as trace id
* @packet_dumped: packet headers sent in a previous traceinfo message
- * @pkt: pktinfo currently processed
* @basechain: base chain currently processed
- * @chain: chain currently processed
- * @rule: rule that was evaluated
- * @verdict: verdict given by rule
*/
struct nft_traceinfo {
bool trace;
@@ -1406,18 +1409,16 @@ struct nft_traceinfo {
bool packet_dumped;
enum nft_trace_types type:8;
u32 skbid;
- const struct nft_pktinfo *pkt;
const struct nft_base_chain *basechain;
- const struct nft_chain *chain;
- const struct nft_rule_dp *rule;
- const struct nft_verdict *verdict;
};
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
- const struct nft_verdict *verdict,
const struct nft_chain *basechain);
-void nft_trace_notify(struct nft_traceinfo *info);
+void nft_trace_notify(const struct nft_pktinfo *pkt,
+ const struct nft_verdict *verdict,
+ const struct nft_rule_dp *rule,
+ struct nft_traceinfo *info);
#define MODULE_ALIAS_NFT_CHAIN(family, name) \
MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
@@ -1508,45 +1509,37 @@ static inline void nft_set_elem_change_active(const struct net *net,
#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
#if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT 2
+#define NFT_SET_ELEM_DEAD_BIT 2
#elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ set_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
- clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
/**
* struct nft_trans - nf_tables object update in transaction
*
* @list: used internally
+ * @binding_list: list of objects with possible bindings
* @msg_type: message type
* @put_net: ctx->net needs to be put
* @ctx: transaction context
@@ -1554,6 +1547,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
*/
struct nft_trans {
struct list_head list;
+ struct list_head binding_list;
int msg_type;
bool put_net;
struct nft_ctx ctx;
@@ -1564,6 +1558,7 @@ struct nft_trans_rule {
struct nft_rule *rule;
struct nft_flow_rule *flow;
u32 rule_id;
+ bool bound;
};
#define nft_trans_rule(trans) \
@@ -1572,6 +1567,8 @@ struct nft_trans_rule {
(((struct nft_trans_rule *)trans->data)->flow)
#define nft_trans_rule_id(trans) \
(((struct nft_trans_rule *)trans->data)->rule_id)
+#define nft_trans_rule_bound(trans) \
+ (((struct nft_trans_rule *)trans->data)->bound)
struct nft_trans_set {
struct nft_set *set;
@@ -1580,6 +1577,7 @@ struct nft_trans_set {
u64 timeout;
bool update;
bool bound;
+ u32 size;
};
#define nft_trans_set(trans) \
@@ -1594,15 +1592,23 @@ struct nft_trans_set {
(((struct nft_trans_set *)trans->data)->timeout)
#define nft_trans_set_gc_int(trans) \
(((struct nft_trans_set *)trans->data)->gc_int)
+#define nft_trans_set_size(trans) \
+ (((struct nft_trans_set *)trans->data)->size)
struct nft_trans_chain {
+ struct nft_chain *chain;
bool update;
char *name;
struct nft_stats __percpu *stats;
u8 policy;
+ bool bound;
u32 chain_id;
+ struct nft_base_chain *basechain;
+ struct list_head hook_list;
};
+#define nft_trans_chain(trans) \
+ (((struct nft_trans_chain *)trans->data)->chain)
#define nft_trans_chain_update(trans) \
(((struct nft_trans_chain *)trans->data)->update)
#define nft_trans_chain_name(trans) \
@@ -1611,8 +1617,14 @@ struct nft_trans_chain {
(((struct nft_trans_chain *)trans->data)->stats)
#define nft_trans_chain_policy(trans) \
(((struct nft_trans_chain *)trans->data)->policy)
+#define nft_trans_chain_bound(trans) \
+ (((struct nft_trans_chain *)trans->data)->bound)
#define nft_trans_chain_id(trans) \
(((struct nft_trans_chain *)trans->data)->chain_id)
+#define nft_trans_basechain(trans) \
+ (((struct nft_trans_chain *)trans->data)->basechain)
+#define nft_trans_chain_hooks(trans) \
+ (((struct nft_trans_chain *)trans->data)->hook_list)
struct nft_trans_table {
bool update;
@@ -1663,6 +1675,38 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable_flags(trans) \
(((struct nft_trans_flowtable *)trans->data)->flags)
+#define NFT_TRANS_GC_BATCHCOUNT 256
+
+struct nft_trans_gc {
+ struct list_head list;
+ struct net *net;
+ struct nft_set *set;
+ u32 seq;
+ u8 count;
+ void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct rcu_head rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
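[Editor's note: a hedged sketch of the async batching flow these declarations imply, loosely following the set backends. my_next_expired() is hypothetical; real backends additionally mark elements dead and process catchall elements via nft_trans_gc_catchall().]

	static void my_set_gc(struct nft_set *set, unsigned int gc_seq)
	{
		struct nft_trans_gc *gc;
		void *priv;

		gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
		if (!gc)
			return;

		while ((priv = my_next_expired(set)) != NULL) {
			/* queues the batch when full and starts a fresh one */
			gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
			if (!gc)
				return;
			nft_trans_gc_elem_add(gc, priv);
		}

		nft_trans_gc_queue_async_done(gc);
	}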
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1683,11 +1727,13 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
struct nftables_pernet {
struct list_head tables;
struct list_head commit_list;
+ struct list_head binding_list;
struct list_head module_list;
struct list_head notify_list;
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
+ unsigned int gc_seq;
u8 validate_state;
};
diff --git a/include/net/netlink.h b/include/net/netlink.h
index b12cd957abb4..8a7cd1170e1f 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -375,12 +375,11 @@ struct nla_policy {
#define NLA_POLICY_BITFIELD32(valid) \
{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
-#define __NLA_IS_UINT_TYPE(tp) \
- (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64)
+#define __NLA_IS_UINT_TYPE(tp) \
+ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \
+ tp == NLA_U64 || tp == NLA_BE16 || tp == NLA_BE32)
#define __NLA_IS_SINT_TYPE(tp) \
(tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
-#define __NLA_IS_BEINT_TYPE(tp) \
- (tp == NLA_BE16 || tp == NLA_BE32)
#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
#define NLA_ENSURE_UINT_TYPE(tp) \
@@ -394,7 +393,6 @@ struct nla_policy {
#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \
(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \
__NLA_IS_SINT_TYPE(tp) || \
- __NLA_IS_BEINT_TYPE(tp) || \
tp == NLA_MSECS || \
tp == NLA_BINARY) + tp)
#define NLA_ENSURE_NO_VALIDATION_PTR(tp) \
@@ -402,8 +400,6 @@ struct nla_policy {
tp != NLA_REJECT && \
tp != NLA_NESTED && \
tp != NLA_NESTED_ARRAY) + tp)
-#define NLA_ENSURE_BEINT_TYPE(tp) \
- (__NLA_ENSURE(__NLA_IS_BEINT_TYPE(tp)) + tp)
#define NLA_POLICY_RANGE(tp, _min, _max) { \
.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
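[Editor's note: one consequence of folding the BE types into __NLA_IS_UINT_TYPE() is that big-endian attributes can now use the generic integer validation macros. A hypothetical policy entry (MY_ATTR_* names are invented):]

	static const struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
		/* reject ports outside 1..1023, validated in network byte order */
		[MY_ATTR_PORT] = NLA_POLICY_RANGE(NLA_BE16, 1, 1023),
	};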
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index db762e35aca9..7a41c4791536 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -65,6 +65,7 @@ struct netns_ipv4 {
#endif
bool fib_has_custom_local_routes;
bool fib_offload_disabled;
+ u8 sysctl_tcp_shrink_window;
#ifdef CONFIG_IP_ROUTE_CLASSID
atomic_t fib_num_tclassid_users;
#endif
@@ -151,7 +152,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_abort_on_overflow;
u8 sysctl_tcp_fack; /* obsolete */
int sysctl_tcp_max_reordering;
- int sysctl_tcp_adv_win_scale;
+ int sysctl_tcp_adv_win_scale; /* obsolete */
u8 sysctl_tcp_dsack;
u8 sysctl_tcp_app_win;
u8 sysctl_tcp_frto;
@@ -194,6 +195,7 @@ struct netns_ipv4 {
int sysctl_udp_rmem_min;
u8 sysctl_fib_notify_on_flag_change;
+ u8 sysctl_tcp_syn_linear_timeouts;
#ifdef CONFIG_NET_L3_MASTER_DEV
u8 sysctl_udp_l3mdev_accept;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index b4af4837d80b..5f2cfd84570a 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -53,8 +53,9 @@ struct netns_sysctl_ipv6 {
int seg6_flowlabel;
u32 ioam6_id;
u64 ioam6_id_wide;
- bool skip_notify_on_dev_down;
+ u8 skip_notify_on_dev_down;
u8 fib_notify_on_flag_change;
+ u8 icmpv6_error_anycast_as_unicast;
};
struct netns_ipv6 {
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 8c77832d0240..cc8060c017d5 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -2,8 +2,6 @@
#ifndef _NETNS_NFTABLES_H_
#define _NETNS_NFTABLES_H_
-#include <linux/list.h>
-
struct netns_nftables {
u8 gencursor;
};
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 28085b995ddc..2b12725de9c0 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -497,29 +497,6 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
return NULL;
}
-/* Variant of nexthop_fib6_nh().
- * Caller should either hold rcu_read_lock_bh(), or RTNL.
- */
-static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh)
-{
- struct nh_info *nhi;
-
- if (nh->is_group) {
- struct nh_group *nh_grp;
-
- nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp);
- nh = nexthop_mpath_select(nh_grp, 0);
- if (!nh)
- return NULL;
- }
-
- nhi = rcu_dereference_bh_rtnl(nh->nh_info);
- if (nhi->family == AF_INET6)
- return &nhi->fib6_nh;
-
- return NULL;
-}
-
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
struct fib6_nh *fib6_nh;
diff --git a/include/net/nsh.h b/include/net/nsh.h
index 350b1ad11c7f..16a751093896 100644
--- a/include/net/nsh.h
+++ b/include/net/nsh.h
@@ -192,7 +192,7 @@
/**
* struct nsh_md1_ctx - Keeps track of NSH context data
- * @nshc<1-4>: NSH Contexts.
+ * @context: NSH Contexts.
*/
struct nsh_md1_ctx {
__be32 context[4];
diff --git a/include/net/p8022.h b/include/net/p8022.h
index b690ffcad66b..a29e224ac498 100644
--- a/include/net/p8022.h
+++ b/include/net/p8022.h
@@ -13,7 +13,4 @@ register_8022_client(unsigned char type,
struct packet_type *pt,
struct net_device *orig_dev));
void unregister_8022_client(struct datalink_proto *proto);
-
-struct datalink_proto *make_8023_client(void);
-void destroy_8023_client(struct datalink_proto *dl);
#endif
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
deleted file mode 100644
index ddfa0b328677..000000000000
--- a/include/net/page_pool.h
+++ /dev/null
@@ -1,414 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * page_pool.h
- * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
- * Copyright (C) 2016 Red Hat, Inc.
- */
-
-/**
- * DOC: page_pool allocator
- *
- * This page_pool allocator is optimized for the XDP mode that
- * uses one-frame-per-page, but have fallbacks that act like the
- * regular page allocator APIs.
- *
- * Basic use involve replacing alloc_pages() calls with the
- * page_pool_alloc_pages() call. Drivers should likely use
- * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
- *
- * API keeps track of in-flight pages, in-order to let API user know
- * when it is safe to dealloactor page_pool object. Thus, API users
- * must make sure to call page_pool_release_page() when a page is
- * "leaving" the page_pool. Or call page_pool_put_page() where
- * appropiate. For maintaining correct accounting.
- *
- * API user must only call page_pool_put_page() once on a page, as it
- * will either recycle the page, or in case of elevated refcnt, it
- * will release the DMA mapping and in-flight state accounting. We
- * hope to lift this requirement in the future.
- */
-#ifndef _NET_PAGE_POOL_H
-#define _NET_PAGE_POOL_H
-
-#include <linux/mm.h> /* Needed by ptr_ring */
-#include <linux/ptr_ring.h>
-#include <linux/dma-direction.h>
-
-#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
- * map/unmap
- */
-#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets
- * from page_pool will be
- * DMA-synced-for-device according to
- * the length provided by the device
- * driver.
- * Please note DMA-sync-for-CPU is still
- * device driver responsibility
- */
-#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
- PP_FLAG_DMA_SYNC_DEV |\
- PP_FLAG_PAGE_FRAG)
-
-/*
- * Fast allocation side cache array/stack
- *
- * The cache size and refill watermark is related to the network
- * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
- * ring is usually refilled and the max consumed elements will be 64,
- * thus a natural max size of objects needed in the cache.
- *
- * Keeping room for more objects, is due to XDP_DROP use-case. As
- * XDP_DROP allows the opportunity to recycle objects directly into
- * this array, as it shares the same softirq/NAPI protection. If
- * cache is already full (or partly full) then the XDP_DROP recycles
- * would have to take a slower code path.
- */
-#define PP_ALLOC_CACHE_SIZE 128
-#define PP_ALLOC_CACHE_REFILL 64
-struct pp_alloc_cache {
- u32 count;
- struct page *cache[PP_ALLOC_CACHE_SIZE];
-};
-
-struct page_pool_params {
- unsigned int flags;
- unsigned int order;
- unsigned int pool_size;
- int nid; /* Numa node id to allocate from pages from */
- struct device *dev; /* device, for DMA pre-mapping purposes */
- enum dma_data_direction dma_dir; /* DMA mapping direction */
- unsigned int max_len; /* max DMA sync memory size */
- unsigned int offset; /* DMA addr offset */
- void (*init_callback)(struct page *page, void *arg);
- void *init_arg;
-};
-
-#ifdef CONFIG_PAGE_POOL_STATS
-struct page_pool_alloc_stats {
- u64 fast; /* fast path allocations */
- u64 slow; /* slow-path order 0 allocations */
- u64 slow_high_order; /* slow-path high order allocations */
- u64 empty; /* failed refills due to empty ptr ring, forcing
- * slow path allocation
- */
- u64 refill; /* allocations via successful refill */
- u64 waive; /* failed refills due to numa zone mismatch */
-};
-
-struct page_pool_recycle_stats {
- u64 cached; /* recycling placed page in the cache. */
- u64 cache_full; /* cache was full */
- u64 ring; /* recycling placed page back into ptr ring */
- u64 ring_full; /* page was released from page-pool because
- * PTR ring was full.
- */
- u64 released_refcnt; /* page released because of elevated
- * refcnt
- */
-};
-
-/* This struct wraps the above stats structs so users of the
- * page_pool_get_stats API can pass a single argument when requesting the
- * stats for the page pool.
- */
-struct page_pool_stats {
- struct page_pool_alloc_stats alloc_stats;
- struct page_pool_recycle_stats recycle_stats;
-};
-
-int page_pool_ethtool_stats_get_count(void);
-u8 *page_pool_ethtool_stats_get_strings(u8 *data);
-u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
-
-/*
- * Drivers that wish to harvest page pool stats and report them to users
- * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
- * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
- */
-bool page_pool_get_stats(struct page_pool *pool,
- struct page_pool_stats *stats);
-#else
-
-static inline int page_pool_ethtool_stats_get_count(void)
-{
- return 0;
-}
-
-static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
-{
- return data;
-}
-
-static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
-{
- return data;
-}
-
-#endif
-
-struct page_pool {
- struct page_pool_params p;
-
- struct delayed_work release_dw;
- void (*disconnect)(void *);
- unsigned long defer_start;
- unsigned long defer_warn;
-
- u32 pages_state_hold_cnt;
- unsigned int frag_offset;
- struct page *frag_page;
- long frag_users;
-
-#ifdef CONFIG_PAGE_POOL_STATS
- /* these stats are incremented while in softirq context */
- struct page_pool_alloc_stats alloc_stats;
-#endif
- u32 xdp_mem_id;
-
- /*
- * Data structure for allocation side
- *
- * Drivers allocation side usually already perform some kind
- * of resource protection. Piggyback on this protection, and
- * require driver to protect allocation side.
- *
- * For NIC drivers this means, allocate a page_pool per
- * RX-queue. As the RX-queue is already protected by
- * Softirq/BH scheduling and napi_schedule. NAPI schedule
- * guarantee that a single napi_struct will only be scheduled
- * on a single CPU (see napi_schedule).
- */
- struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
-
- /* Data structure for storing recycled pages.
- *
- * Returning/freeing pages is more complicated synchronization
- * wise, because free's can happen on remote CPUs, with no
- * association with allocation resource.
- *
- * Use ptr_ring, as it separates consumer and producer
- * effeciently, it a way that doesn't bounce cache-lines.
- *
- * TODO: Implement bulk return pages into this structure.
- */
- struct ptr_ring ring;
-
-#ifdef CONFIG_PAGE_POOL_STATS
- /* recycle stats are per-cpu to avoid locking */
- struct page_pool_recycle_stats __percpu *recycle_stats;
-#endif
- atomic_t pages_state_release_cnt;
-
- /* A page_pool is strictly tied to a single RX-queue being
- * protected by NAPI, due to above pp_alloc_cache. This
- * refcnt serves purpose is to simplify drivers error handling.
- */
- refcount_t user_cnt;
-
- u64 destroy_cnt;
-};
-
-struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
-
-static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
-{
- gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
-
- return page_pool_alloc_pages(pool, gfp);
-}
-
-struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
- unsigned int size, gfp_t gfp);
-
-static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
- unsigned int *offset,
- unsigned int size)
-{
- gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
-
- return page_pool_alloc_frag(pool, offset, size, gfp);
-}
-
-/* get the stored dma direction. A driver might decide to treat this locally and
- * avoid the extra cache line from page_pool to determine the direction
- */
-static
-inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
-{
- return pool->p.dma_dir;
-}
-
-bool page_pool_return_skb_page(struct page *page);
-
-struct page_pool *page_pool_create(const struct page_pool_params *params);
-
-struct xdp_mem_info;
-
-#ifdef CONFIG_PAGE_POOL
-void page_pool_destroy(struct page_pool *pool);
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
- struct xdp_mem_info *mem);
-void page_pool_release_page(struct page_pool *pool, struct page *page);
-void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count);
-#else
-static inline void page_pool_destroy(struct page_pool *pool)
-{
-}
-
-static inline void page_pool_use_xdp_mem(struct page_pool *pool,
- void (*disconnect)(void *),
- struct xdp_mem_info *mem)
-{
-}
-static inline void page_pool_release_page(struct page_pool *pool,
- struct page *page)
-{
-}
-
-static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count)
-{
-}
-#endif
-
-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size,
- bool allow_direct);
-
-/* pp_frag_count represents the number of writers who can update the page
- * either by updating skb->data or via DMA mappings for the device.
- * We can't rely on the page refcnt for that as we don't know who might be
- * holding page references and we can't reliably destroy or sync DMA mappings
- * of the fragments.
- *
- * When pp_frag_count reaches 0 we can either recycle the page if the page
- * refcnt is 1 or return it back to the memory allocator and destroy any
- * mappings we have.
- */
-static inline void page_pool_fragment_page(struct page *page, long nr)
-{
- atomic_long_set(&page->pp_frag_count, nr);
-}
-
-static inline long page_pool_defrag_page(struct page *page, long nr)
-{
- long ret;
-
- /* If nr == pp_frag_count then we have cleared all remaining
- * references to the page. No need to actually overwrite it, instead
- * we can leave this to be overwritten by the calling function.
- *
- * The main advantage to doing this is that an atomic_read is
- * generally a much cheaper operation than an atomic update,
- * especially when dealing with a page that may be partitioned
- * into only 2 or 3 pieces.
- */
- if (atomic_long_read(&page->pp_frag_count) == nr)
- return 0;
-
- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
- WARN_ON(ret < 0);
- return ret;
-}
-
-static inline bool page_pool_is_last_frag(struct page_pool *pool,
- struct page *page)
-{
- /* If fragments aren't enabled or count is 0 we were the last user */
- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
- (page_pool_defrag_page(page, 1) == 0);
-}
-
-static inline void page_pool_put_page(struct page_pool *pool,
- struct page *page,
- unsigned int dma_sync_size,
- bool allow_direct)
-{
- /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
- * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
- */
-#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(pool, page))
- return;
-
- page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
-#endif
-}
-
-/* Same as above but will try to sync the entire area pool->max_len */
-static inline void page_pool_put_full_page(struct page_pool *pool,
- struct page *page, bool allow_direct)
-{
- page_pool_put_page(pool, page, -1, allow_direct);
-}
-
-/* Same as above but the caller must guarantee safe context. e.g NAPI */
-static inline void page_pool_recycle_direct(struct page_pool *pool,
- struct page *page)
-{
- page_pool_put_full_page(pool, page, true);
-}
-
-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
- (sizeof(dma_addr_t) > sizeof(unsigned long))
-
-static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
-{
- dma_addr_t ret = page->dma_addr;
-
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
-
- return ret;
-}
-
-static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
-{
- page->dma_addr = addr;
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- page->dma_addr_upper = upper_32_bits(addr);
-}
-
-static inline bool is_page_pool_compiled_in(void)
-{
-#ifdef CONFIG_PAGE_POOL
- return true;
-#else
- return false;
-#endif
-}
-
-static inline bool page_pool_put(struct page_pool *pool)
-{
- return refcount_dec_and_test(&pool->user_cnt);
-}
-
-/* Caller must provide appropriate safe context, e.g. NAPI. */
-void page_pool_update_nid(struct page_pool *pool, int new_nid);
-static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
-{
- if (unlikely(pool->p.nid != new_nid))
- page_pool_update_nid(pool, new_nid);
-}
-
-static inline void page_pool_ring_lock(struct page_pool *pool)
- __acquires(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_lock(&pool->ring.producer_lock);
- else
- spin_lock_bh(&pool->ring.producer_lock);
-}
-
-static inline void page_pool_ring_unlock(struct page_pool *pool)
- __releases(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_unlock(&pool->ring.producer_lock);
- else
- spin_unlock_bh(&pool->ring.producer_lock);
-}
-
-#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
new file mode 100644
index 000000000000..94231533a369
--- /dev/null
+++ b/include/net/page_pool/helpers.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * page_pool/helpers.h
+ * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+
+/**
+ * DOC: page_pool allocator
+ *
+ * The page_pool allocator is optimized for the XDP mode that
+ * uses one frame per page, but it can fall back on the
+ * regular page allocator APIs.
+ *
+ * Basic use involves replacing alloc_pages() calls with the
+ * page_pool_alloc_pages() call. Drivers should use
+ * page_pool_dev_alloc_pages() in place of dev_alloc_pages().
+ *
+ * The API keeps track of in-flight pages, in order to let the API user
+ * know when it is safe to free a page_pool object. Thus, API users
+ * must call page_pool_put_page() to free the page, or attach
+ * the page to page_pool-aware objects like skbs marked with
+ * skb_mark_for_recycle().
+ *
+ * An API user must call page_pool_put_page() exactly once per page: it
+ * will either recycle the page, or, in case of refcnt > 1, release the
+ * DMA mapping and the in-flight state accounting.
+ */
+#ifndef _NET_PAGE_POOL_HELPERS_H
+#define _NET_PAGE_POOL_HELPERS_H
+
+#include <net/page_pool/types.h>
+
+#ifdef CONFIG_PAGE_POOL_STATS
+int page_pool_ethtool_stats_get_count(void);
+u8 *page_pool_ethtool_stats_get_strings(u8 *data);
+u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
+
+/*
+ * Drivers that wish to harvest page pool stats and report them to users
+ * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
+ * struct page_pool_stats and call page_pool_get_stats() to get stats
+ * for the specified pool.
+ */
+bool page_pool_get_stats(struct page_pool *pool,
+ struct page_pool_stats *stats);
+#else
+static inline int page_pool_ethtool_stats_get_count(void)
+{
+ return 0;
+}
+
+static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
+{
+ return data;
+}
+
+static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+{
+ return data;
+}
+#endif
+
+/**
+ * page_pool_dev_alloc_pages() - allocate a page.
+ * @pool: pool from which to allocate
+ *
+ * Get a page from the page allocator or page_pool caches.
+ */
+static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_pages(pool, gfp);
+}
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
+/**
+ * page_pool_get_dma_dir() - Retrieve the stored DMA direction.
+ * @pool: pool from which page was allocated
+ *
+ * Get the stored dma direction. A driver might decide to store this locally
+ * and avoid the extra cache line from page_pool to determine the direction.
+ */
+static
+inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
+{
+ return pool->p.dma_dir;
+}
+
+/* pp_frag_count represents the number of writers who can update the page
+ * either by updating skb->data or via DMA mappings for the device.
+ * We can't rely on the page refcnt for that as we don't know who might be
+ * holding page references and we can't reliably destroy or sync DMA mappings
+ * of the fragments.
+ *
+ * When pp_frag_count reaches 0 we can either recycle the page if the page
+ * refcnt is 1 or return it back to the memory allocator and destroy any
+ * mappings we have.
+ */
+static inline void page_pool_fragment_page(struct page *page, long nr)
+{
+ atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_defrag_page(struct page *page, long nr)
+{
+ long ret;
+
+ /* If nr == pp_frag_count then we have cleared all remaining
+ * references to the page. No need to actually overwrite it, instead
+ * we can leave this to be overwritten by the calling function.
+ *
+ * The main advantage to doing this is that an atomic_read is
+ * generally a much cheaper operation than an atomic update,
+ * especially when dealing with a page that may be partitioned
+ * into only 2 or 3 pieces.
+ */
+ if (atomic_long_read(&page->pp_frag_count) == nr)
+ return 0;
+
+ ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ WARN_ON(ret < 0);
+ return ret;
+}
+
+static inline bool page_pool_is_last_frag(struct page_pool *pool,
+ struct page *page)
+{
+ /* If fragments aren't enabled or count is 0 we were the last user */
+ return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ (page_pool_defrag_page(page, 1) == 0);
+}
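/* Editor's note, worked example of the fragment counting above
 * (a hypothetical two-buffer split of one order-0 page):
 *
 *   page_pool_fragment_page(page, 2);  // two outstanding users
 *   ...
 *   page_pool_defrag_page(page, 1);    // returns 1: page still in use
 *   page_pool_defrag_page(page, 1);    // returns 0: caller was last user
 *
 * The second call hits the atomic_long_read() fast path (count == nr)
 * and returns 0 without writing pp_frag_count, leaving the recycle
 * path to reset it.
 */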
+
+/**
+ * page_pool_put_page() - release a reference to a page pool page
+ * @pool: pool from which page was allocated
+ * @page: page to release a reference on
+ * @dma_sync_size: how much of the page may have been touched by the device
+ * @allow_direct: released by the consumer, allow lockless caching
+ *
+ * The outcome of this depends on the page refcnt. If the driver bumps
+ * the refcnt > 1 this will unmap the page. If the page refcnt is 1
+ * the allocator owns the page and will try to recycle it in one of the pool
+ * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device
+ * using dma_sync_single_range_for_device().
+ */
+static inline void page_pool_put_page(struct page_pool *pool,
+ struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+#ifdef CONFIG_PAGE_POOL
+ if (!page_pool_is_last_frag(pool, page))
+ return;
+
+ page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
+#endif
+}
+
+/**
+ * page_pool_put_full_page() - release a reference on a page pool page
+ * @pool: pool from which page was allocated
+ * @page: page to release a reference on
+ * @allow_direct: released by the consumer, allow lockless caching
+ *
+ * Similar to page_pool_put_page(), but will DMA sync the entire memory area
+ * as configured in &page_pool_params.max_len.
+ */
+static inline void page_pool_put_full_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
+{
+ page_pool_put_page(pool, page, -1, allow_direct);
+}
+
+/**
+ * page_pool_recycle_direct() - release a reference on a page pool page
+ * @pool: pool from which page was allocated
+ * @page: page to release a reference on
+ *
+ * Similar to page_pool_put_full_page() but caller must guarantee safe context
+ * (e.g. NAPI), since it will recycle the page directly into the pool fast cache.
+ */
+static inline void page_pool_recycle_direct(struct page_pool *pool,
+ struct page *page)
+{
+ page_pool_put_full_page(pool, page, true);
+}
+
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+ (sizeof(dma_addr_t) > sizeof(unsigned long))
+
+/**
+ * page_pool_get_dma_addr() - Retrieve the stored DMA address.
+ * @page: page allocated from a page pool
+ *
+ * Fetch the DMA address of the page. The page pool to which the page belongs
+ * must have been created with PP_FLAG_DMA_MAP.
+ */
+static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+{
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
+ return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ page->dma_addr = addr;
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline bool page_pool_put(struct page_pool *pool)
+{
+ return refcount_dec_and_test(&pool->user_cnt);
+}
+
+static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
+{
+ if (unlikely(pool->p.nid != new_nid))
+ page_pool_update_nid(pool, new_nid);
+}
+
+#endif /* _NET_PAGE_POOL_HELPERS_H */
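[Editor's note: to tie the helpers together, a minimal RX-side sketch under stated assumptions: a 256-entry ring, order-0 pages, DMA mapping done by the pool. my_post_rx_desc() and my_rx_drop() are invented names.]

	static struct page_pool *my_create_pool(struct device *dma_dev)
	{
		struct page_pool_params pp_params = {
			.flags		= PP_FLAG_DMA_MAP,
			.order		= 0,
			.pool_size	= 256,
			.nid		= NUMA_NO_NODE,
			.dev		= dma_dev,
			.dma_dir	= DMA_FROM_DEVICE,
		};

		return page_pool_create(&pp_params);	/* ERR_PTR() on failure */
	}

	static int my_rx_refill_one(struct page_pool *pool)
	{
		struct page *page = page_pool_dev_alloc_pages(pool);

		if (!page)
			return -ENOMEM;

		/* the pool mapped the page for us; post its DMA address */
		my_post_rx_desc(page_pool_get_dma_addr(page));
		return 0;
	}

	/* From NAPI context, e.g. on XDP_DROP, recycle straight to the cache */
	static void my_rx_drop(struct page_pool *pool, struct page *page)
	{
		page_pool_recycle_direct(pool, page);
	}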
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
new file mode 100644
index 000000000000..887e7946a597
--- /dev/null
+++ b/include/net/page_pool/types.h
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NET_PAGE_POOL_TYPES_H
+#define _NET_PAGE_POOL_TYPES_H
+
+#include <linux/dma-direction.h>
+#include <linux/ptr_ring.h>
+
+#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
+ * map/unmap
+ */
+#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets
+ * from page_pool will be
+ * DMA-synced-for-device according to
+ * the length provided by the device
+ * driver.
+ * Please note DMA-sync-for-CPU is still
+ * device driver responsibility
+ */
+#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
+ PP_FLAG_DMA_SYNC_DEV |\
+ PP_FLAG_PAGE_FRAG)
+
+/*
+ * Fast allocation side cache array/stack
+ *
+ * The cache size and refill watermark is related to the network
+ * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
+ * ring is usually refilled and the max consumed elements will be 64,
+ * thus a natural max size of objects needed in the cache.
+ *
+ * Keeping room for more objects, is due to XDP_DROP use-case. As
+ * XDP_DROP allows the opportunity to recycle objects directly into
+ * this array, as it shares the same softirq/NAPI protection. If
+ * cache is already full (or partly full) then the XDP_DROP recycles
+ * would have to take a slower code path.
+ */
+#define PP_ALLOC_CACHE_SIZE 128
+#define PP_ALLOC_CACHE_REFILL 64
+struct pp_alloc_cache {
+ u32 count;
+ struct page *cache[PP_ALLOC_CACHE_SIZE];
+};
+
+/**
+ * struct page_pool_params - page pool parameters
+ * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG
+ * @order: 2^order pages on allocation
+ * @pool_size: size of the ptr_ring
+ * @nid: NUMA node id to allocate pages from
+ * @dev: device, for DMA pre-mapping purposes
+ * @napi: NAPI which is the sole consumer of pages, otherwise NULL
+ * @dma_dir: DMA mapping direction
+ * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
+ * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
+ */
+struct page_pool_params {
+ unsigned int flags;
+ unsigned int order;
+ unsigned int pool_size;
+ int nid;
+ struct device *dev;
+ struct napi_struct *napi;
+ enum dma_data_direction dma_dir;
+ unsigned int max_len;
+ unsigned int offset;
+/* private: used by test code only */
+ void (*init_callback)(struct page *page, void *arg);
+ void *init_arg;
+};
+
+#ifdef CONFIG_PAGE_POOL_STATS
+/**
+ * struct page_pool_alloc_stats - allocation statistics
+ * @fast: successful fast path allocations
+ * @slow: slow path order-0 allocations
+ * @slow_high_order: slow path high order allocations
+ * @empty: ptr ring is empty, so a slow path allocation was forced
+ * @refill: an allocation which triggered a refill of the cache
+ * @waive: pages obtained from the ptr ring that cannot be added to
+ * the cache due to a NUMA mismatch
+ */
+struct page_pool_alloc_stats {
+ u64 fast;
+ u64 slow;
+ u64 slow_high_order;
+ u64 empty;
+ u64 refill;
+ u64 waive;
+};
+
+/**
+ * struct page_pool_recycle_stats - recycling (freeing) statistics
+ * @cached: recycling placed page in the page pool cache
+ * @cache_full: page pool cache was full
+ * @ring: page placed into the ptr ring
+ * @ring_full: page released from page pool because the ptr ring was full
+ * @released_refcnt: page released (and not recycled) because refcnt > 1
+ */
+struct page_pool_recycle_stats {
+ u64 cached;
+ u64 cache_full;
+ u64 ring;
+ u64 ring_full;
+ u64 released_refcnt;
+};
+
+/**
+ * struct page_pool_stats - combined page pool use statistics
+ * @alloc_stats: see struct page_pool_alloc_stats
+ * @recycle_stats: see struct page_pool_recycle_stats
+ *
+ * Wrapper struct for combining page pool stats with different storage
+ * requirements.
+ */
+struct page_pool_stats {
+ struct page_pool_alloc_stats alloc_stats;
+ struct page_pool_recycle_stats recycle_stats;
+};
+#endif
+
+struct page_pool {
+ struct page_pool_params p;
+
+ long frag_users;
+ struct page *frag_page;
+ unsigned int frag_offset;
+ u32 pages_state_hold_cnt;
+
+ struct delayed_work release_dw;
+ void (*disconnect)(void *pool);
+ unsigned long defer_start;
+ unsigned long defer_warn;
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ /* these stats are incremented while in softirq context */
+ struct page_pool_alloc_stats alloc_stats;
+#endif
+ u32 xdp_mem_id;
+
+ /*
+ * Data structure for allocation side
+ *
+ * The driver's allocation side usually already performs some
+ * kind of resource protection. Piggyback on this protection,
+ * and require the driver to protect the allocation side.
+ *
+ * For NIC drivers this means allocating a page_pool per
+ * RX-queue, as the RX-queue is already protected by
+ * softirq/BH scheduling and napi_schedule. NAPI scheduling
+ * guarantees that a single napi_struct will only be scheduled
+ * on a single CPU (see napi_schedule).
+ */
+ struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
+
+ /* Data structure for storing recycled pages.
+ *
+ * Returning/freeing pages is more complicated synchronization-
+ * wise, because frees can happen on remote CPUs, with no
+ * association with the allocation resource.
+ *
+ * Use ptr_ring, as it separates consumer and producer
+ * efficiently, in a way that doesn't bounce cache-lines.
+ *
+ * TODO: Implement bulk return pages into this structure.
+ */
+ struct ptr_ring ring;
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ /* recycle stats are per-cpu to avoid locking */
+ struct page_pool_recycle_stats __percpu *recycle_stats;
+#endif
+ atomic_t pages_state_release_cnt;
+
+ /* A page_pool is strictly tied to a single RX-queue being
+ * protected by NAPI, due to the above pp_alloc_cache. This
+ * refcnt serves to simplify drivers' error handling.
+ */
+ refcount_t user_cnt;
+
+ u64 destroy_cnt;
+};
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+ unsigned int size, gfp_t gfp);
+struct page_pool *page_pool_create(const struct page_pool_params *params);
+
+struct xdp_mem_info;
+
+#ifdef CONFIG_PAGE_POOL
+void page_pool_unlink_napi(struct page_pool *pool);
+void page_pool_destroy(struct page_pool *pool);
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+ struct xdp_mem_info *mem);
+void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ int count);
+#else
+static inline void page_pool_unlink_napi(struct page_pool *pool)
+{
+}
+
+static inline void page_pool_destroy(struct page_pool *pool)
+{
+}
+
+static inline void page_pool_use_xdp_mem(struct page_pool *pool,
+ void (*disconnect)(void *),
+ struct xdp_mem_info *mem)
+{
+}
+
+static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ int count)
+{
+}
+#endif
+
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct);
+
+static inline bool is_page_pool_compiled_in(void)
+{
+#ifdef CONFIG_PAGE_POOL
+ return true;
+#else
+ return false;
+#endif
+}
+
+/* Caller must provide appropriate safe context, e.g. NAPI. */
+void page_pool_update_nid(struct page_pool *pool, int new_nid);
+
+#endif /* _NET_PAGE_POOL_H */
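
Taken together, the declarations above form the driver-facing page_pool API. A minimal sketch of wiring one pool per RX queue, assuming a conventional NIC driver; the rxq_pool_setup() name, sizing, and GFP flag are illustrative, not part of this header:

/* Sketch only: create a page pool for a single RX queue. */
static struct page_pool *rxq_pool_setup(struct device *dev,
					struct napi_struct *napi)
{
	struct page_pool_params pp_params = {
		.order		= 0,			/* order-0 pages */
		.pool_size	= 256,			/* ptr_ring entries */
		.nid		= NUMA_NO_NODE,
		.dev		= dev,			/* DMA pre-mapping */
		.napi		= napi,			/* sole page consumer */
		.dma_dir	= DMA_FROM_DEVICE,
	};

	return page_pool_create(&pp_params);	/* ERR_PTR() on failure */
}

In the NAPI poll loop, pages would then come from page_pool_alloc_pages(pool, GFP_ATOMIC), and teardown goes through page_pool_destroy(pool).
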
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 862f1719b523..cf5ecae4a2fc 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -109,4 +109,25 @@ void phonet_sysctl_exit(void);
int isi_register(void);
void isi_unregister(void);
+static inline bool sk_is_phonet(struct sock *sk)
+{
+ return sk->sk_family == PF_PHONET;
+}
+
+static inline int phonet_sk_ioctl(struct sock *sk, unsigned int cmd,
+ void __user *arg)
+{
+ int karg;
+
+ switch (cmd) {
+ case SIOCPNADDRESOURCE:
+ case SIOCPNDELRESOURCE:
+ if (get_user(karg, (int __user *)arg))
+ return -EFAULT;
+
+ return sk->sk_prot->ioctl(sk, cmd, &karg);
+ }
+ /* A positive return value means that the ioctl was not processed */
+ return 1;
+}
#endif
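
The helper above follows the pattern introduced by the socket ioctl rework: the user argument is copied into kernel memory once, and a positive return tells the caller the command was not consumed. A hedged sketch of such a caller; pn_common_ioctl() is purely illustrative:

/* Sketch only: socket-level dispatch around phonet_sk_ioctl(). */
static int pn_sock_ioctl(struct socket *sock, unsigned int cmd,
			 unsigned long arg)
{
	struct sock *sk = sock->sk;
	int rc;

	rc = phonet_sk_ioctl(sk, cmd, (void __user *)arg);
	if (rc <= 0)
		return rc;	/* handled (0) or failed (<0) */

	/* positive: not a resource ioctl, take the common path */
	return pn_common_ioctl(sk, cmd, arg);
}
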
diff --git a/include/net/pie.h b/include/net/pie.h
index 3fe2361e03b4..01cbc66825a4 100644
--- a/include/net/pie.h
+++ b/include/net/pie.h
@@ -17,7 +17,7 @@
/**
* struct pie_params - contains pie parameters
* @target: target delay in pschedtime
- * @tudpate: interval at which drop probability is calculated
+ * @tupdate: interval at which drop probability is calculated
* @limit: total number of packets that can be in the queue
* @alpha: parameter to control drop probability
* @beta: parameter to control drop probability
diff --git a/include/net/ping.h b/include/net/ping.h
index 9233ad3de0ad..bc7779262e60 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -16,11 +16,7 @@
#define PING_HTABLE_SIZE 64
#define PING_HTABLE_MASK (PING_HTABLE_SIZE-1)
-/*
- * gid_t is either uint or ushort. We want to pass it to
- * proc_dointvec_minmax(), so it must not be larger than MAX_INT
- */
-#define GID_T_MAX (((gid_t)~0U) >> 1)
+#define GID_T_MAX (((gid_t)~0U) - 1)
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
struct pingv6_ops {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index b3b5b0b62f16..f308e8268651 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -138,19 +138,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
return NULL;
}
-static inline
-int tc_setup_cb_block_register(struct tcf_block *block, flow_setup_cb_t *cb,
- void *cb_priv)
-{
- return 0;
-}
-
-static inline
-void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
- void *cb_priv)
-{
-}
-
static inline int tcf_classify(struct sk_buff *skb,
const struct tcf_block *block,
const struct tcf_proto *tp,
@@ -866,8 +853,10 @@ struct tc_htb_qopt_offload {
u32 parent_classid;
u16 classid;
u16 qid;
+ u32 quantum;
u64 rate;
u64 ceil;
+ u8 prio;
};
#define TC_HTB_CLASSID_ROOT U32_MAX
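
The new quantum and prio fields extend the HTB offload description so hardware can mirror a class's borrowing quantum and priority. A hedged sketch of a driver consuming them; the hw_class_* helpers are hypothetical:

/* Sketch only: servicing an HTB offload request. */
static int foo_setup_tc_htb(struct net_device *dev,
			    struct tc_htb_qopt_offload *opt)
{
	/* rate/ceil in bytes per second, as before */
	hw_class_set_rate(dev, opt->classid, opt->rate, opt->ceil);

	/* new fields: borrowing quantum and strict priority */
	hw_class_set_quantum(dev, opt->classid, opt->quantum);
	hw_class_set_prio(dev, opt->classid, opt->prio);
	return 0;
}
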
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2016839991a4..15960564e0c3 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -64,7 +64,6 @@ static inline psched_time_t psched_get_time(void)
}
struct qdisc_watchdog {
- u64 last_expires;
struct hrtimer timer;
struct Qdisc *qdisc;
};
@@ -128,12 +127,14 @@ static inline void qdisc_run(struct Qdisc *q)
}
}
+extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
+
/* Calculate maximal size of packet seen by hard_start_xmit
routine of this device.
*/
static inline unsigned int psched_mtu(const struct net_device *dev)
{
- return dev->mtu + dev->hard_header_len;
+ return READ_ONCE(dev->mtu) + dev->hard_header_len;
}
static inline struct net *qdisc_net(struct Qdisc *q)
@@ -167,11 +168,13 @@ struct tc_mqprio_caps {
struct tc_mqprio_qopt_offload {
/* struct tc_mqprio_qopt must always be the first element */
struct tc_mqprio_qopt qopt;
+ struct netlink_ext_ack *extack;
u16 mode;
u16 shaper;
u32 flags;
u64 min_rate[TC_QOPT_MAX_QUEUE];
u64 max_rate[TC_QOPT_MAX_QUEUE];
+ unsigned long preemptible_tcs;
};
struct tc_taprio_caps {
@@ -184,6 +187,32 @@ struct tc_taprio_caps {
bool broken_mqprio:1;
};
+enum tc_taprio_qopt_cmd {
+ TAPRIO_CMD_REPLACE,
+ TAPRIO_CMD_DESTROY,
+ TAPRIO_CMD_STATS,
+ TAPRIO_CMD_QUEUE_STATS,
+};
+
+/**
+ * struct tc_taprio_qopt_stats - IEEE 802.1Qbv statistics
+ * @window_drops: Frames that were dropped because they were too large to be
+ * transmitted in any of the allotted time windows (open gates) for their
+ * traffic class.
+ * @tx_overruns: Frames still being transmitted by the MAC after the
+ * transmission gate associated with their traffic class has closed.
+ * Equivalent to `12.29.1.1.2 TransmissionOverrun` from 802.1Q-2018.
+ */
+struct tc_taprio_qopt_stats {
+ u64 window_drops;
+ u64 tx_overruns;
+};
+
+struct tc_taprio_qopt_queue_stats {
+ int queue;
+ struct tc_taprio_qopt_stats stats;
+};
+
struct tc_taprio_sched_entry {
u8 command; /* TC_TAPRIO_CMD_* */
@@ -193,15 +222,26 @@ struct tc_taprio_sched_entry {
};
struct tc_taprio_qopt_offload {
- struct tc_mqprio_qopt_offload mqprio;
- u8 enable;
- ktime_t base_time;
- u64 cycle_time;
- u64 cycle_time_extension;
- u32 max_sdu[TC_MAX_QUEUE];
-
- size_t num_entries;
- struct tc_taprio_sched_entry entries[];
+ enum tc_taprio_qopt_cmd cmd;
+
+ union {
+ /* TAPRIO_CMD_STATS */
+ struct tc_taprio_qopt_stats stats;
+ /* TAPRIO_CMD_QUEUE_STATS */
+ struct tc_taprio_qopt_queue_stats queue_stats;
+ /* TAPRIO_CMD_REPLACE */
+ struct {
+ struct tc_mqprio_qopt_offload mqprio;
+ struct netlink_ext_ack *extack;
+ ktime_t base_time;
+ u64 cycle_time;
+ u64 cycle_time_extension;
+ u32 max_sdu[TC_MAX_QUEUE];
+
+ size_t num_entries;
+ struct tc_taprio_sched_entry entries[];
+ };
+ };
};
#if IS_ENABLED(CONFIG_NET_SCH_TAPRIO)
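
With the cmd/union layout above, one offload structure now carries replace, destroy, and statistics requests. A hedged driver-side dispatch sketch; the foo_* helpers are hypothetical:

/* Sketch only: dispatch on the new cmd field in ndo_setup_tc(). */
static int foo_setup_taprio(struct net_device *dev,
			    struct tc_taprio_qopt_offload *offload)
{
	switch (offload->cmd) {
	case TAPRIO_CMD_REPLACE:
		return foo_taprio_replace(dev, offload);
	case TAPRIO_CMD_DESTROY:
		return foo_taprio_destroy(dev);
	case TAPRIO_CMD_STATS:
		offload->stats.window_drops = foo_get_window_drops(dev);
		offload->stats.tx_overruns = foo_get_tx_overruns(dev);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
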
diff --git a/include/net/raw.h b/include/net/raw.h
index 3af5289fdead..32a61481a253 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -22,7 +22,7 @@
extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
-bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
+bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num,
__be32 raddr, __be32 laddr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
@@ -83,10 +83,7 @@ struct raw_sock {
u32 ipmr_table;
};
-static inline struct raw_sock *raw_sk(const struct sock *sk)
-{
- return (struct raw_sock *)sk;
-}
+#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index bc70909625f6..82810cbe3798 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -6,7 +6,7 @@
#include <net/raw.h>
extern struct raw_hashinfo raw_v6_hashinfo;
-bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num,
+bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num,
const struct in6_addr *loc_addr,
const struct in6_addr *rmt_addr, int dif, int sdif);
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 896191f420d5..b2cb4a9eb04d 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -140,17 +140,6 @@ struct regulatory_request {
* otherwise initiating radiation is not allowed. This will enable the
* relaxations enabled under the CFG80211_REG_RELAX_NO_IR configuration
* option
- * @REGULATORY_IGNORE_STALE_KICKOFF: the regulatory core will _not_ make sure
- * all interfaces on this wiphy reside on allowed channels. If this flag
- * is not set, upon a regdomain change, the interfaces are given a grace
- * period (currently 60 seconds) to disconnect or move to an allowed
- * channel. Interfaces on forbidden channels are forcibly disconnected.
- * Currently these types of interfaces are supported for enforcement:
- * NL80211_IFTYPE_ADHOC, NL80211_IFTYPE_STATION, NL80211_IFTYPE_AP,
- * NL80211_IFTYPE_AP_VLAN, NL80211_IFTYPE_MONITOR,
- * NL80211_IFTYPE_P2P_CLIENT, NL80211_IFTYPE_P2P_GO,
- * NL80211_IFTYPE_P2P_DEVICE. The flag will be set by default if a device
- * includes any modes unsupported for enforcement checking.
* @REGULATORY_WIPHY_SELF_MANAGED: for devices that employ wiphy-specific
* regdom management. These devices will ignore all regdom changes not
* originating from their own wiphy.
@@ -177,7 +166,7 @@ enum ieee80211_regulatory_flags {
REGULATORY_COUNTRY_IE_FOLLOW_POWER = BIT(3),
REGULATORY_COUNTRY_IE_IGNORE = BIT(4),
REGULATORY_ENABLE_RELAX_NO_IR = BIT(5),
- REGULATORY_IGNORE_STALE_KICKOFF = BIT(6),
+ /* reuse bit 6 next time */
REGULATORY_WIPHY_SELF_MANAGED = BIT(7),
};
diff --git a/include/net/route.h b/include/net/route.h
index fe00b0a2e475..51a45b1887b5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -78,9 +78,6 @@ struct rtable {
/* Miscellaneous cached information */
u32 rt_mtu_locked:1,
rt_pmtu:31;
-
- struct list_head rt_uncached;
- struct uncached_list *rt_uncached_list;
};
static inline bool rt_is_input_route(const struct rtable *rt)
@@ -166,12 +163,12 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
}
static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4,
- struct sock *sk,
+ const struct sock *sk,
__be32 daddr, __be32 saddr,
__be16 dport, __be16 sport,
__u8 proto, __u8 tos, int oif)
{
- flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
+ flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
@@ -301,10 +298,10 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
{
__u8 flow_flags = 0;
- if (inet_sk(sk)->transparent)
+ if (inet_test_bit(TRANSPARENT, sk))
flow_flags |= FLOWI_FLAG_ANYSRC;
- flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
+ flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), protocol, flow_flags, dst,
src, dport, sport, sk->sk_uid);
}
@@ -312,7 +309,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
__be32 src, int oif, u8 protocol,
__be16 sport, __be16 dport,
- struct sock *sk)
+ const struct sock *sk)
{
struct net *net = sock_net(sk);
struct rtable *rt;
@@ -324,8 +321,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
if (IS_ERR(rt))
return rt;
ip_rt_put(rt);
- flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr,
- fl4->saddr);
+ flowi4_update_output(fl4, oif, fl4->daddr, fl4->saddr);
}
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(net, fl4, sk);
@@ -334,14 +330,13 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
__be16 orig_sport, __be16 orig_dport,
__be16 sport, __be16 dport,
- struct sock *sk)
+ const struct sock *sk)
{
if (sport != orig_sport || dport != orig_dport) {
fl4->fl4_dport = dport;
fl4->fl4_sport = sport;
ip_rt_put(rt);
- flowi4_update_output(fl4, sk->sk_bound_dev_if,
- RT_CONN_FLAGS(sk), fl4->daddr,
+ flowi4_update_output(fl4, sk->sk_bound_dev_if, fl4->daddr,
fl4->saddr);
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(sock_net(sk), fl4, sk);
diff --git a/include/net/rpl.h b/include/net/rpl.h
index 308ef0a05cae..74734191c458 100644
--- a/include/net/rpl.h
+++ b/include/net/rpl.h
@@ -23,12 +23,6 @@ static inline int rpl_init(void)
static inline void rpl_exit(void) {}
#endif
-/* Worst decompression memory usage ipv6 address (16) + pad 7 */
-#define IPV6_RPL_SRH_WORST_SWAP_SIZE (sizeof(struct in6_addr) + 7)
-
-size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
- unsigned char cmpre);
-
void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr,
const struct ipv6_rpl_sr_hdr *inhdr,
const struct in6_addr *daddr, unsigned char n);
diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h
index 040f07b47f1f..b2283762e446 100644
--- a/include/net/rsi_91x.h
+++ b/include/net/rsi_91x.h
@@ -1,4 +1,4 @@
-/**
+/*
* Copyright (c) 2017 Redpine Signals Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index d9076a7a430c..6506221c5fe3 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
u32 portid, const struct nlmsghdr *nlh);
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
- struct netlink_ext_ack *exterr);
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+ struct netlink_ext_ack *exterr);
struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index fab5ba3e61b7..f232512505f8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -137,6 +137,13 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
refcount_inc(&qdisc->refcnt);
}
+static inline bool qdisc_refcount_dec_if_one(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return true;
+ return refcount_dec_if_one(&qdisc->refcnt);
+}
+
/* Intended to be used by unlocked users, when concurrent qdisc release is
* possible.
*/
@@ -545,7 +552,7 @@ static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
{
- return qdisc->dev_queue->qdisc_sleeping;
+ return rcu_dereference_rtnl(qdisc->dev_queue->qdisc_sleeping);
}
static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
@@ -592,6 +599,7 @@ get_default_qdisc_ops(const struct net_device *dev, int ntx)
struct Qdisc_class_common {
u32 classid;
+ unsigned int filter_cnt;
struct hlist_node hnode;
};
@@ -626,6 +634,31 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
return NULL;
}
+static inline bool qdisc_class_in_use(const struct Qdisc_class_common *cl)
+{
+ return cl->filter_cnt > 0;
+}
+
+static inline void qdisc_class_get(struct Qdisc_class_common *cl)
+{
+ unsigned int res;
+
+ if (check_add_overflow(cl->filter_cnt, 1, &res))
+ WARN(1, "Qdisc class overflow");
+
+ cl->filter_cnt = res;
+}
+
+static inline void qdisc_class_put(struct Qdisc_class_common *cl)
+{
+ unsigned int res;
+
+ if (check_sub_overflow(cl->filter_cnt, 1, &res))
+ WARN(1, "Qdisc class underflow");
+
+ cl->filter_cnt = res;
+}
+
static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
{
u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY;
@@ -652,6 +685,7 @@ void dev_deactivate_many(struct list_head *head);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
+void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_put(struct Qdisc *qdisc);
void qdisc_put_unlocked(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
@@ -695,7 +729,7 @@ int skb_do_redirect(struct sk_buff *);
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
return skb->tc_at_ingress;
#else
return false;
@@ -754,7 +788,9 @@ static inline bool qdisc_tx_changing(const struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping)
+
+ if (rcu_access_pointer(txq->qdisc) !=
+ rcu_access_pointer(txq->qdisc_sleeping))
return true;
}
return false;
@@ -1180,20 +1216,6 @@ static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_DROP;
}
-/* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how
- long it will take to send a packet given its size.
- */
-static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen)
-{
- int slot = pktlen + rtab->rate.cell_align + rtab->rate.overhead;
- if (slot < 0)
- slot = 0;
- slot >>= rtab->rate.cell_log;
- if (slot > 255)
- return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF];
- return rtab->data[slot];
-}
-
struct psched_ratecfg {
u64 rate_bytes_ps; /* bytes per second */
u32 mult;
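
The qdisc_class_get()/qdisc_class_put() helpers added above centralize the per-class filter reference counting that classful qdiscs previously open-coded, now with overflow/underflow warnings. A hedged sketch of bind/unbind hooks using them; foo_find() is hypothetical:

/* Sketch only: filter bind/unbind in a classful qdisc. */
static unsigned long foo_bind_filter(struct Qdisc *sch,
				     unsigned long parent, u32 classid)
{
	struct Qdisc_class_common *cl = foo_find(sch, classid);

	if (cl)
		qdisc_class_get(cl);	/* WARNs instead of wrapping */
	return (unsigned long)cl;
}

static void foo_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	qdisc_class_put((struct Qdisc_class_common *)arg);
}

A delete path would then refuse removal while qdisc_class_in_use() is true.
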
diff --git a/include/net/scm.h b/include/net/scm.h
index 1ce365f4c256..c5bcdf65f55c 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -105,19 +105,64 @@ static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct sc
}
}
}
+
+static inline bool scm_has_secdata(struct socket *sock)
+{
+ return test_bit(SOCK_PASSSEC, &sock->flags);
+}
#else
static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
{ }
+
+static inline bool scm_has_secdata(struct socket *sock)
+{
+ return false;
+}
#endif /* CONFIG_SECURITY_NETWORK */
-static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
- struct scm_cookie *scm, int flags)
+static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
+{
+ struct file *pidfd_file = NULL;
+ int pidfd;
+
+ /*
+ * put_cmsg() doesn't return an error if CMSG is truncated,
+ * that's why we need to open-code these checks here.
+ */
+ if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
+ (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
+ msg->msg_flags |= MSG_CTRUNC;
+ return;
+ }
+
+ if (!scm->pid)
+ return;
+
+ pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
+
+ if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
+ if (pidfd_file) {
+ put_unused_fd(pidfd);
+ fput(pidfd_file);
+ }
+
+ return;
+ }
+
+ if (pidfd_file)
+ fd_install(pidfd, pidfd_file);
+}
+
+static inline bool __scm_recv_common(struct socket *sock, struct msghdr *msg,
+ struct scm_cookie *scm, int flags)
{
if (!msg->msg_control) {
- if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp)
+ if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags) ||
+ scm->fp || scm_has_secdata(sock))
msg->msg_flags |= MSG_CTRUNC;
scm_destroy(scm);
- return;
+ return false;
}
if (test_bit(SOCK_PASSCRED, &sock->flags)) {
@@ -130,16 +175,34 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
}
- scm_destroy_cred(scm);
-
scm_passec(sock, msg, scm);
- if (!scm->fp)
+ if (scm->fp)
+ scm_detach_fds(msg, scm);
+
+ return true;
+}
+
+static inline void scm_recv(struct socket *sock, struct msghdr *msg,
+ struct scm_cookie *scm, int flags)
+{
+ if (!__scm_recv_common(sock, msg, scm, flags))
return;
-
- scm_detach_fds(msg, scm);
+
+ scm_destroy_cred(scm);
}
+static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
+ struct scm_cookie *scm, int flags)
+{
+ if (!__scm_recv_common(sock, msg, scm, flags))
+ return;
+
+ if (test_bit(SOCK_PASSPIDFD, &sock->flags))
+ scm_pidfd_recv(msg, scm);
+
+ scm_destroy_cred(scm);
+}
#endif /* __LINUX_NET_SCM_H */
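
scm_pidfd_recv() above delivers a pidfd for the sending task as an SCM_PIDFD control message. A hedged userspace sketch of the receiving side, assuming the installed UAPI headers define SCM_PIDFD and the socket has SO_PASSPIDFD enabled:

/* Sketch only: read one data byte plus the SCM_PIDFD cmsg. */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int recv_peer_pidfd(int fd)
{
	char data, cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= cbuf,
		.msg_controllen	= sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int pidfd = -1;

	if (recvmsg(fd, &msg, 0) < 0)
		return -1;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_PIDFD)
			memcpy(&pidfd, CMSG_DATA(cmsg), sizeof(pidfd));

	return pidfd;	/* pidfd for the sending task, or -1 */
}
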
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index c335dd01a597..a2310fa995f6 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -148,8 +148,6 @@ void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
void sctp_icmp_proto_unreachable(struct sock *sk,
struct sctp_association *asoc,
struct sctp_transport *t);
-void sctp_backlog_migrate(struct sctp_association *assoc,
- struct sock *oldsk, struct sock *newsk);
int sctp_transport_hashtable_init(void);
void sctp_transport_hashtable_destroy(void);
int sctp_hash_transport(struct sctp_transport *t);
@@ -425,11 +423,11 @@ static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
* the chunk length to indicate when to stop. Make sure
* there is room for a param header too.
*/
-#define sctp_walk_params(pos, chunk, member)\
-_sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
+#define sctp_walk_params(pos, chunk)\
+_sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length))
-#define _sctp_walk_params(pos, chunk, end, member)\
-for (pos.v = chunk->member;\
+#define _sctp_walk_params(pos, chunk, end)\
+for (pos.v = (u8 *)(chunk + 1);\
(pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\
(void *)chunk + end) &&\
pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
@@ -452,8 +450,8 @@ for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \
_sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
#define _sctp_walk_fwdtsn(pos, chunk, end)\
-for (pos = chunk->subh.fwdtsn_hdr->skip;\
- (void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\
+for (pos = (void *)(chunk->subh.fwdtsn_hdr + 1);\
+ (void *)pos <= (void *)(chunk->subh.fwdtsn_hdr + 1) + end - sizeof(struct sctp_fwdtsn_skip);\
pos++)
/* External references. */
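
After this change, sctp_walk_params() no longer takes a member name: iteration starts right past the chunk header. A hedged sketch, where init_chunk is an assumed pointer to a received INIT chunk:

/* Sketch only: walk every parameter of an INIT chunk. */
union sctp_params pos;

sctp_walk_params(pos, init_chunk)
	pr_debug("param type %u, len %u\n",
		 ntohs(pos.p->type), ntohs(pos.p->length));
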
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index f37c7a558d6d..64c42bd56bb2 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -156,7 +156,6 @@ sctp_state_fn_t sctp_sf_do_6_2_sack;
sctp_state_fn_t sctp_sf_autoclose_timer_expire;
/* Prototypes for utility support functions. */
-__u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
const struct sctp_sm_table_entry *sctp_sm_lookup_event(
struct net *net,
enum sctp_event_type event_type,
@@ -166,8 +165,6 @@ int sctp_chunk_iif(const struct sctp_chunk *);
struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *,
struct sctp_chunk *,
gfp_t gfp);
-__u32 sctp_generate_verification_tag(void);
-void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag);
/* Prototypes for chunk-building functions. */
struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index fa00dc20a0d7..572d73fdcd5e 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -58,5 +58,7 @@ void sctp_sched_ops_register(enum sctp_sched_type sched,
struct sctp_sched_ops *sched_ops);
void sctp_sched_ops_prio_init(void);
void sctp_sched_ops_rr_init(void);
+void sctp_sched_ops_fc_init(void);
+void sctp_sched_ops_wfq_init(void);
#endif /* __sctp_stream_sched_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e1f6e7fc2b11..5a24d6d8522a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -332,7 +332,7 @@ struct sctp_cookie {
* the association TCB is re-constructed from the cookie.
*/
__u32 raw_addr_list_len;
- struct sctp_init_chunk peer_init[];
+ /* struct sctp_init_chunk peer_init[]; */
};
@@ -1122,8 +1122,6 @@ void sctp_outq_free(struct sctp_outq*);
void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
int sctp_outq_is_empty(const struct sctp_outq *);
-void sctp_outq_restart(struct sctp_outq *);
-
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
enum sctp_retransmit_reason reason);
void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
@@ -1429,6 +1427,11 @@ struct sctp_stream_out_ext {
struct {
struct list_head rr_list;
};
+ struct {
+ struct list_head fc_list;
+ __u32 fc_length;
+ __u16 fc_weight;
+ };
};
};
@@ -1475,6 +1478,9 @@ struct sctp_stream {
/* The next stream in line */
struct sctp_stream_out_ext *rr_next;
};
+ struct {
+ struct list_head fc_list;
+ };
};
struct sctp_stream_interleave *si;
};
@@ -1703,7 +1709,6 @@ struct sctp_association {
__u16 ecn_capable:1, /* Can peer do ECN? */
ipv4_address:1, /* Peer understands IPv4 addresses? */
ipv6_address:1, /* Peer understands IPv6 addresses? */
- hostname_address:1, /* Peer understands DNS addresses? */
asconf_capable:1, /* Does peer support ADDIP? */
prsctp_capable:1, /* Can peer do PR-SCTP? */
reconf_capable:1, /* Can peer do RE-CONFIG? */
diff --git a/include/net/smc.h b/include/net/smc.h
index 597cb9381182..a002552be29c 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -67,6 +67,7 @@ struct smcd_ops {
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
+ int (*supports_v2)(void);
u8* (*get_system_eid)(void);
u64 (*get_local_gid)(struct smcd_dev *dev);
u16 (*get_chid)(struct smcd_dev *dev);
diff --git a/include/net/sock.h b/include/net/sock.h
index 573f2bf7e0de..11d503417591 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -336,6 +336,7 @@ struct sk_filter;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is buffer sending space available
@@ -428,6 +429,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;
struct sk_filter __rcu *sk_filter;
union {
@@ -1150,8 +1152,12 @@ static inline void sock_rps_record_flow(const struct sock *sk)
* OR an additional socket flag
* [1] : sk_state and sk_prot are in the same cache line.
*/
- if (sk->sk_state == TCP_ESTABLISHED)
- sock_rps_record_flow_hash(sk->sk_rxhash);
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
}
#endif
}
@@ -1160,20 +1166,25 @@ static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
- if (unlikely(sk->sk_rxhash != skb->hash))
- sk->sk_rxhash = skb->hash;
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in sock_rps_record_flow().
+ */
+ if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash))
+ WRITE_ONCE(sk->sk_rxhash, skb->hash);
#endif
}
static inline void sock_rps_reset_rxhash(struct sock *sk)
{
#ifdef CONFIG_RPS
- sk->sk_rxhash = 0;
+ /* Paired with READ_ONCE() in sock_rps_record_flow() */
+ WRITE_ONCE(sk->sk_rxhash, 0);
#endif
}
#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1183,6 +1194,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
@@ -1246,7 +1258,7 @@ struct proto {
bool kern);
int (*ioctl)(struct sock *sk, int cmd,
- unsigned long arg);
+ int *karg);
int (*init)(struct sock *sk);
void (*destroy)(struct sock *sk);
void (*shutdown)(struct sock *sk, int how);
@@ -1265,8 +1277,7 @@ struct proto {
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len);
- int (*sendpage)(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
+ void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
struct sockaddr *addr, int addr_len);
int (*bind_add)(struct sock *sk,
@@ -1312,6 +1323,7 @@ struct proto {
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non-atomically.
+ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
* All of __sk_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
@@ -1328,6 +1340,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
+ unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
@@ -1409,6 +1422,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
return sk->sk_prot->memory_pressure != NULL;
}
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure &&
+ !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
static inline bool sk_under_memory_pressure(const struct sock *sk)
{
if (!sk->sk_prot->memory_pressure)
@@ -1418,7 +1437,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return !!*sk->sk_prot->memory_pressure;
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
static inline long
@@ -1495,7 +1514,7 @@ proto_memory_pressure(struct proto *prot)
{
if (!prot->memory_pressure)
return false;
- return !!*prot->memory_pressure;
+ return !!READ_ONCE(*prot->memory_pressure);
}
@@ -1906,10 +1925,6 @@ int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
int sock_no_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma);
-ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
- size_t size, int flags);
-ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
/*
* Functions to fill in entries in struct proto_ops when a protocol
@@ -2088,6 +2103,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
}
kuid_t sock_i_uid(struct sock *sk);
+unsigned long __sock_i_ino(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
@@ -2131,7 +2147,7 @@ sk_dst_get(struct sock *sk)
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
- if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ if (dst && !rcuref_get(&dst->__rcuref))
dst = NULL;
rcu_read_unlock();
return dst;
@@ -2697,7 +2713,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
else
sock_write_timestamp(sk, kt);
- if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
+ if (sock_flag(sk, SOCK_WIFI_STATUS) && skb_wifi_acked_valid(skb))
__sock_recv_wifi_status(msg, sk, skb);
}
@@ -2718,7 +2734,7 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
- else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
+ else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP))
sock_write_timestamp(sk, 0);
}
@@ -2806,20 +2822,23 @@ sk_is_refcounted(struct sock *sk)
* skb_steal_sock - steal a socket from an sk_buff
* @skb: sk_buff to steal the socket from
* @refcounted: is set to true if the socket is reference-counted
+ * @prefetched: is set to true if the socket was assigned from BPF
*/
static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted)
+skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
{
if (skb->sk) {
struct sock *sk = skb->sk;
*refcounted = true;
- if (skb_sk_is_prefetched(skb))
+ *prefetched = skb_sk_is_prefetched(skb);
+ if (*prefetched)
*refcounted = sk_is_refcounted(sk);
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
+ *prefetched = false;
*refcounted = false;
return NULL;
}
@@ -2961,6 +2980,9 @@ int sock_get_timeout(long timeo, void *optval, bool old_timeval);
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
sockptr_t optval, int optlen, bool old_timeval);
+int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
+ void __user *arg, void *karg, size_t size);
+int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
static inline bool sk_is_readable(struct sock *sk)
{
if (sk->sk_prot->sock_is_readable)
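
The ioctl rework visible in this file replaces the unsigned long user argument with a kernel-side int: sk_ioctl() copies the value between user and kernel space around the protocol handler (via sock_ioctl_inout() for the common in/out commands). A hedged sketch of a handler under the new convention; foo_rx_queued() is hypothetical:

/* Sketch only: a protocol ioctl handler taking a kernel pointer. */
static int foo_ioctl(struct sock *sk, int cmd, int *karg)
{
	switch (cmd) {
	case SIOCINQ:			/* bytes ready to read */
		*karg = foo_rx_queued(sk);
		return 0;
	default:
		return -ENOIOCTLCMD;
	}
}
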
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index ca0312b78294..a43062d4c734 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -201,8 +201,6 @@ struct switchdev_obj_in_state_mrp {
#define SWITCHDEV_OBJ_IN_STATE_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_in_state_mrp, obj)
-typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
-
struct switchdev_brport {
struct net_device *dev;
const void *ctx;
@@ -231,6 +229,7 @@ enum switchdev_notifier_type {
SWITCHDEV_BRPORT_OFFLOADED,
SWITCHDEV_BRPORT_UNOFFLOADED,
+ SWITCHDEV_BRPORT_REPLAY,
};
struct switchdev_notifier_info {
@@ -299,6 +298,11 @@ void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb);
+int switchdev_bridge_port_replay(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack);
void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
@@ -322,10 +326,6 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack);
-void switchdev_port_fwd_mark_set(struct net_device *dev,
- struct net_device *group_dev,
- bool joining);
-
int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
const struct switchdev_notifier_fdb_info *fdb_info,
bool (*check_cb)(const struct net_device *dev),
diff --git a/include/net/tcp.h b/include/net/tcp.h
index db9f828e9d1e..91688d0dadcd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -161,8 +161,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_KEEPCNT 127
#define MAX_TCP_SYNCNT 127
-#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */
-
#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
#define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
* after this time. It should be equal
@@ -324,25 +322,20 @@ int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);
void tcp_remove_empty_skb(struct sock *sk);
-int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
size_t size, struct ubuf_info *uarg);
-int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
- int flags);
-int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
- size_t size, int flags);
-ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
- size_t size, int flags);
+void tcp_splice_eof(struct socket *sock);
int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+int tcp_wmem_schedule(struct sock *sk, int copy);
void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
int size_goal);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
-int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+int tcp_ioctl(struct sock *sk, int cmd, int *karg);
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
@@ -352,10 +345,9 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family);
ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
-struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule);
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
@@ -529,7 +521,7 @@ static inline void tcp_synq_overflow(const struct sock *sk)
last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
if (!time_between32(now, last_overflow, last_overflow + HZ))
- WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
+ WRITE_ONCE(tcp_sk_rw(sk)->rx_opt.ts_recent_stamp, now);
}
/* syncookies: no recent synqueue overflow on this listening socket? */
@@ -611,7 +603,6 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
unsigned int mss_now, gfp_t gfp);
void tcp_send_probe0(struct sock *);
-void tcp_send_partial(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority);
@@ -629,9 +620,9 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk, struct sk_buff *skb);
-void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
void tcp_check_space(struct sock *sk);
+void tcp_sack_compress_send_ack(struct sock *sk);
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
@@ -1117,6 +1108,9 @@ struct tcp_congestion_ops {
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
+int tcp_update_congestion_control(struct tcp_congestion_ops *type,
+ struct tcp_congestion_ops *old_type);
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
void tcp_assign_congestion_control(struct sock *sk);
void tcp_init_congestion_control(struct sock *sk);
@@ -1433,13 +1427,39 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
__u32 *window_clamp, int wscale_ok,
__u8 *rcv_wscale, __u32 init_rcv_wnd);
+static inline int __tcp_win_from_space(u8 scaling_ratio, int space)
+{
+ s64 scaled_space = (s64)space * scaling_ratio;
+
+ return scaled_space >> TCP_RMEM_TO_WIN_SCALE;
+}
+
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+ return __tcp_win_from_space(tcp_sk(sk)->scaling_ratio, space);
+}
+
+/* inverse of __tcp_win_from_space() */
+static inline int __tcp_space_from_win(u8 scaling_ratio, int win)
+{
+ u64 val = (u64)win << TCP_RMEM_TO_WIN_SCALE;
- return tcp_adv_win_scale <= 0 ?
- (space>>(-tcp_adv_win_scale)) :
- space - (space>>tcp_adv_win_scale);
+ do_div(val, scaling_ratio);
+ return val;
+}
+
+static inline int tcp_space_from_win(const struct sock *sk, int win)
+{
+ return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win);
+}
+
+static inline void tcp_scaling_ratio_init(struct sock *sk)
+{
+ /* Assume a conservative default of 1200 bytes of payload per 4K page.
+ * This may be adjusted later in tcp_measure_rcv_mss().
+ */
+ tcp_sk(sk)->scaling_ratio = (1200 << TCP_RMEM_TO_WIN_SCALE) /
+ SKB_TRUESIZE(4096);
}
/* Note: caller must be prepared to deal with negative returns */
@@ -1467,6 +1487,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
}
void tcp_cleanup_rbuf(struct sock *sk, int copied);
+void __tcp_cleanup_rbuf(struct sock *sk, int copied);
+
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
@@ -1508,25 +1530,38 @@ void tcp_leave_memory_pressure(struct sock *sk);
static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
+
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
+ * and do_tcp_setsockopt().
+ */
+ val = READ_ONCE(tp->keepalive_intvl);
- return tp->keepalive_intvl ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
- return tp->keepalive_time ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
+ val = READ_ONCE(tp->keepalive_time);
+
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
+
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
+ * and do_tcp_setsockopt().
+ */
+ val = READ_ONCE(tp->keepalive_probes);
- return tp->keepalive_probes ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1995,7 +2030,7 @@ static inline bool inet_sk_transparent(const struct sock *sk)
case TCP_NEW_SYN_RECV:
return inet_rsk(inet_reqsk(sk))->no_srccheck;
}
- return inet_sk(sk)->transparent;
+ return inet_test_bit(TRANSPARENT, sk);
}
/* Determines whether this is a thin stream (which may suffer from
@@ -2040,14 +2075,18 @@ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff))
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
-int tcp_gro_complete(struct sk_buff *skb);
+void tcp_gro_complete(struct sk_buff *skb);
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ u32 val;
+
+ val = READ_ONCE(tp->notsent_lowat);
+
+ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
bool tcp_stream_memory_free(const struct sock *sk, int wake);
@@ -2318,11 +2357,18 @@ struct sk_msg;
struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
+#ifdef CONFIG_INET
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
+#else
+static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+}
+#endif
+
int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
struct sk_msg *msg, u32 bytes, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
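
The scaling_ratio helpers above replace the old tcp_adv_win_scale shift: the advertised window is now space * scaling_ratio >> TCP_RMEM_TO_WIN_SCALE, and tcp_space_from_win() is its inverse. A hedged sketch, with 'sk' assumed to be a TCP socket:

/* Sketch only: round-trip receive-buffer bytes and window. */
int space = READ_ONCE(sk->sk_rcvbuf);
int win   = tcp_win_from_space(sk, space);  /* estimated payload share */
int back  = tcp_space_from_win(sk, win);    /* ~space, modulo rounding */

With the conservative default ratio (1200 payload bytes per SKB_TRUESIZE(4096) of memory), only that payload fraction of the buffer is promised as window until tcp_measure_rcv_mss() refines the ratio.
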
diff --git a/include/net/tcx.h b/include/net/tcx.h
new file mode 100644
index 000000000000..264f147953ba
--- /dev/null
+++ b/include/net/tcx.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef __NET_TCX_H
+#define __NET_TCX_H
+
+#include <linux/bpf.h>
+#include <linux/bpf_mprog.h>
+
+#include <net/sch_generic.h>
+
+struct mini_Qdisc;
+
+struct tcx_entry {
+ struct mini_Qdisc __rcu *miniq;
+ struct bpf_mprog_bundle bundle;
+ bool miniq_active;
+ struct rcu_head rcu;
+};
+
+struct tcx_link {
+ struct bpf_link link;
+ struct net_device *dev;
+ u32 location;
+};
+
+static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress)
+{
+#ifdef CONFIG_NET_XGRESS
+ skb->tc_at_ingress = ingress;
+#endif
+}
+
+#ifdef CONFIG_NET_XGRESS
+static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
+{
+ struct bpf_mprog_bundle *bundle = entry->parent;
+
+ return container_of(bundle, struct tcx_entry, bundle);
+}
+
+static inline struct tcx_link *tcx_link(struct bpf_link *link)
+{
+ return container_of(link, struct tcx_link, link);
+}
+
+static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link)
+{
+ return tcx_link((struct bpf_link *)link);
+}
+
+void tcx_inc(void);
+void tcx_dec(void);
+
+static inline void tcx_entry_sync(void)
+{
+ /* bpf_mprog_entry got a/b swapped, therefore ensure that
+ * there are no in-flight users on the old one anymore.
+ */
+ synchronize_rcu();
+}
+
+static inline void
+tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry,
+ bool ingress)
+{
+ ASSERT_RTNL();
+ if (ingress)
+ rcu_assign_pointer(dev->tcx_ingress, entry);
+ else
+ rcu_assign_pointer(dev->tcx_egress, entry);
+}
+
+static inline struct bpf_mprog_entry *
+tcx_entry_fetch(struct net_device *dev, bool ingress)
+{
+ ASSERT_RTNL();
+ if (ingress)
+ return rcu_dereference_rtnl(dev->tcx_ingress);
+ else
+ return rcu_dereference_rtnl(dev->tcx_egress);
+}
+
+static inline struct bpf_mprog_entry *tcx_entry_create(void)
+{
+ struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL);
+
+ if (tcx) {
+ bpf_mprog_bundle_init(&tcx->bundle);
+ return &tcx->bundle.a;
+ }
+ return NULL;
+}
+
+static inline void tcx_entry_free(struct bpf_mprog_entry *entry)
+{
+ kfree_rcu(tcx_entry(entry), rcu);
+}
+
+static inline struct bpf_mprog_entry *
+tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created)
+{
+ struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress);
+
+ *created = false;
+ if (!entry) {
+ entry = tcx_entry_create();
+ if (!entry)
+ return NULL;
+ *created = true;
+ }
+ return entry;
+}
+
+static inline void tcx_skeys_inc(bool ingress)
+{
+ tcx_inc();
+ if (ingress)
+ net_inc_ingress_queue();
+ else
+ net_inc_egress_queue();
+}
+
+static inline void tcx_skeys_dec(bool ingress)
+{
+ if (ingress)
+ net_dec_ingress_queue();
+ else
+ net_dec_egress_queue();
+ tcx_dec();
+}
+
+static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
+ const bool active)
+{
+ ASSERT_RTNL();
+ tcx_entry(entry)->miniq_active = active;
+}
+
+static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
+{
+ ASSERT_RTNL();
+ return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active;
+}
+
+static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb,
+ int code)
+{
+ switch (code) {
+ case TCX_PASS:
+ skb->tc_index = qdisc_skb_cb(skb)->tc_classid;
+ fallthrough;
+ case TCX_DROP:
+ case TCX_REDIRECT:
+ return code;
+ case TCX_NEXT:
+ default:
+ return TCX_NEXT;
+ }
+}
+#endif /* CONFIG_NET_XGRESS */
+
+#if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL)
+int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
+void tcx_uninstall(struct net_device *dev, bool ingress);
+
+int tcx_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
+static inline void dev_tcx_uninstall(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ tcx_uninstall(dev, true);
+ tcx_uninstall(dev, false);
+}
+#else
+static inline int tcx_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_link_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_prog_detach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
+
+static inline void dev_tcx_uninstall(struct net_device *dev)
+{
+}
+#endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */
+#endif /* __NET_TCX_H */
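
The fetch-or-create helpers above are the building blocks for the attach path. A hedged, simplified sketch of the pattern; foo_tcx_enable() is illustrative:

/* Sketch only: ensure a bpf_mprog entry exists for one direction. */
static int foo_tcx_enable(struct net_device *dev, bool ingress)
{
	struct bpf_mprog_entry *entry;
	bool created;

	ASSERT_RTNL();
	entry = tcx_entry_fetch_or_create(dev, ingress, &created);
	if (!entry)
		return -ENOMEM;
	if (created)
		tcx_entry_update(dev, entry, ingress);
	tcx_skeys_inc(ingress);
	return 0;
}
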
diff --git a/include/net/tls.h b/include/net/tls.h
index 154949c7b0c8..a2b44578dcb7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -51,16 +51,6 @@
struct tls_rec;
-struct tls_cipher_size_desc {
- unsigned int iv;
- unsigned int key;
- unsigned int salt;
- unsigned int tag;
- unsigned int rec_seq;
-};
-
-extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
-
/* Maximum data size carried in a TLS record */
#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14)
@@ -69,8 +59,6 @@ extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type)
-#define TLS_RECORD_TYPE_DATA 0x17
-
#define TLS_AAD_SPACE_SIZE 13
#define MAX_IV_SIZE 16
@@ -124,6 +112,7 @@ struct tls_strparser {
u32 mark : 8;
u32 stopped : 1;
u32 copy_mode : 1;
+ u32 mixed_decrypted : 1;
u32 msg_ready : 1;
struct strp_msg stm;
@@ -256,7 +245,7 @@ struct tls_context {
struct scatterlist *partially_sent_record;
u16 partially_sent_offset;
- bool in_tcp_sendpages;
+ bool splicing_pages;
bool pending_open_record_frags;
struct mutex tx_lock; /* protects partially_sent_* fields and
@@ -367,10 +356,12 @@ struct sk_buff *
tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev,
struct sk_buff *skb);
-static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
+static inline bool tls_is_skb_tx_device_offloaded(const struct sk_buff *skb)
{
-#ifdef CONFIG_SOCK_VALIDATE_XMIT
- return sk_fullsock(sk) &&
+#ifdef CONFIG_TLS_DEVICE
+ struct sock *sk = skb->sk;
+
+ return sk && sk_fullsock(sk) &&
(smp_load_acquire(&sk->sk_validate_xmit_skb) ==
&tls_validate_xmit_skb);
#else
diff --git a/include/net/tls_prot.h b/include/net/tls_prot.h
new file mode 100644
index 000000000000..68a40756440b
--- /dev/null
+++ b/include/net/tls_prot.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ *
+ * TLS Protocol definitions
+ *
+ * From https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml
+ */
+
+#ifndef _TLS_PROT_H
+#define _TLS_PROT_H
+
+/*
+ * TLS Record protocol: ContentType
+ */
+enum {
+ TLS_RECORD_TYPE_CHANGE_CIPHER_SPEC = 20,
+ TLS_RECORD_TYPE_ALERT = 21,
+ TLS_RECORD_TYPE_HANDSHAKE = 22,
+ TLS_RECORD_TYPE_DATA = 23,
+ TLS_RECORD_TYPE_HEARTBEAT = 24,
+ TLS_RECORD_TYPE_TLS12_CID = 25,
+ TLS_RECORD_TYPE_ACK = 26,
+};
+
+/*
+ * TLS Alert protocol: AlertLevel
+ */
+enum {
+ TLS_ALERT_LEVEL_WARNING = 1,
+ TLS_ALERT_LEVEL_FATAL = 2,
+};
+
+/*
+ * TLS Alert protocol: AlertDescription
+ */
+enum {
+ TLS_ALERT_DESC_CLOSE_NOTIFY = 0,
+ TLS_ALERT_DESC_UNEXPECTED_MESSAGE = 10,
+ TLS_ALERT_DESC_BAD_RECORD_MAC = 20,
+ TLS_ALERT_DESC_RECORD_OVERFLOW = 22,
+ TLS_ALERT_DESC_HANDSHAKE_FAILURE = 40,
+ TLS_ALERT_DESC_BAD_CERTIFICATE = 42,
+ TLS_ALERT_DESC_UNSUPPORTED_CERTIFICATE = 43,
+ TLS_ALERT_DESC_CERTIFICATE_REVOKED = 44,
+ TLS_ALERT_DESC_CERTIFICATE_EXPIRED = 45,
+ TLS_ALERT_DESC_CERTIFICATE_UNKNOWN = 46,
+ TLS_ALERT_DESC_ILLEGAL_PARAMETER = 47,
+ TLS_ALERT_DESC_UNKNOWN_CA = 48,
+ TLS_ALERT_DESC_ACCESS_DENIED = 49,
+ TLS_ALERT_DESC_DECODE_ERROR = 50,
+ TLS_ALERT_DESC_DECRYPT_ERROR = 51,
+ TLS_ALERT_DESC_TOO_MANY_CIDS_REQUESTED = 52,
+ TLS_ALERT_DESC_PROTOCOL_VERSION = 70,
+ TLS_ALERT_DESC_INSUFFICIENT_SECURITY = 71,
+ TLS_ALERT_DESC_INTERNAL_ERROR = 80,
+ TLS_ALERT_DESC_INAPPROPRIATE_FALLBACK = 86,
+ TLS_ALERT_DESC_USER_CANCELED = 90,
+ TLS_ALERT_DESC_MISSING_EXTENSION = 109,
+ TLS_ALERT_DESC_UNSUPPORTED_EXTENSION = 110,
+ TLS_ALERT_DESC_UNRECOGNIZED_NAME = 112,
+ TLS_ALERT_DESC_BAD_CERTIFICATE_STATUS_RESPONSE = 113,
+ TLS_ALERT_DESC_UNKNOWN_PSK_IDENTITY = 115,
+ TLS_ALERT_DESC_CERTIFICATE_REQUIRED = 116,
+ TLS_ALERT_DESC_NO_APPLICATION_PROTOCOL = 120,
+};
+
+#endif /* _TLS_PROT_H */
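
On the wire, a TLS Alert body is exactly two bytes, level then description (RFC 8446, section 6), so the enums above map directly onto it. A hedged sketch; tls_fill_alert() is illustrative, not a kernel API:

/* Sketch only: serialize an alert into its two-byte body. */
static void tls_fill_alert(u8 buf[2], bool fatal, u8 desc)
{
	buf[0] = fatal ? TLS_ALERT_LEVEL_FATAL : TLS_ALERT_LEVEL_WARNING;
	buf[1] = desc;	/* e.g. TLS_ALERT_DESC_CLOSE_NOTIFY */
}
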
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index d27b1caf3753..1a97e3f32029 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -33,8 +33,6 @@ void udplitev6_exit(void);
int tcpv6_init(void);
void tcpv6_exit(void);
-int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
-
/* this does all the common and the specific ctl work */
void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
struct sk_buff *skb);
diff --git a/include/net/udp.h b/include/net/udp.h
index de4b528522bb..488a6d2babcc 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -21,6 +21,7 @@
#include <linux/list.h>
#include <linux/bug.h>
#include <net/inet_sock.h>
+#include <net/gso.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
@@ -272,18 +273,16 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
int udp_v4_early_demux(struct sk_buff *skb);
bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
-int udp_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_cmp)(const struct sock *,
- const struct sock *));
int udp_err(struct sk_buff *, u32);
int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+void udp_splice_eof(struct socket *sock);
int udp_push_pending_frames(struct sock *sk);
void udp_flush_pending_frames(struct sock *sk);
int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
-int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+int udp_ioctl(struct sock *sk, int cmd, int *karg);
int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
@@ -437,7 +436,6 @@ struct udp_seq_afinfo {
struct udp_iter_state {
struct seq_net_private p;
int bucket;
- struct udp_seq_afinfo *bpf_seq_afinfo;
};
void *udp_seq_start(struct seq_file *seq, loff_t *pos);
@@ -528,7 +526,6 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
-struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
#endif
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 299c14ce2bb9..bd33ff2b8f42 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -81,6 +81,4 @@ static inline __wsum udplite_csum(struct sk_buff *skb)
}
void udplite4_register(void);
-int udplite_get_port(struct sock *sk, unsigned short snum,
- int (*scmp)(const struct sock *, const struct sock *));
#endif /* _UDPLITE_H */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index bca5b01af247..6a9f8a5f387c 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -3,6 +3,7 @@
#define __NET_VXLAN_H 1
#include <linux/if_vlan.h>
+#include <linux/rhashtable-types.h>
#include <net/udp_tunnel.h>
#include <net/dst_metadata.h>
#include <net/rtnetlink.h>
@@ -302,6 +303,10 @@ struct vxlan_dev {
struct vxlan_vni_group __rcu *vnigrp;
struct hlist_head fdb_head[FDB_HASH_SIZE];
+
+ struct rhashtable mdb_tbl;
+ struct hlist_head mdb_list;
+ unsigned int mdb_seq;
};
#define VXLAN_F_LEARN 0x01
@@ -322,6 +327,8 @@ struct vxlan_dev {
#define VXLAN_F_IPV6_LINKLOCAL 0x8000
#define VXLAN_F_TTL_INHERIT 0x10000
#define VXLAN_F_VNIFILTER 0x20000
+#define VXLAN_F_MDB 0x40000
+#define VXLAN_F_LOCALBYPASS 0x80000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -342,7 +349,8 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM6_TX | \
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_COLLECT_METADATA | \
- VXLAN_F_VNIFILTER)
+ VXLAN_F_VNIFILTER | \
+ VXLAN_F_LOCALBYPASS)
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
@@ -378,10 +386,15 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
return features;
}
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
-/* IPv6 header + UDP + VXLAN + Ethernet header */
-#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
+static inline int vxlan_headroom(u32 flags)
+{
+ /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */
+ /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */
+ return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr)) +
+ sizeof(struct udphdr) + sizeof(struct vxlanhdr) +
+ (flags & VXLAN_F_GPE ? 0 : ETH_HLEN);
+}
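Replacing the fixed VXLAN_HEADROOM/VXLAN6_HEADROOM macros with vxlan_headroom() lets GPE mode, which carries no inner Ethernet header, reserve exactly what the configured flags require. A minimal caller sketch (example_set_headroom() and its surroundings are hypothetical, not the in-tree code):

	static void example_set_headroom(struct net_device *dev,
					 const struct net_device *lowerdev,
					 u32 flags)
	{
		/* Reserve room for the lower device's link-layer header plus
		 * the full outer encapsulation for this flag combination.
		 */
		dev->needed_headroom = lowerdev->hard_header_len +
				       lowerdev->needed_headroom +
				       vxlan_headroom(flags);
	}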
static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
{
@@ -543,12 +556,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
}
static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
- int hash,
+ u32 hash,
struct vxlan_rdst *rdst)
{
struct fib_nh_common *nhc;
- nhc = nexthop_path_fdb_result(nh, hash);
+ nhc = nexthop_path_fdb_result(nh, hash >> 1);
if (unlikely(!nhc))
return false;
@@ -566,4 +579,23 @@ static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
return true;
}
+static inline void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, const struct vxlan_metadata *md)
+{
+ struct vxlanhdr_gbp *gbp;
+
+ if (!md->gbp)
+ return;
+
+ gbp = (struct vxlanhdr_gbp *)vxh;
+ vxh->vx_flags |= VXLAN_HF_GBP;
+
+ if (md->gbp & VXLAN_GBP_DONT_LEARN)
+ gbp->dont_learn = 1;
+
+ if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
+ gbp->policy_applied = 1;
+
+ gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
+}
+
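With the GBP helper available in the header, an encapsulation path can OR the Group Based Policy extension bits into an already-built VXLAN header. A sketch of such a caller (example_push_vxlan() is illustrative; the real encap path lives in the vxlan driver):

	static void example_push_vxlan(struct sk_buff *skb, __be32 vni_field,
				       u32 flags, const struct vxlan_metadata *md)
	{
		struct vxlanhdr *vxh = __skb_push(skb, sizeof(*vxh));

		vxh->vx_flags = VXLAN_HF_VNI;
		vxh->vx_vni = vni_field;

		if (flags & VXLAN_F_GBP)
			vxlan_build_gbp_hdr(vxh, md);	/* no-op while md->gbp == 0 */
	}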
#endif
diff --git a/include/net/x25.h b/include/net/x25.h
index d7d6c2b4ffa7..597eb53c471e 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -177,10 +177,7 @@ struct x25_forward {
atomic_t refcnt;
};
-static inline struct x25_sock *x25_sk(const struct sock *sk)
-{
- return (struct x25_sock *)sk;
-}
+#define x25_sk(ptr) container_of_const(ptr, struct x25_sock, sk)
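Unlike the removed inline, container_of_const() preserves the const qualifier of its argument. Illustrative only:

	/*
	 *	const struct sock *csk;	x25_sk(csk) -> const struct x25_sock *
	 *	struct sock *sk;	x25_sk(sk)  -> struct x25_sock *
	 *
	 * The old (struct x25_sock *)sk cast silently discarded const.
	 */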
/* af_x25.c */
extern int sysctl_x25_restart_request_timeout;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 76aa748e7923..de08c8e0d134 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -6,9 +6,10 @@
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__
-#include <linux/skbuff.h> /* skb_shared_info */
-#include <uapi/linux/netdev.h>
#include <linux/bitfield.h>
+#include <linux/filter.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h> /* skb_shared_info */
/**
* DOC: XDP RX-queue information
@@ -45,8 +46,6 @@ enum xdp_mem_type {
MEM_TYPE_MAX,
};
-typedef u32 xdp_features_t;
-
/* XDP flags for ndo_xdp_xmit */
#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH
@@ -318,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
-/* When sending xdp_frame into the network stack, then there is no
- * return point callback, which is needed to release e.g. DMA-mapping
- * resources with page_pool. Thus, have explicit function to release
- * frame resources.
- */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
-static inline void xdp_release_frame(struct xdp_frame *xdpf)
-{
- struct xdp_mem_info *mem = &xdpf->mem;
- struct skb_shared_info *sinfo;
- int i;
-
- /* Curr only page_pool needs this */
- if (mem->type != MEM_TYPE_PAGE_POOL)
- return;
-
- if (likely(!xdp_frame_has_frags(xdpf)))
- goto out;
-
- sinfo = xdp_get_shared_info_from_frame(xdpf);
- for (i = 0; i < sinfo->nr_frags; i++) {
- struct page *page = skb_frag_page(&sinfo->frags[i]);
-
- __xdp_release_frame(page_address(page), mem);
- }
-out:
- __xdp_release_frame(xdpf->data, mem);
-}
-
static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
@@ -472,6 +442,12 @@ enum xdp_rss_hash_type {
XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR,
};
+struct xdp_metadata_ops {
+ int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
+ int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type);
+};
+
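A device advertises these hooks by pointing netdev->xdp_metadata_ops at a filled-in table, which lets XDP programs for that device call the RX metadata kfuncs. A hedged driver-side sketch (all mydrv_* names are hypothetical):

	static int mydrv_xmo_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
	{
		return -EOPNOTSUPP;	/* a real driver reads its HW RX descriptor */
	}

	static int mydrv_xmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
				     enum xdp_rss_hash_type *rss_type)
	{
		return -EOPNOTSUPP;
	}

	static const struct xdp_metadata_ops mydrv_xdp_metadata_ops = {
		.xmo_rx_timestamp	= mydrv_xmo_rx_timestamp,
		.xmo_rx_hash		= mydrv_xmo_rx_hash,
	};

	/* in probe: netdev->xdp_metadata_ops = &mydrv_xdp_metadata_ops; */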
#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
@@ -503,4 +479,20 @@ static inline void xdp_clear_features_flag(struct net_device *dev)
xdp_set_features_flag(dev, 0);
}
+static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
+ struct xdp_buff *xdp)
+{
+ /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
+ * under local_bh_disable(), which provides the needed RCU protection
+ * for accessing map entries.
+ */
+ u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+ if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+ if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+ act = xdp_master_redirect(xdp);
+ }
+
+ return act;
+}
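A typical NAPI receive path calls the helper and then dispatches on the verdict; a sketch following the usual driver conventions (mydrv_run_xdp() is hypothetical):

	static u32 mydrv_run_xdp(struct net_device *dev, struct bpf_prog *prog,
				 struct xdp_buff *xdp)
	{
		u32 act = bpf_prog_run_xdp(prog, xdp);

		switch (act) {
		case XDP_PASS:
		case XDP_TX:
		case XDP_REDIRECT:
			break;
		default:
			bpf_warn_invalid_xdp_action(dev, prog, act);
			fallthrough;
		case XDP_ABORTED:
			trace_xdp_exception(dev, prog, act);
			fallthrough;
		case XDP_DROP:
			break;
		}
		return act;
	}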
#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 3057e1a4a11c..1617af380162 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -38,6 +38,7 @@ struct xdp_umem {
struct xsk_map {
struct bpf_map map;
spinlock_t lock; /* Synchronize map updates */
+ atomic_t count;
struct xdp_sock __rcu *xsk_map[];
};
@@ -51,6 +52,7 @@ struct xdp_sock {
struct xsk_buff_pool *pool;
u16 queue_id;
bool zc;
+ bool sg;
enum {
XSK_READY = 0,
XSK_BOUND,
@@ -66,6 +68,12 @@ struct xdp_sock {
u64 rx_dropped;
u64 rx_queue_full;
+ /* When __xsk_generic_xmit() must return before it sees the EOP descriptor
+ * for the current packet, the partially built skb is saved here so that
+ * packet building can resume in the next call of __xsk_generic_xmit().
+ */
+ struct sk_buff *skb;
+
struct list_head map_list;
/* Protects map_list */
spinlock_t map_list_lock;
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 9c0d860609ba..1f6fc8c7a84c 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -89,6 +89,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return xp_alloc(pool);
}
+static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+{
+ return !xp_mb_desc(desc);
+}
+
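In a zero-copy transmit loop, a descriptor ends a packet exactly when its XDP_PKT_CONTD bit is clear; drivers can use the helper to decide when to set the hardware EOP bit. A hedged sketch (example_xmit() is hypothetical):

	static void example_xmit(struct xsk_buff_pool *pool)
	{
		struct xdp_desc desc;

		while (xsk_tx_peek_desc(pool, &desc)) {
			/* ... program one HW Tx descriptor from desc ... */
			if (xsk_is_eop_desc(&desc)) {
				/* last buffer of the frame: set the HW EOP bit */
			}
		}
		xsk_tx_release(pool);
	}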
/* Returns as many entries as possible up to max. 0 <= N <= max. */
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
@@ -103,10 +108,45 @@ static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
static inline void xsk_buff_free(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ struct list_head *xskb_list = &xskb->pool->xskb_list;
+ struct xdp_buff_xsk *pos, *tmp;
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_del(&pos->xskb_list_node);
+ xp_free(pos);
+ }
+
+ xdp_get_shared_info_from_buff(xdp)->nr_frags = 0;
+out:
xp_free(xskb);
}
+static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+{
+ struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+ list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
+}
+
+static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+{
+ struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+ struct xdp_buff *ret = NULL;
+ struct xdp_buff_xsk *frag;
+
+ frag = list_first_entry_or_null(&xskb->pool->xskb_list,
+ struct xdp_buff_xsk, xskb_list_node);
+ if (frag) {
+ list_del(&frag->xskb_list_node);
+ ret = &frag->xdp;
+ }
+
+ return ret;
+}
+
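Taken together, the pool's xskb_list strings the continuation buffers of one in-flight multi-buffer frame. A rough lifecycle, offered as a sketch rather than a contract:

	/*
	 *	struct xdp_buff *head = xsk_buff_alloc(pool);	// first buffer
	 *	struct xdp_buff *frag = xsk_buff_alloc(pool);	// continuation
	 *
	 *	xsk_buff_add_frag(frag);	// queue on pool->xskb_list
	 *	...
	 *	frag = xsk_buff_get_frag(head);	// or pop them back one by one,
	 *					// e.g. when linking frags elsewhere
	 *	...
	 *	xsk_buff_free(head);		// frees head plus any still-queued frags
	 */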
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
@@ -241,6 +281,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return NULL;
}
+static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+{
+ return false;
+}
+
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
return 0;
@@ -255,10 +300,15 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
{
}
-static inline void xsk_buff_discard(struct xdp_buff *xdp)
+static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
{
}
+static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+{
+ return NULL;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3e1f70e8e424..363c7d510554 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -138,6 +138,10 @@ enum {
XFRM_DEV_OFFLOAD_PACKET,
};
+enum {
+ XFRM_DEV_OFFLOAD_FLAG_ACQ = 1,
+};
+
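The new flag tags an offloaded state created from an ACQUIRE, before keying material exists, so a driver can install a lightweight placeholder instead of a full SA. A hedged sketch of such a check (mydrv_* is hypothetical; the extack-taking xdo_dev_state_add() prototype is assumed):

	static int mydrv_xdo_dev_state_add(struct xfrm_state *x,
					   struct netlink_ext_ack *extack)
	{
		if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) {
			/* State stems from an ACQUIRE: keys are not final yet,
			 * so install only a placeholder entry.
			 */
			return 0;
		}
		/* ... full SA provisioning ... */
		return 0;
	}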
struct xfrm_dev_offload {
struct net_device *dev;
netdevice_tracker dev_tracker;
@@ -145,6 +149,7 @@ struct xfrm_dev_offload {
unsigned long offload_handle;
u8 dir : 2;
u8 type : 2;
+ u8 flags : 2;
};
struct xfrm_mode {
@@ -1049,6 +1054,7 @@ struct xfrm_offload {
struct sec_path {
int len;
int olen;
+ int verified_cnt;
struct xfrm_state *xvec[XFRM_MAX_DEPTH];
struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH];
@@ -1978,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
if (dev->xfrmdev_ops->xdo_dev_state_free)
dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
netdev_put(dev, &xso->dev_tracker);
}
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 3e952e569418..b0bdff26fc88 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,6 +29,7 @@ struct xdp_buff_xsk {
struct xsk_buff_pool *pool;
u64 orig_addr;
struct list_head free_list_node;
+ struct list_head xskb_list_node;
};
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
@@ -54,6 +55,7 @@ struct xsk_buff_pool {
struct xdp_umem *umem;
struct work_struct work;
struct list_head free_list;
+ struct list_head xskb_list;
u32 heads_cnt;
u16 queue_id;
@@ -180,13 +182,13 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
if (likely(!cross_pg))
return false;
- if (pool->dma_pages_cnt) {
- return !(pool->dma_pages[addr >> PAGE_SHIFT] &
- XSK_NEXT_PG_CONTIG_MASK);
- }
+ return pool->dma_pages &&
+ !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
+}
- /* skb path */
- return addr + len > pool->addrs_cnt;
+static inline bool xp_mb_desc(struct xdp_desc *desc)
+{
+ return desc->options & XDP_PKT_CONTD;
}
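xp_mb_desc() keys off the XDP_PKT_CONTD option bit from the AF_XDP uapi (if_xdp.h): it is set on every descriptor of a multi-buffer frame except the last. From userspace the same frame might look like this (addresses and lengths are hypothetical):

	/* A frame split across two Tx descriptors (illustrative) */
	struct xdp_desc d0 = {
		.addr		= frame_addr,		/* first chunk */
		.len		= first_len,
		.options	= XDP_PKT_CONTD,	/* xp_mb_desc() -> true */
	};
	struct xdp_desc d1 = {
		.addr		= frame_addr2,		/* remainder */
		.len		= rest_len,
		.options	= 0,			/* EOP: xp_mb_desc() -> false */
	};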
static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)