From 2fa3515cc0d3cc8413465e55b85e55aeab090812 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 2 Dec 2020 13:28:10 -0800 Subject: bpf: Remove trailing semicolon in macro definition The macro use will already have a semicolon. Clean up escaped newlines. Signed-off-by: Tom Rix Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201202212810.3774614-1-trix@redhat.com --- include/trace/events/xdp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index cd24e8a59529..76a97176ab81 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -145,17 +145,17 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, TP_ARGS(dev, xdp, tgt, err, map, index) ); -#define _trace_xdp_redirect(dev, xdp, to) \ - trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to); +#define _trace_xdp_redirect(dev, xdp, to) \ + trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to) -#define _trace_xdp_redirect_err(dev, xdp, to, err) \ - trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to); +#define _trace_xdp_redirect_err(dev, xdp, to, err) \ + trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to) #define _trace_xdp_redirect_map(dev, xdp, to, map, index) \ - trace_xdp_redirect(dev, xdp, to, 0, map, index); + trace_xdp_redirect(dev, xdp, to, 0, map, index) #define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \ - trace_xdp_redirect_err(dev, xdp, to, err, map, index); + trace_xdp_redirect_err(dev, xdp, to, err, map, index) /* not used anymore, but kept around so as not to break old programs */ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, -- cgit v1.2.3 From 7d17167244f5415bc6bc90f5bb0074b6d79676b4 Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Fri, 4 Dec 2020 19:18:27 +0100 Subject: selftests/bpf: Avoid errno clobbering Commit 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types") added a check to skip unsupported program types. As bpf_probe_prog_type can change errno, do_test_single should save it before printing a reason why a supported BPF program type failed to load.
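The pitfall here is the usual errno contract: any later library or system call overwrites whatever value the failed load set, so it has to be captured immediately. A standalone illustration of the same effect with ordinary syscalls (not part of the patch, paths chosen only for the example):

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		int saved_errno;

		close(-1);                      /* fails and sets errno to EBADF */
		saved_errno = errno;            /* capture right away */
		access("/nonexistent", F_OK);   /* a later call typically overwrites errno (ENOENT) */

		printf("saved: %s, current: %s\n",
		       strerror(saved_errno), strerror(errno));
		return 0;
	}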
Fixes: 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types") Signed-off-by: Florian Lehner Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201204181828.11974-2-dev@der-flo.net --- tools/testing/selftests/bpf/test_verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 4bfe3aa2cfc4..ceea9409639e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -936,6 +936,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int saved_errno; int fixup_skips; __u32 pflags; int i, err; @@ -997,6 +998,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); + saved_errno = errno; /* BPF_PROG_TYPE_TRACING requires more setup and * bpf_probe_prog_type won't give correct answer @@ -1013,7 +1015,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", - strerror(errno)); + strerror(saved_errno)); goto fail_log; } #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -- cgit v1.2.3 From 5f61b7c6975b03e6ace2cfb13d415d5f475c8830 Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Fri, 4 Dec 2020 19:18:28 +0100 Subject: selftests/bpf: Print reason when a tester could not run a program Print a message when the returned error is about a program type being not supported or because of permission problems. These messages are expected if the program to test was actually executed.
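One detail worth noting in the hunk below: the literal 524 is used because ENOTSUPP is a kernel-internal errno from include/linux/errno.h and, unlike EOPNOTSUPP, is not exported through the userspace errno.h, so a selftest has to compare against the raw number. If a named constant is preferred, a guarded definition along these lines (illustrative only, not part of the patch) would do:

	#include <errno.h>

	#ifndef ENOTSUPP
	#define ENOTSUPP 524	/* kernel-internal "operation is not supported" */
	#endif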
Signed-off-by: Florian Lehner Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201204181828.11974-3-dev@der-flo.net --- tools/testing/selftests/bpf/test_verifier.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index ceea9409639e..777a81404fdb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -875,19 +875,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, __u8 tmp[TEST_DATA_LEN << 2]; __u32 size_tmp = sizeof(tmp); uint32_t retval; - int err; + int err, saved_errno; if (unpriv) set_admin(true); err = bpf_prog_test_run(fd_prog, 1, data, size_data, tmp, &size_tmp, &retval, NULL); + saved_errno = errno; + if (unpriv) set_admin(false); - if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { - printf("Unexpected bpf_prog_test_run error "); - return err; + + if (err) { + switch (saved_errno) { + case 524/*ENOTSUPP*/: + printf("Did not run the program (not supported) "); + return 0; + case EPERM: + if (unpriv) { + printf("Did not run the program (no permission) "); + return 0; + } + /* fallthrough; */ + default: + printf("FAIL: Unexpected bpf_prog_test_run error (%s) ", + strerror(saved_errno)); + return err; + } } - if (!err && retval != expected_val && + + if (retval != expected_val && expected_val != POINTER_VALUE) { printf("FAIL retval %d != %d ", retval, expected_val); return 1; -- cgit v1.2.3 From dba4a9256bb4d78ef89aaad5f49787aa27fcd5b4 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:04 +0100 Subject: net: Remove the err argument from sock_from_file Currently, the sock_from_file prototype takes an "err" pointer that is either not set or set to -ENOTSOCK IFF the returned socket is NULL. This makes the error redundant and it is ignored by a few callers. This patch simplifies the API by letting callers deduce the error based on whether the returned socket is NULL or not. 
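The conversion at each call site is mechanical; roughly sketched (the actual hunks follow below):

	/* before: the callee filled in the error */
	struct socket *sock = sock_from_file(file, &err);
	if (!sock)
		return err;		/* err was always -ENOTSOCK */

	/* after: the caller chooses the error code itself */
	struct socket *sock = sock_from_file(file);
	if (!sock)
		return -ENOTSOCK;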
Suggested-by: Al Viro Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Reviewed-by: KP Singh Link: https://lore.kernel.org/bpf/20201204113609.1850150-1-revest@google.com --- fs/eventpoll.c | 3 +-- fs/io_uring.c | 16 ++++++++-------- include/linux/net.h | 2 +- net/core/netclassid_cgroup.c | 3 +-- net/core/netprio_cgroup.c | 3 +-- net/core/sock.c | 8 +------- net/socket.c | 27 ++++++++++++++++----------- 7 files changed, 29 insertions(+), 33 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 73c346e503d7..19499b7bb82c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -416,12 +416,11 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) unsigned int napi_id; struct socket *sock; struct sock *sk; - int err; if (!net_busy_loop_on()) return; - sock = sock_from_file(epi->ffd.file, &err); + sock = sock_from_file(epi->ffd.file); if (!sock) return; diff --git a/fs/io_uring.c b/fs/io_uring.c index 1023f7b44cea..7942a42be4c3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4356,9 +4356,9 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock, unsigned flags; int ret; - sock = sock_from_file(req->file, &ret); + sock = sock_from_file(req->file); if (unlikely(!sock)) - return ret; + return -ENOTSOCK; if (req->async_data) { kmsg = req->async_data; @@ -4405,9 +4405,9 @@ static int io_send(struct io_kiocb *req, bool force_nonblock, unsigned flags; int ret; - sock = sock_from_file(req->file, &ret); + sock = sock_from_file(req->file); if (unlikely(!sock)) - return ret; + return -ENOTSOCK; ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); if (unlikely(ret)) @@ -4584,9 +4584,9 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock, unsigned flags; int ret, cflags = 0; - sock = sock_from_file(req->file, &ret); + sock = sock_from_file(req->file); if (unlikely(!sock)) - return ret; + return -ENOTSOCK; if (req->async_data) { kmsg = req->async_data; @@ -4647,9 +4647,9 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock, unsigned flags; int ret, cflags = 0; - sock = sock_from_file(req->file, &ret); + sock = sock_from_file(req->file); if (unlikely(!sock)) - return ret; + return -ENOTSOCK; if (req->flags & REQ_F_BUFFER_SELECT) { kbuf = io_recv_buffer_select(req, !force_nonblock); diff --git a/include/linux/net.h b/include/linux/net.h index 0dcd51feef02..9e2324efc26a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -240,7 +240,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); struct socket *sockfd_lookup(int fd, int *err); -struct socket *sock_from_file(struct file *file, int *err); +struct socket *sock_from_file(struct file *file); #define sockfd_put(sock) fput(sock->file) int net_ratelimit(void); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 41b24cd31562..b49c57d35a88 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -68,9 +68,8 @@ struct update_classid_context { static int update_classid_sock(const void *v, struct file *file, unsigned n) { - int err; struct update_classid_context *ctx = (void *)v; - struct socket *sock = sock_from_file(file, &err); + struct socket *sock = sock_from_file(file); if (sock) { spin_lock(&cgroup_sk_update_lock); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9bd4cab7d510..99a431c56f23 100644 --- a/net/core/netprio_cgroup.c +++ 
b/net/core/netprio_cgroup.c @@ -220,8 +220,7 @@ static ssize_t write_priomap(struct kernfs_open_file *of, static int update_netprio(const void *v, struct file *file, unsigned n) { - int err; - struct socket *sock = sock_from_file(file, &err); + struct socket *sock = sock_from_file(file); if (sock) { spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, diff --git a/net/core/sock.c b/net/core/sock.c index 4fd7e785f177..bbcd4b97eddd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2827,14 +2827,8 @@ EXPORT_SYMBOL(sock_no_mmap); void __receive_sock(struct file *file) { struct socket *sock; - int error; - /* - * The resulting value of "error" is ignored here since we only - * need to take action when the file is a socket and testing - * "sock" for NULL is sufficient. - */ - sock = sock_from_file(file, &error); + sock = sock_from_file(file); if (sock) { sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); diff --git a/net/socket.c b/net/socket.c index bfef11ba35b8..9a240b45bdf3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -445,17 +445,15 @@ static int sock_map_fd(struct socket *sock, int flags) /** * sock_from_file - Return the &socket bounded to @file. * @file: file - * @err: pointer to an error code return * - * On failure returns %NULL and assigns -ENOTSOCK to @err. + * On failure returns %NULL. */ -struct socket *sock_from_file(struct file *file, int *err) +struct socket *sock_from_file(struct file *file) { if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ - *err = -ENOTSOCK; return NULL; } EXPORT_SYMBOL(sock_from_file); @@ -484,9 +482,11 @@ struct socket *sockfd_lookup(int fd, int *err) return NULL; } - sock = sock_from_file(file, err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + *err = -ENOTSOCK; fput(file); + } return sock; } EXPORT_SYMBOL(sockfd_lookup); @@ -498,11 +498,12 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) *err = -EBADF; if (f.file) { - sock = sock_from_file(f.file, err); + sock = sock_from_file(f.file); if (likely(sock)) { *fput_needed = f.flags & FDPUT_FPUT; return sock; } + *err = -ENOTSOCK; fdput(f); } return NULL; @@ -1693,9 +1694,11 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - sock = sock_from_file(file, &err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + err = -ENOTSOCK; goto out; + } err = -ENFILE; newsock = sock_alloc(); @@ -1818,9 +1821,11 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address, struct socket *sock; int err; - sock = sock_from_file(file, &err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + err = -ENOTSOCK; goto out; + } err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); -- cgit v1.2.3 From 4f19cab76136e800a3f04d8c9aa4d8e770e3d3d8 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:05 +0100 Subject: bpf: Add a bpf_sock_from_file helper While eBPF programs can check whether a file is a socket by file->f_op == &socket_file_ops, they cannot convert the void private_data pointer to a struct socket BTF pointer. In order to do this a new helper wrapping sock_from_file is added. This is useful to tracing programs but also other program types inheriting this set of helpers such as iterators or LSM programs. 
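As a rough usage sketch (modeled on the task_file iterator selftest added later in this series; the function name is invented for the example), a BPF program can now go from a struct file BTF pointer to the socket and its sk member:

	SEC("iter/task_file")
	int tag_socket_owner(struct bpf_iter__task_file *ctx)
	{
		struct file *file = ctx->file;
		struct socket *sock;

		if (!ctx->task || !file)
			return 0;

		sock = bpf_sock_from_file(file);
		if (!sock)
			return 0;	/* file is not a socket */

		/* sock->sk can now be used with e.g. bpf_sk_storage_get() */
		return 0;
	}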
Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: KP Singh Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-2-revest@google.com --- include/uapi/linux/bpf.h | 9 +++++++++ kernel/trace/bpf_trace.c | 20 ++++++++++++++++++++ scripts/bpf_helpers_doc.py | 4 ++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 4 files changed, 42 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1233f14f659f..30b477a26482 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3822,6 +3822,14 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3986,6 +3994,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(sock_from_file), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cb9d7478ef0c..0cf0a6331482 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1270,6 +1270,24 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_sock_from_file, struct file *, file) +{ + return (unsigned long) sock_from_file(file); +} + +BTF_ID_LIST(bpf_sock_from_file_btf_ids) +BTF_ID(struct, socket) +BTF_ID(struct, file) + +static const struct bpf_func_proto bpf_sock_from_file_proto = { + .func = bpf_sock_from_file, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &bpf_sock_from_file_btf_ids[0], + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1366,6 +1384,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_bpf_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_sock_from_file: + return &bpf_sock_from_file_proto; default: return NULL; } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 8b829748d488..867ada23281c 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -437,6 +437,8 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct inode', + 'struct socket', + 'struct file', ] known_types = { '...', @@ -482,6 +484,8 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct inode', + 'struct socket', + 'struct file', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1233f14f659f..30b477a26482 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3822,6 +3822,14 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3986,6 +3994,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(sock_from_file), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From a50a85e40c59cf27ca3b324a5aa4c7f35314f251 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:06 +0100 Subject: bpf: Expose bpf_sk_storage_* to iterator programs Iterators are currently used to expose kernel information to userspace over fast procfs-like files but iterators could also be used to manipulate local storage. For example, the task_file iterator could be used to initialize a socket local storage with associations between processes and sockets or to selectively delete local storage values. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20201204113609.1850150-3-revest@google.com --- net/core/bpf_sk_storage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index a32037daa933..4edd033e899c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -394,6 +394,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) * use the bpf_sk_storage_(get|delete) helper. */ switch (prog->expected_attach_type) { + case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: /* bpf_sk_storage has no trace point */ return true; -- cgit v1.2.3 From 593f6d41abbbc63e1ad297f7a36ab6060a812f0c Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:07 +0100 Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_delete The eBPF program iterates over all entries (well, only one) of a socket local storage map and deletes them all. The test makes sure that the entry is indeed deleted. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-4-revest@google.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 64 ++++++++++++++++++++++ .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c | 23 ++++++++ 2 files changed, 87 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 448885b95eed..bb4a638f2e6f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -20,6 +20,7 @@ #include "bpf_iter_bpf_percpu_hash_map.skel.h" #include "bpf_iter_bpf_array_map.skel.h" #include "bpf_iter_bpf_percpu_array_map.skel.h" +#include "bpf_iter_bpf_sk_storage_helpers.skel.h" #include "bpf_iter_bpf_sk_storage_map.skel.h" #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" @@ -913,6 +914,67 @@ out: bpf_iter_bpf_percpu_array_map__destroy(skel); } +/* An iterator program deletes all local storage in a map. 
*/ +static void test_bpf_sk_storage_delete(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_sk_storage_helpers *skel; + union bpf_iter_link_info linfo; + int err, len, map_fd, iter_fd; + struct bpf_link *link; + int sock_fd = -1; + __u32 val = 42; + char buf[64]; + + skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.sk_stg_map); + + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map, + &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", + "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + if (sock_fd >= 0) + close(sock_fd); + bpf_iter_bpf_sk_storage_helpers__destroy(skel); +} + static void test_bpf_sk_storage_map(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -1067,6 +1129,8 @@ void test_bpf_iter(void) test_bpf_percpu_array_map(); if (test__start_subtest("bpf_sk_storage_map")) test_bpf_sk_storage_map(); + if (test__start_subtest("bpf_sk_storage_delete")) + test_bpf_sk_storage_delete(); if (test__start_subtest("rdonly-buf-out-of-bound")) test_rdonly_buf_out_of_bound(); if (test__start_subtest("buf-neg-offset")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c new file mode 100644 index 000000000000..01ff3235e413 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google LLC. */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + +SEC("iter/bpf_sk_storage_map") +int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) +{ + if (ctx->sk) + bpf_sk_storage_delete(&sk_stg_map, ctx->sk); + + return 0; +} -- cgit v1.2.3 From bd9b327e58f98aa7126291bf12b50720c660e787 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:08 +0100 Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_get The eBPF program iterates over all files and tasks. For all socket files, it stores the tgid of the last task it encountered with a handle to that socket. This is a heuristic for finding the "owner" of a socket similar to what's done by lsof, ss, netstat or fuser. 
Potentially, this information could be used from a cgroup_skb/*gress hook to try to associate network traffic with processes. The test makes sure that a socket it created is tagged with prog_tests's pid. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-5-revest@google.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 40 ++++++++++++++++++++++ .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c | 24 +++++++++++++ 2 files changed, 64 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index bb4a638f2e6f..9336d0f18331 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -975,6 +975,44 @@ out: bpf_iter_bpf_sk_storage_helpers__destroy(skel); } +/* This creates a socket and its local storage. It then runs a task_iter BPF + * program that replaces the existing socket local storage with the tgid of the + * only task owning a file descriptor to this socket, this process, prog_tests. + */ +static void test_bpf_sk_storage_get(void) +{ + struct bpf_iter_bpf_sk_storage_helpers *skel; + int err, map_fd, val = -1; + int sock_fd = -1; + + skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", + "skeleton open_and_load failed\n")) + return; + + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + goto out; + + map_fd = bpf_map__fd(skel->maps.sk_stg_map); + + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n")) + goto close_socket; + + do_dummy_read(skel->progs.fill_socket_owner); + + err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); + CHECK(err || val != getpid(), "bpf_map_lookup_elem", + "map value wasn't set correctly (expected %d, got %d, err=%d)\n", + getpid(), val, err); + +close_socket: + close(sock_fd); +out: + bpf_iter_bpf_sk_storage_helpers__destroy(skel); +} + static void test_bpf_sk_storage_map(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -1131,6 +1169,8 @@ void test_bpf_iter(void) test_bpf_sk_storage_map(); if (test__start_subtest("bpf_sk_storage_delete")) test_bpf_sk_storage_delete(); + if (test__start_subtest("bpf_sk_storage_get")) + test_bpf_sk_storage_get(); if (test__start_subtest("rdonly-buf-out-of-bound")) test_rdonly_buf_out_of_bound(); if (test__start_subtest("buf-neg-offset")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c index 01ff3235e413..dde53df37de8 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -21,3 +21,27 @@ int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) return 0; } + +SEC("iter/task_file") +int fill_socket_owner(struct bpf_iter__task_file *ctx) +{ + struct task_struct *task = ctx->task; + struct file *file = ctx->file; + struct socket *sock; + int *sock_tgid; + + if (!task || !file) + return 0; + + sock = bpf_sock_from_file(file); + if (!sock) + return 0; + + sock_tgid = bpf_sk_storage_get(&sk_stg_map, sock->sk, 0, 0); + if (!sock_tgid) + return 0; + + *sock_tgid = task->tgid; + + return 0; +} -- cgit v1.2.3 From 34da87213d3ddd26643aa83deff7ffc6463da0fc Mon Sep 17 
00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:09 +0100 Subject: selftests/bpf: Test bpf_sk_storage_get in tcp iterators This extends the existing bpf_sk_storage_get test where a socket is created and tagged with its creator's pid by a task_file iterator. A TCP iterator is now also used at the end of the test to negate the values already stored in the local storage. The test therefore expects -getpid() to be stored in the local storage. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-6-revest@google.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 18 ++++++++++++++++-- .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 9336d0f18331..0e586368948d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -978,6 +978,8 @@ out: /* This creates a socket and its local storage. It then runs a task_iter BPF * program that replaces the existing socket local storage with the tgid of the * only task owning a file descriptor to this socket, this process, prog_tests. + * It then runs a tcp socket iterator that negates the value in the existing + * socket local storage, the test verifies that the resulting value is -pid. */ static void test_bpf_sk_storage_get(void) { @@ -994,6 +996,10 @@ static void test_bpf_sk_storage_get(void) if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) goto out; + err = listen(sock_fd, 1); + if (CHECK(err != 0, "listen", "errno: %d\n", errno)) + goto close_socket; + map_fd = bpf_map__fd(skel->maps.sk_stg_map); err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); @@ -1003,9 +1009,17 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != getpid(), "bpf_map_lookup_elem", + if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", + "map value wasn't set correctly (expected %d, got %d, err=%d)\n", + getpid(), val, err)) + goto close_socket; + + do_dummy_read(skel->progs.negate_socket_local_storage); + + err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); + CHECK(err || val != -getpid(), "bpf_map_lookup_elem", "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err); + -getpid(), val, err); close_socket: close(sock_fd); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c index dde53df37de8..6cecab2b32ba 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -45,3 +45,21 @@ int fill_socket_owner(struct bpf_iter__task_file *ctx) return 0; } + +SEC("iter/tcp") +int negate_socket_local_storage(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + int *sock_tgid; + + if (!sk_common) + return 0; + + sock_tgid = bpf_sk_storage_get(&sk_stg_map, sk_common, 0, 0); + if (!sock_tgid) + return 0; + + *sock_tgid = -*sock_tgid; + + return 0; +} -- cgit v1.2.3 From e1868b9e36d0ca52e4e7c6c06953f191446e44df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Dec 2020 10:28:21 -0800 Subject: bpf: Avoid overflows involving 
hash elem_size Use of bpf_map_charge_init() was making sure hash tables would not use more than 4GB of memory. Since the implicit check disappeared, we have to be more careful about overflows, to support big hash tables. syzbot triggers a panic using : bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_LRU_HASH, key_size=16384, value_size=8, max_entries=262200, map_flags=0, inner_map_fd=-1, map_name="", map_ifindex=0, btf_fd=-1, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 64) = ... BUG: KASAN: vmalloc-out-of-bounds in bpf_percpu_lru_populate kernel/bpf/bpf_lru_list.c:594 [inline] BUG: KASAN: vmalloc-out-of-bounds in bpf_lru_populate+0x4ef/0x5e0 kernel/bpf/bpf_lru_list.c:611 Write of size 2 at addr ffffc90017e4a020 by task syz-executor.5/19786 CPU: 0 PID: 19786 Comm: syz-executor.5 Not tainted 5.10.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0x5/0x4c8 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562 bpf_percpu_lru_populate kernel/bpf/bpf_lru_list.c:594 [inline] bpf_lru_populate+0x4ef/0x5e0 kernel/bpf/bpf_lru_list.c:611 prealloc_init kernel/bpf/hashtab.c:319 [inline] htab_map_alloc+0xf6e/0x1230 kernel/bpf/hashtab.c:507 find_and_alloc_map kernel/bpf/syscall.c:123 [inline] map_create kernel/bpf/syscall.c:829 [inline] __do_sys_bpf+0xa81/0x5170 kernel/bpf/syscall.c:4336 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45deb9 Code: 0d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fd93fbc0c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000141 RAX: ffffffffffffffda RBX: 0000000000001a40 RCX: 000000000045deb9 RDX: 0000000000000040 RSI: 0000000020000280 RDI: 0000000000000000 RBP: 000000000119bf60 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000119bf2c R13: 00007ffc08a7be8f R14: 00007fd93fbc19c0 R15: 000000000119bf2c Fixes: 755e5d55367a ("bpf: Eliminate rlimit-based memory accounting for hashtab maps") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20201207182821.3940306-1-eric.dumazet@gmail.com --- kernel/bpf/hashtab.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index fe7a0733a63a..f53cca70e215 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -224,7 +224,7 @@ static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) { - return (struct htab_elem *) (htab->elems + i * htab->elem_size); + return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size); } static void htab_free_elems(struct bpf_htab *htab) @@ -280,7 +280,7 @@ static int prealloc_init(struct bpf_htab *htab) if (!htab_is_percpu(htab) && !htab_is_lru(htab)) num_entries += num_possible_cpus(); - htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries, + htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, htab->map.numa_node); if (!htab->elems) return -ENOMEM; -- cgit v1.2.3 From 
2f4b03195fe8ed3b1e213f4a6cfe14cfc109d829 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 7 Dec 2020 13:37:20 +0100 Subject: bpf: Propagate __user annotations properly __htab_map_lookup_and_delete_batch() stores a user pointer in the local variable ubatch and uses that in copy_{from,to}_user(), but ubatch misses a __user annotation. So, sparse warns in the various assignments and uses of ubatch: kernel/bpf/hashtab.c:1415:24: warning: incorrect type in initializer (different address spaces) kernel/bpf/hashtab.c:1415:24: expected void *ubatch kernel/bpf/hashtab.c:1415:24: got void [noderef] __user * kernel/bpf/hashtab.c:1444:46: warning: incorrect type in argument 2 (different address spaces) kernel/bpf/hashtab.c:1444:46: expected void const [noderef] __user *from kernel/bpf/hashtab.c:1444:46: got void *ubatch kernel/bpf/hashtab.c:1608:16: warning: incorrect type in assignment (different address spaces) kernel/bpf/hashtab.c:1608:16: expected void *ubatch kernel/bpf/hashtab.c:1608:16: got void [noderef] __user * kernel/bpf/hashtab.c:1609:26: warning: incorrect type in argument 1 (different address spaces) kernel/bpf/hashtab.c:1609:26: expected void [noderef] __user *to kernel/bpf/hashtab.c:1609:26: got void *ubatch Add the __user annotation to repair this chain of propagating __user annotations in __htab_map_lookup_and_delete_batch(). Signed-off-by: Lukas Bulwahn Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201207123720.19111-1-lukas.bulwahn@gmail.com --- kernel/bpf/hashtab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index f53cca70e215..7e848200cd26 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1412,7 +1412,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val; void __user *uvalues = u64_to_user_ptr(attr->batch.values); void __user *ukeys = u64_to_user_ptr(attr->batch.keys); - void *ubatch = u64_to_user_ptr(attr->batch.in_batch); + void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); u32 batch, max_count, size, bucket_size; struct htab_elem *node_to_free = NULL; u64 elem_map_flags, map_flags; -- cgit v1.2.3 From 3546b9b8eced9799b35ef953f766a0973b17fda2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 7 Dec 2020 09:20:08 +0100 Subject: xsk: Validate socket state in xsk_recvmsg, prior touching socket members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In AF_XDP the socket state needs to be checked, prior touching the members of the socket. This was not the case for the recvmsg implementation. Fix that by moving the xsk_is_bound() call. 
Fixes: 45a86681844e ("xsk: Add support for recvmsg()") Reported-by: kernel test robot Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201207082008.132263-1-bjorn.topel@gmail.com --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 56c46e5f57bc..e28c6825e089 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -554,12 +554,12 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); + if (unlikely(!xsk_is_bound(xs))) + return -ENXIO; if (unlikely(!(xs->dev->flags & IFF_UP))) return -ENETDOWN; if (unlikely(!xs->rx)) return -ENOBUFS; - if (unlikely(!xsk_is_bound(xs))) - return -ENXIO; if (unlikely(need_wait)) return -EOPNOTSUPP; -- cgit v1.2.3 From 8bdd8e275ede9786d845b3ec952836e61fd824e9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 7 Dec 2020 22:43:26 -0800 Subject: bpf: Return -ENOTSUPP when attaching to non-kernel BTF Return -ENOTSUPP if tracing BPF program is attempted to be attached with specified attach_btf_obj_fd pointing to non-kernel (neither vmlinux nor module) BTF object. This scenario might be supported in the future and isn't outright invalid, so -EINVAL isn't the most appropriate error code. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201208064326.667389-1-andrii@kernel.org --- kernel/bpf/syscall.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0cd3cc2af9c1..287be337d5f6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2121,8 +2121,11 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (IS_ERR(attach_btf)) return -EINVAL; if (!btf_is_kernel(attach_btf)) { + /* attaching through specifying bpf_prog's BTF + * objects directly might be supported eventually + */ btf_put(attach_btf); - return -EINVAL; + return -ENOTSUPP; } } } else if (attr->attach_btf_id) { -- cgit v1.2.3 From b60da4955f53d1f50e44351a9c3a37a92503079e Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 8 Dec 2020 18:36:23 +0100 Subject: bpf: Only provide bpf_sock_from_file with CONFIG_NET This moves the bpf_sock_from_file definition into net/core/filter.c which only gets compiled with CONFIG_NET and also moves the helper proto usage next to other tracing helpers that are conditional on CONFIG_NET. This avoids ld: kernel/trace/bpf_trace.o: in function `bpf_sock_from_file': bpf_trace.c:(.text+0xe23): undefined reference to `sock_from_file' When compiling a kernel with BPF and without NET. 
Reported-by: kernel test robot Reported-by: Randy Dunlap Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: Randy Dunlap Acked-by: Martin KaFai Lau Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20201208173623.1136863-1-revest@chromium.org --- include/linux/bpf.h | 1 + kernel/trace/bpf_trace.c | 22 ++-------------------- net/core/filter.c | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d05e75ed8c1b..07cb5d15e743 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1859,6 +1859,7 @@ extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; +extern const struct bpf_func_proto bpf_sock_from_file_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0cf0a6331482..52ddd217d6a1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1270,24 +1270,6 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .arg5_type = ARG_ANYTHING, }; -BPF_CALL_1(bpf_sock_from_file, struct file *, file) -{ - return (unsigned long) sock_from_file(file); -} - -BTF_ID_LIST(bpf_sock_from_file_btf_ids) -BTF_ID(struct, socket) -BTF_ID(struct, file) - -static const struct bpf_func_proto bpf_sock_from_file_proto = { - .func = bpf_sock_from_file, - .gpl_only = false, - .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, - .ret_btf_id = &bpf_sock_from_file_btf_ids[0], - .arg1_type = ARG_PTR_TO_BTF_ID, - .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], -}; - const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1384,8 +1366,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_bpf_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; - case BPF_FUNC_sock_from_file: - return &bpf_sock_from_file_proto; default: return NULL; } @@ -1778,6 +1758,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_tracing_proto; + case BPF_FUNC_sock_from_file: + return &bpf_sock_from_file_proto; #endif case BPF_FUNC_seq_printf: return prog->expected_attach_type == BPF_TRACE_ITER ? 
diff --git a/net/core/filter.c b/net/core/filter.c index 77001a35768f..255aeee72402 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10413,6 +10413,24 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], }; +BPF_CALL_1(bpf_sock_from_file, struct file *, file) +{ + return (unsigned long)sock_from_file(file); +} + +BTF_ID_LIST(bpf_sock_from_file_btf_ids) +BTF_ID(struct, socket) +BTF_ID(struct, file) + +const struct bpf_func_proto bpf_sock_from_file_proto = { + .func = bpf_sock_from_file, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &bpf_sock_from_file_btf_ids[0], + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], +}; + static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id) { -- cgit v1.2.3 From a89052572ebbf4bcee7c39390640e92b60eaa653 Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:29 +0000 Subject: selftests/bpf: Xsk selftests framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds AF_XDP selftests framework under selftests/bpf. Topology: --------- ----------- ----------- | xskX | --------- | xskY | ----------- | ----------- | | | ----------- | ---------- | vethX | --------- | vethY | ----------- peer ---------- | | | namespaceX | namespaceY Prerequisites setup by script test_xsk.sh: Set up veth interfaces as per the topology shown ^^: * setup two veth interfaces and one namespace ** veth in root namespace ** veth in af_xdp namespace ** namespace af_xdp * create a spec file veth.spec that includes this run-time configuration *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid conflict with any existing interface * tests the veth and xsk layers of the topology Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-2-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/test_xsk.sh | 152 +++++++++++++++++++++++++++++ tools/testing/selftests/bpf/xsk_prereqs.sh | 119 ++++++++++++++++++++++ 3 files changed, 274 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/bpf/test_xsk.sh create mode 100755 tools/testing/selftests/bpf/xsk_prereqs.sh diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ac25ba5d0d6c..6a1ddfe68f15 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -46,7 +46,8 @@ endif TEST_GEN_FILES = TEST_FILES = test_lwt_ip_encap.o \ - test_tc_edt.o + test_tc_edt.o \ + xsk_prereqs.sh # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -70,6 +71,7 @@ TEST_PROGS := test_kmod.sh \ test_bpftool_build.sh \ test_bpftool.sh \ test_bpftool_metadata.sh \ + test_xsk.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh new file mode 100755 index 000000000000..cae4c5574c4c --- /dev/null +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2020 Intel Corporation, Weqaar Janjua + +# AF_XDP selftests based on veth +# +# End-to-end AF_XDP over Veth test +# +# Topology: +# --------- +# ----------- ----------- +# | xskX | --------- | xskY | +# ----------- | ----------- +# | | | +# 
----------- | ---------- +# | vethX | --------- | vethY | +# ----------- peer ---------- +# | | | +# namespaceX | namespaceY +# +# AF_XDP is an address family optimized for high performance packet processing, +# it is XDP’s user-space interface. +# +# An AF_XDP socket is linked to a single UMEM which is a region of virtual +# contiguous memory, divided into equal-sized frames. +# +# Refer to AF_XDP Kernel Documentation for detailed information: +# https://www.kernel.org/doc/html/latest/networking/af_xdp.html +# +# Prerequisites setup by script: +# +# Set up veth interfaces as per the topology shown ^^: +# * setup two veth interfaces and one namespace +# ** veth in root namespace +# ** veth in af_xdp namespace +# ** namespace af_xdp +# * create a spec file veth.spec that includes this run-time configuration +# *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid +# conflict with any existing interface +# * tests the veth and xsk layers of the topology +# +# Kernel configuration: +# --------------------- +# See "config" file for recommended kernel config options. +# +# Turn on XDP sockets and veth support when compiling i.e. +# Networking support --> +# Networking options --> +# [ * ] XDP sockets +# +# Executing Tests: +# ---------------- +# Must run with CAP_NET_ADMIN capability. +# +# Run (full color-coded output): +# sudo ./test_xsk.sh -c +# +# If running from kselftests: +# sudo make colorconsole=1 run_tests +# +# Run (full output without color-coding): +# sudo ./test_xsk.sh + +. xsk_prereqs.sh + +while getopts c flag +do + case "${flag}" in + c) colorconsole=1;; + esac +done + +TEST_NAME="PREREQUISITES" + +URANDOM=/dev/urandom +[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; } + +VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4) +VETH0=ve${VETH0_POSTFIX} +VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4) +VETH1=ve${VETH1_POSTFIX} +NS0=root +NS1=af_xdp${VETH1_POSTFIX} +MTU=1500 + +setup_vethPairs() { + echo "setting up ${VETH0}: namespace: ${NS0}" + ip netns add ${NS1} + ip link add ${VETH0} type veth peer name ${VETH1} + if [ -f /proc/net/if_inet6 ]; then + echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6 + fi + echo "setting up ${VETH1}: namespace: ${NS1}" + ip link set ${VETH1} netns ${NS1} + ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU} + ip link set ${VETH0} mtu ${MTU} + ip netns exec ${NS1} ip link set ${VETH1} up + ip link set ${VETH0} up +} + +validate_root_exec +validate_veth_support ${VETH0} +validate_ip_utility +setup_vethPairs + +retval=$? +if [ $retval -ne 0 ]; then + test_status $retval "${TEST_NAME}" + cleanup_exit ${VETH0} ${VETH1} ${NS1} + exit $retval +fi + +echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE} + +validate_veth_spec_file + +echo "Spec file created: ${SPECFILE}" + +test_status $retval "${TEST_NAME}" + +## START TESTS + +statusList=() + +### TEST 1 +TEST_NAME="XSK KSELFTEST FRAMEWORK" + +echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode" +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +retval=$? +if [ $retval -eq 0 ]; then + echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode" + vethXDPnative ${VETH0} ${VETH1} ${NS1} +fi + +retval=$? 
+test_status $retval "${TEST_NAME}" +statusList+=($retval) + +## END TESTS + +cleanup_exit ${VETH0} ${VETH1} ${NS1} + +for _status in "${statusList[@]}" +do + if [ $_status -ne 0 ]; then + test_exit $ksft_fail 0 + fi +done + +test_exit $ksft_pass 0 diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh new file mode 100755 index 000000000000..29762739c21b --- /dev/null +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2020 Intel Corporation. + +ksft_pass=0 +ksft_fail=1 +ksft_xfail=2 +ksft_xpass=3 +ksft_skip=4 + +GREEN='\033[0;92m' +YELLOW='\033[0;93m' +RED='\033[0;31m' +NC='\033[0m' +STACK_LIM=131072 +SPECFILE=veth.spec + +validate_root_exec() +{ + msg="skip all tests:" + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + test_exit $ksft_fail 2 + else + return $ksft_pass + fi +} + +validate_veth_support() +{ + msg="skip all tests:" + if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then + echo $msg veth kernel support not available >&2 + test_exit $ksft_skip 1 + else + ip link del $1 + return $ksft_pass + fi +} + +validate_veth_spec_file() +{ + if [ ! -f ${SPECFILE} ]; then + test_exit $ksft_skip 1 + fi +} + +test_status() +{ + statusval=$1 + if [ -n "${colorconsole+set}" ]; then + if [ $statusval -eq 2 ]; then + echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]" + elif [ $statusval -eq 1 ]; then + echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]" + elif [ $statusval -eq 0 ]; then + echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]" + fi + else + if [ $statusval -eq 2 ]; then + echo -e "$2: [ FAIL ]" + elif [ $statusval -eq 1 ]; then + echo -e "$2: [ SKIPPED ]" + elif [ $statusval -eq 0 ]; then + echo -e "$2: [ PASS ]" + fi + fi +} + +test_exit() +{ + retval=$1 + if [ $2 -ne 0 ]; then + test_status $2 $(basename $0) + fi + exit $retval +} + +clear_configs() +{ + if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then + [ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] && + { echo "removing link $1:$2"; ip netns exec $3 ip link del $2; } + echo "removing ns $3" + ip netns del $3 + fi + #Once we delete a veth pair node, the entire veth pair is removed, + #this is just to be cautious just incase the NS does not exist then + #veth node inside NS won't get removed so we explicitly remove it + [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] && + { echo "removing link $1"; ip link del $1; } + if [ -f ${SPECFILE} ]; then + echo "removing spec file:" ${SPECFILE} + rm -f ${SPECFILE} + fi +} + +cleanup_exit() +{ + echo "cleaning up..." + clear_configs $1 $2 $3 +} + +validate_ip_utility() +{ + [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; } +} + +vethXDPgeneric() +{ + ip link set dev $1 xdpdrv off + ip netns exec $3 ip link set dev $2 xdpdrv off +} + +vethXDPnative() +{ + ip link set dev $1 xdpgeneric off + ip netns exec $3 ip link set dev $2 xdpgeneric off +} -- cgit v1.2.3 From facb7cb2e909ad2d21ebbfdc051726d4cd8f1d35 Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:30 +0000 Subject: selftests/bpf: Xsk selftests - SKB POLL, NOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 1. AF_XDP SKB mode Generic mode XDP is driver independent, used when the driver does not have support for XDP. Works on any netdevice using sockets and generic XDP path. XDP hook from netif_receive_skb(). a. 
nopoll - soft-irq processing b. poll - using poll() syscall Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-3-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_xsk.sh | 39 +- tools/testing/selftests/bpf/xdpxceiver.c | 979 +++++++++++++++++++++++++++++ tools/testing/selftests/bpf/xdpxceiver.h | 153 +++++ tools/testing/selftests/bpf/xsk_prereqs.sh | 16 + 5 files changed, 1187 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/xdpxceiver.c create mode 100644 tools/testing/selftests/bpf/xdpxceiver.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6a1ddfe68f15..944ae17a39ed 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,8 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko + test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ + xdpxceiver TEST_CUSTOM_PROGS = urandom_read diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index cae4c5574c4c..0b7bafb65f43 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -8,8 +8,17 @@ # # Topology: # --------- -# ----------- ----------- -# | xskX | --------- | xskY | +# ----------- +# _ | Process | _ +# / ----------- \ +# / | \ +# / | \ +# ----------- | ----------- +# | Thread1 | | | Thread2 | +# ----------- | ----------- +# | | | +# ----------- | ----------- +# | xskX | | | xskY | # ----------- | ----------- # | | | # ----------- | ---------- @@ -39,6 +48,8 @@ # conflict with any existing interface # * tests the veth and xsk layers of the topology # +# See the source xdpxceiver.c for information on each test +# # Kernel configuration: # --------------------- # See "config" file for recommended kernel config options. @@ -138,6 +149,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 2 +TEST_NAME="SKB NOPOLL" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 3 +TEST_NAME="SKB POLL" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S" "-p") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c new file mode 100644 index 000000000000..3f2a65b6a9f5 --- /dev/null +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -0,0 +1,979 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. */ + +/* + * Some functions in this program are taken from + * Linux kernel samples/bpf/xdpsock* and modified + * for use. + * + * See test_xsk.sh for detailed information on test topology + * and prerequisite network setup. + * + * This test program contains two threads, each thread is single socket with + * a unique UMEM. It validates in-order packet delivery and packet content + * by sending packets to each other. 
+ * + * Tests Information: + * ------------------ + * These selftests test AF_XDP SKB and Native/DRV modes using veth + * Virtual Ethernet interfaces. + * + * The following tests are run: + * + * 1. AF_XDP SKB mode + * Generic mode XDP is driver independent, used when the driver does + * not have support for XDP. Works on any netdevice using sockets and + * generic XDP path. XDP hook from netif_receive_skb(). + * a. nopoll - soft-irq processing + * b. poll - using poll() syscall + * + * Total tests: 2 + * + * Flow: + * ----- + * - Single process spawns two threads: Tx and Rx + * - Each of these two threads attach to a veth interface within their assigned + * namespaces + * - Each thread Creates one AF_XDP socket connected to a unique umem for each + * veth interface + * - Tx thread Transmits 10k packets from veth to veth + * - Rx thread verifies if all 10k packets were received and delivered in-order, + * and have the right content + * + * Enable/disable debug mode: + * -------------------------- + * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add + * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D") + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +typedef __u16 __sum16; +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xdpxceiver.h" +#include "../kselftest.h" + +static void __exit_with_error(int error, const char *file, const char *func, int line) +{ + ksft_test_result_fail + ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error)); + ksft_exit_xfail(); +} + +#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) + +#define print_ksft_result(void)\ + (ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL")) + +static void pthread_init_mutex(void) +{ + pthread_mutex_init(&sync_mutex, NULL); + pthread_mutex_init(&sync_mutex_tx, NULL); + pthread_cond_init(&signal_rx_condition, NULL); + pthread_cond_init(&signal_tx_condition, NULL); +} + +static void pthread_destroy_mutex(void) +{ + pthread_mutex_destroy(&sync_mutex); + pthread_mutex_destroy(&sync_mutex_tx); + pthread_cond_destroy(&signal_rx_condition); + pthread_cond_destroy(&signal_tx_condition); +} + +static void *memset32_htonl(void *dest, u32 val, u32 size) +{ + u32 *ptr = (u32 *)dest; + int i; + + val = htonl(val); + + for (i = 0; i < (size & (~0x3)); i += 4) + ptr[i >> 2] = val; + + for (; i < size; i++) + ((char *)dest)[i] = ((char *)&val)[i & 3]; + + return dest; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static inline unsigned short from32to16(unsigned int x) +{ + /* add up 16-bit and 16-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * Fold a partial checksum + * This function code has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static inline __u16 csum_fold(__u32 csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __u16)~sum; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. 
*/ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + +__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum); + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN__ + s += proto + len; +#else + s += (proto + len) << 8; +#endif + return (__force __u32)from64to32(s); +} + +/* + * This function has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static inline __u16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) +{ + u32 csum = 0; + u32 cnt = 0; + + /* udp hdr and data */ + for (; cnt < len; cnt += 2) + csum += udp_pkt[cnt >> 1]; + + return csum_tcpudp_magic(saddr, daddr, len, proto, csum); +} + +static void gen_eth_hdr(void *data, struct ethhdr *eth_hdr) +{ + memcpy(eth_hdr->h_dest, ((struct ifobject *)data)->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, ((struct ifobject *)data)->src_mac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); +} + +static void gen_ip_hdr(void *data, struct iphdr *ip_hdr) +{ + ip_hdr->version = IP_PKT_VER; + ip_hdr->ihl = 0x5; + ip_hdr->tos = IP_PKT_TOS; + ip_hdr->tot_len = htons(IP_PKT_SIZE); + ip_hdr->id = 0; + ip_hdr->frag_off = 0; + ip_hdr->ttl = IPDEFTTL; + ip_hdr->protocol = IPPROTO_UDP; + ip_hdr->saddr = ((struct ifobject *)data)->src_ip; + ip_hdr->daddr = ((struct ifobject *)data)->dst_ip; + ip_hdr->check = 0; +} + +static void gen_udp_hdr(void *data, void *arg, struct udphdr *udp_hdr) +{ + udp_hdr->source = htons(((struct ifobject *)arg)->src_port); + udp_hdr->dest = htons(((struct ifobject *)arg)->dst_port); + udp_hdr->len = htons(UDP_PKT_SIZE); + memset32_htonl(pkt_data + PKT_HDR_SIZE, + htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE); +} + +static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) +{ + udp_hdr->check = 0; + udp_hdr->check = + udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); +} + +static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) +{ + memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE); +} + +static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size) +{ + int ret; + + data->umem = calloc(1, sizeof(struct xsk_umem_info)); + if (!data->umem) + exit_with_error(errno); + + ret = xsk_umem__create(&data->umem->umem, buffer, size, + &data->umem->fq, &data->umem->cq, NULL); + if (ret) + exit_with_error(ret); + + data->umem->buffer = buffer; +} + +static void xsk_populate_fill_ring(struct xsk_umem_info *umem) +{ + int ret, i; + u32 idx; + + ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); + if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) + exit_with_error(ret); + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE; + xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); +} + +static int xsk_configure_socket(struct ifobject *ifobject) +{ + struct xsk_socket_config cfg; + struct xsk_ring_cons *rxr; + struct xsk_ring_prod *txr; + int ret; + + ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info)); + if 
(!ifobject->xsk) + exit_with_error(errno); + + ifobject->xsk->umem = ifobject->umem; + cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg.libbpf_flags = 0; + cfg.xdp_flags = opt_xdp_flags; + cfg.bind_flags = opt_xdp_bind_flags; + + rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; + txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; + + ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname, + opt_queue, ifobject->umem->umem, rxr, txr, &cfg); + + if (ret) + return 1; + + return 0; +} + +static struct option long_options[] = { + {"interface", required_argument, 0, 'i'}, + {"queue", optional_argument, 0, 'q'}, + {"poll", no_argument, 0, 'p'}, + {"xdp-skb", no_argument, 0, 'S'}, + {"copy", no_argument, 0, 'c'}, + {"debug", optional_argument, 0, 'D'}, + {"tx-pkt-count", optional_argument, 0, 'C'}, + {0, 0, 0, 0} +}; + +static void usage(const char *prog) +{ + const char *str = + " Usage: %s [OPTIONS]\n" + " Options:\n" + " -i, --interface Use interface\n" + " -q, --queue=n Use queue n (default 0)\n" + " -p, --poll Use poll syscall\n" + " -S, --xdp-skb=n Use XDP SKB mode\n" + " -c, --copy Force copy mode\n" + " -D, --debug Debug mode - dump packets L2 - L5\n" + " -C, --tx-pkt-count=n Number of packets to send\n"; + ksft_print_msg(str, prog); +} + +static bool switch_namespace(int idx) +{ + char fqns[26] = "/var/run/netns/"; + int nsfd; + + strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1); + nsfd = open(fqns, O_RDONLY); + + if (nsfd == -1) + exit_with_error(errno); + + if (setns(nsfd, 0) == -1) + exit_with_error(errno); + + return true; +} + +static void *nsswitchthread(void *args) +{ + if (switch_namespace(((struct targs *)args)->idx)) { + ifdict[((struct targs *)args)->idx]->ifindex = + if_nametoindex(ifdict[((struct targs *)args)->idx]->ifname); + if (!ifdict[((struct targs *)args)->idx]->ifindex) { + ksft_test_result_fail + ("ERROR: [%s] interface \"%s\" does not exist\n", + __func__, ifdict[((struct targs *)args)->idx]->ifname); + ((struct targs *)args)->retptr = false; + } else { + ksft_print_msg("Interface found: %s\n", + ifdict[((struct targs *)args)->idx]->ifname); + ((struct targs *)args)->retptr = true; + } + } else { + ((struct targs *)args)->retptr = false; + } + pthread_exit(NULL); +} + +static int validate_interfaces(void) +{ + bool ret = true; + + for (int i = 0; i < MAX_INTERFACES; i++) { + if (!strcmp(ifdict[i]->ifname, "")) { + ret = false; + ksft_test_result_fail("ERROR: interfaces: -i , -i ,."); + } + if (strcmp(ifdict[i]->nsname, "")) { + struct targs *targs; + + targs = (struct targs *)malloc(sizeof(struct targs)); + if (!targs) + exit_with_error(errno); + + targs->idx = i; + if (pthread_create(&ns_thread, NULL, nsswitchthread, (void *)targs)) + exit_with_error(errno); + + pthread_join(ns_thread, NULL); + + if (targs->retptr) + ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname); + + free(targs); + } else { + ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname); + if (!ifdict[i]->ifindex) { + ksft_test_result_fail + ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname); + ret = false; + } else { + ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname); + } + } + } + return ret; +} + +static void parse_command_line(int argc, char **argv) +{ + int option_index, interface_index = 0, c; + + opterr = 0; + + for (;;) { + c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 'i': + if 
(interface_index == MAX_INTERFACES) + break; + char *sptr, *token; + + sptr = strndupa(optarg, strlen(optarg)); + memcpy(ifdict[interface_index]->ifname, + strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); + token = strsep(&sptr, ","); + if (token) + memcpy(ifdict[interface_index]->nsname, token, + MAX_INTERFACES_NAMESPACE_CHARS); + interface_index++; + break; + case 'q': + opt_queue = atoi(optarg); + break; + case 'p': + opt_poll = 1; + break; + case 'S': + opt_xdp_flags |= XDP_FLAGS_SKB_MODE; + opt_xdp_bind_flags |= XDP_COPY; + uut = ORDER_CONTENT_VALIDATE_XDP_SKB; + break; + case 'c': + opt_xdp_bind_flags |= XDP_COPY; + break; + case 'D': + debug_pkt_dump = 1; + break; + case 'C': + opt_pkt_count = atoi(optarg); + break; + default: + usage(basename(argv[0])); + ksft_exit_xfail(); + } + } + + if (!validate_interfaces()) { + usage(basename(argv[0])); + ksft_exit_xfail(); + } +} + +static void kick_tx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) + return; + exit_with_error(errno); +} + +static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) +{ + unsigned int rcvd; + u32 idx; + + if (!xsk->outstanding_tx) + return; + + if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); + + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); + if (rcvd) { + xsk_ring_cons__release(&xsk->umem->cq, rcvd); + xsk->outstanding_tx -= rcvd; + xsk->tx_npkts += rcvd; + } +} + +static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds) +{ + unsigned int rcvd, i; + u32 idx_rx = 0, idx_fq = 0; + int ret; + + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(ret); + } + return; + } + + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(ret); + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(ret); + } + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + } + + for (i = 0; i < rcvd; i++) { + u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; + (void)xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; + u64 orig = xsk_umem__extract_addr(addr); + + addr = xsk_umem__add_offset_to_addr(addr); + pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE); + if (!pkt_node_rx) + exit_with_error(errno); + + pkt_node_rx->pkt_frame = (char *)malloc(PKT_SIZE); + if (!pkt_node_rx->pkt_frame) + exit_with_error(errno); + + memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr), + PKT_SIZE); + + TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes); + + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; + } + + xsk_ring_prod__submit(&xsk->umem->fq, rcvd); + xsk_ring_cons__release(&xsk->rx, rcvd); + xsk->rx_npkts += rcvd; +} + +static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size) +{ + u32 idx; + unsigned int i; + + while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) + complete_tx_only(xsk, batch_size); + + for (i = 0; i < batch_size; i++) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); + + tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + tx_desc->len = PKT_SIZE; + } + + xsk_ring_prod__submit(&xsk->tx, 
batch_size); + xsk->outstanding_tx += batch_size; + *frameptr += batch_size; + *frameptr %= num_frames; + complete_tx_only(xsk, batch_size); +} + +static inline int get_batch_size(int pkt_cnt) +{ + if (!opt_pkt_count) + return BATCH_SIZE; + + if (pkt_cnt + BATCH_SIZE <= opt_pkt_count) + return BATCH_SIZE; + + return opt_pkt_count - pkt_cnt; +} + +static void complete_tx_only_all(void *arg) +{ + bool pending; + + do { + pending = false; + if (((struct ifobject *)arg)->xsk->outstanding_tx) { + complete_tx_only(((struct ifobject *) + arg)->xsk, BATCH_SIZE); + pending = !!((struct ifobject *)arg)->xsk->outstanding_tx; + } + } while (pending); +} + +static void tx_only_all(void *arg) +{ + struct pollfd fds[MAX_SOCKS] = { }; + u32 frame_nb = 0; + int pkt_cnt = 0; + int ret; + + fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].events = POLLOUT; + + while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { + int batch_size = get_batch_size(pkt_cnt); + + if (opt_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret <= 0) + continue; + + if (!(fds[0].revents & POLLOUT)) + continue; + } + + tx_only(((struct ifobject *)arg)->xsk, &frame_nb, batch_size); + pkt_cnt += batch_size; + } + + if (opt_pkt_count) + complete_tx_only_all(arg); +} + +static void worker_pkt_dump(void) +{ + struct in_addr ipaddr; + + fprintf(stdout, "---------------------------------------\n"); + for (int iter = 0; iter < num_frames - 1; iter++) { + /*extract L2 frame */ + fprintf(stdout, "DEBUG>> L2: dst mac: "); + for (int i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ((struct ethhdr *) + pkt_buf[iter]->payload)->h_dest[i]); + + fprintf(stdout, "\nDEBUG>> L2: src mac: "); + for (int i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ((struct ethhdr *) + pkt_buf[iter]->payload)->h_source[i]); + + /*extract L3 frame */ + fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", + ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl); + + ipaddr.s_addr = + ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr; + fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr)); + + ipaddr.s_addr = + ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr; + fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr)); + + /*extract L4 frame */ + fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", + ntohs(((struct udphdr *)(pkt_buf[iter]->payload + + sizeof(struct ethhdr) + + sizeof(struct iphdr)))->source)); + + fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", + ntohs(((struct udphdr *)(pkt_buf[iter]->payload + + sizeof(struct ethhdr) + + sizeof(struct iphdr)))->dest)); + /*extract L5 frame */ + int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); + + if (payload == EOT) { + ksft_print_msg("End-of-tranmission frame received\n"); + fprintf(stdout, "---------------------------------------\n"); + break; + } + fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); + fprintf(stdout, "---------------------------------------\n"); + } +} + +static void worker_pkt_validate(void) +{ + u32 payloadseqnum = -2; + + while (1) { + pkt_node_rx_q = malloc(sizeof(struct pkt)); + pkt_node_rx_q = TAILQ_LAST(&head, head_s); + if (!pkt_node_rx_q) + break; + /*do not increment pktcounter if !(tos=0x9 and ipv4) */ + if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->version == IP_PKT_VER) + && (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos == + IP_PKT_TOS)) { + payloadseqnum = 
*((uint32_t *) (pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); + if (debug_pkt_dump && payloadseqnum != EOT) { + pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame)); + pkt_obj->payload = (char *)malloc(PKT_SIZE); + memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE); + pkt_buf[payloadseqnum] = pkt_obj; + } + + if (payloadseqnum == EOT) { + ksft_print_msg("End-of-tranmission frame received: PASS\n"); + sigvar = 1; + break; + } + + if (prev_pkt + 1 != payloadseqnum) { + ksft_test_result_fail + ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n", + __func__, prev_pkt, payloadseqnum); + ksft_exit_xfail(); + } + + TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); + free(pkt_node_rx_q->pkt_frame); + free(pkt_node_rx_q); + pkt_node_rx_q = NULL; + prev_pkt = payloadseqnum; + pkt_counter++; + } else { + ksft_print_msg("Invalid frame received: "); + ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", + ((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->version, + ((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->tos); + TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); + free(pkt_node_rx_q->pkt_frame); + free(pkt_node_rx_q); + pkt_node_rx_q = NULL; + } + } +} + +static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr, + atomic_int *spinningptr) +{ + int ctr = 0; + int ret; + + xsk_configure_umem((struct ifobject *)arg, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket((struct ifobject *)arg); + + /* Retry Create Socket if it fails as xsk_socket__create() + * is asynchronous + * + * Essential to lock Mutex here to prevent Tx thread from + * entering before Rx and causing a deadlock + */ + pthread_mutex_lock(mutexptr); + while (ret && ctr < SOCK_RECONF_CTR) { + atomic_store(spinningptr, 1); + xsk_configure_umem((struct ifobject *)arg, + bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket((struct ifobject *)arg); + usleep(USLEEP_MAX); + ctr++; + } + atomic_store(spinningptr, 0); + pthread_mutex_unlock(mutexptr); + + if (ctr >= SOCK_RECONF_CTR) + exit_with_error(ret); +} + +static void *worker_testapp_validate(void *arg) +{ + struct udphdr *udp_hdr = + (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); + struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data)); + struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); + struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + void *bufs; + + pthread_attr_setstacksize(&attr, THREAD_STACK); + + bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); + + if (strcmp(((struct ifobject *)arg)->nsname, "")) + switch_namespace(((struct ifobject *)arg)->ifdict_index); + + if (((struct ifobject *)arg)->fv.vector == tx) { + int spinningrxctr = 0; + + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); + + while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { + spinningrxctr++; + usleep(USLEEP_MAX); + } + + ksft_print_msg("Interface [%s] vector [Tx]\n", ((struct ifobject *)arg)->ifname); + for (int i = 0; i < num_frames; i++) { + /*send EOT frame */ + if (i == (num_frames - 1)) + data->seqnum = -1; + else + data->seqnum = i; + gen_udp_hdr((void *)data, (void *)arg, udp_hdr); + gen_ip_hdr((void *)arg, ip_hdr); + gen_udp_csum(udp_hdr, ip_hdr); + gen_eth_hdr((void *)arg, eth_hdr); + gen_eth_frame(((struct ifobject 
*)arg)->umem, + i * XSK_UMEM__DEFAULT_FRAME_SIZE); + } + + free(data); + ksft_print_msg("Sending %d packets on interface %s\n", + (opt_pkt_count - 1), ((struct ifobject *)arg)->ifname); + tx_only_all(arg); + } else if (((struct ifobject *)arg)->fv.vector == rx) { + struct pollfd fds[MAX_SOCKS] = { }; + int ret; + + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); + + ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname); + xsk_populate_fill_ring(((struct ifobject *)arg)->umem); + + TAILQ_INIT(&head); + if (debug_pkt_dump) { + pkt_buf = malloc(sizeof(struct pkt_frame **) * num_frames); + if (!pkt_buf) + exit_with_error(errno); + } + + fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].events = POLLIN; + + pthread_mutex_lock(&sync_mutex); + pthread_cond_signal(&signal_rx_condition); + pthread_mutex_unlock(&sync_mutex); + + while (1) { + if (opt_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret <= 0) + continue; + } + rx_pkt(((struct ifobject *)arg)->xsk, fds); + worker_pkt_validate(); + + if (sigvar) + break; + } + + ksft_print_msg("Received %d packets on interface %s\n", + pkt_counter, ((struct ifobject *)arg)->ifname); + } + + xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); + (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + pthread_exit(NULL); +} + +static void testapp_validate(void) +{ + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, THREAD_STACK); + + pthread_mutex_lock(&sync_mutex); + + /*Spawn RX thread */ + if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) + exit_with_error(errno); + + struct timespec max_wait = { 0, 0 }; + + if (clock_gettime(CLOCK_REALTIME, &max_wait)) + exit_with_error(errno); + max_wait.tv_sec += TMOUT_SEC; + + if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT) + exit_with_error(errno); + + pthread_mutex_unlock(&sync_mutex); + + /*Spawn TX thread */ + if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) + exit_with_error(errno); + + pthread_join(t1, NULL); + pthread_join(t0, NULL); + + if (debug_pkt_dump) { + worker_pkt_dump(); + for (int iter = 0; iter < num_frames - 1; iter++) { + free(pkt_buf[iter]->payload); + free(pkt_buf[iter]); + } + free(pkt_buf); + } + + print_ksft_result(); +} + +static void init_iface_config(void *ifaceconfig) +{ + /*Init interface0 */ + ifdict[0]->fv.vector = tx; + memcpy(ifdict[0]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); + memcpy(ifdict[0]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); + ifdict[0]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; + ifdict[0]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; + ifdict[0]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; + ifdict[0]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; + + /*Init interface1 */ + ifdict[1]->fv.vector = rx; + memcpy(ifdict[1]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); + memcpy(ifdict[1]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); + ifdict[1]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; + ifdict[1]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; + ifdict[1]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; + ifdict[1]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; +} + +int main(int argc, char **argv) +{ + struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; 
+ + if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) + exit_with_error(errno); + + const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; + const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; + const char *IP1 = "192.168.100.162"; + const char *IP2 = "192.168.100.161"; + u16 UDP_DST_PORT = 2020; + u16 UDP_SRC_PORT = 2121; + + ifaceconfig = (struct ifaceconfigobj *)malloc(sizeof(struct ifaceconfigobj)); + memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN); + memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN); + inet_aton(IP1, &ifaceconfig->dst_ip); + inet_aton(IP2, &ifaceconfig->src_ip); + ifaceconfig->dst_port = UDP_DST_PORT; + ifaceconfig->src_port = UDP_SRC_PORT; + + for (int i = 0; i < MAX_INTERFACES; i++) { + ifdict[i] = (struct ifobject *)malloc(sizeof(struct ifobject)); + if (!ifdict[i]) + exit_with_error(errno); + + ifdict[i]->ifdict_index = i; + } + + setlocale(LC_ALL, ""); + + parse_command_line(argc, argv); + + num_frames = ++opt_pkt_count; + + init_iface_config((void *)ifaceconfig); + + pthread_init_mutex(); + + ksft_set_plan(1); + + testapp_validate(); + + for (int i = 0; i < MAX_INTERFACES; i++) + free(ifdict[i]); + + pthread_destroy_mutex(); + + ksft_exit_pass(); + + return 0; +} diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h new file mode 100644 index 000000000000..5929f2fc1224 --- /dev/null +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2020 Intel Corporation. + */ + +#ifndef XDPXCEIVER_H_ +#define XDPXCEIVER_H_ + +#ifndef SOL_XDP +#define SOL_XDP 283 +#endif + +#ifndef AF_XDP +#define AF_XDP 44 +#endif + +#ifndef PF_XDP +#define PF_XDP AF_XDP +#endif + +#define MAX_INTERFACES 2 +#define MAX_INTERFACE_NAME_CHARS 7 +#define MAX_INTERFACES_NAMESPACE_CHARS 10 +#define MAX_SOCKS 1 +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) +#define MIN_PKT_SIZE 64 +#define ETH_FCS_SIZE 4 +#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE) +#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) +#define IP_PKT_VER 0x4 +#define IP_PKT_TOS 0x9 +#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) +#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) +#define TMOUT_SEC (3) +#define EOT (-1) +#define USLEEP_MAX 200000 +#define THREAD_STACK 60000000 +#define SOCK_RECONF_CTR 10 +#define BATCH_SIZE 64 +#define POLL_TMOUT 1000 +#define NEED_WAKEUP 1 + +typedef __u32 u32; +typedef __u16 u16; +typedef __u8 u8; + +enum TESTS { + ORDER_CONTENT_VALIDATE_XDP_SKB = 0, +}; + +u8 uut; +u8 debug_pkt_dump; +u32 num_frames; + +static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int opt_queue; +static int opt_pkt_count; +static int opt_poll; +static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; +static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; +static u32 pkt_counter; +static u32 prev_pkt = -1; +static int sigvar; + +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + void *buffer; +}; + +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + unsigned long rx_npkts; + unsigned long tx_npkts; + unsigned long prev_rx_npkts; + unsigned long prev_tx_npkts; + u32 outstanding_tx; +}; + +struct flow_vector { + enum fvector { + tx, + rx, + bidi, + undef, + } vector; +}; + +struct generic_data { + u32 seqnum; +}; + +struct ifaceconfigobj { + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + struct in_addr dst_ip; + struct in_addr 
src_ip; + u16 src_port; + u16 dst_port; +} *ifaceconfig; + +struct ifobject { + int ifindex; + int ifdict_index; + char ifname[MAX_INTERFACE_NAME_CHARS]; + char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; + struct flow_vector fv; + struct xsk_socket_info *xsk; + struct xsk_umem_info *umem; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + u32 dst_ip; + u32 src_ip; + u16 src_port; + u16 dst_port; +}; + +static struct ifobject *ifdict[MAX_INTERFACES]; + +/*threads*/ +atomic_int spinning_tx; +atomic_int spinning_rx; +pthread_mutex_t sync_mutex; +pthread_mutex_t sync_mutex_tx; +pthread_cond_t signal_rx_condition; +pthread_cond_t signal_tx_condition; +pthread_t t0, t1, ns_thread; +pthread_attr_t attr; + +struct targs { + bool retptr; + int idx; +}; + +TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head); +struct head_s *head_p; +struct pkt { + char *pkt_frame; + + TAILQ_ENTRY(pkt) pkt_nodes; +} *pkt_node_rx, *pkt_node_rx_q; + +struct pkt_frame { + char *payload; +} *pkt_obj; + +struct pkt_frame **pkt_buf; + +#endif /* XDPXCEIVER_H */ diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index 29762739c21b..9d54c4645127 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -14,6 +14,8 @@ RED='\033[0;31m' NC='\033[0m' STACK_LIM=131072 SPECFILE=veth.spec +XSKOBJ=xdpxceiver +NUMPKTS=10000 validate_root_exec() { @@ -117,3 +119,17 @@ vethXDPnative() ip link set dev $1 xdpgeneric off ip netns exec $3 ip link set dev $2 xdpgeneric off } + +execxdpxceiver() +{ + local -a 'paramkeys=("${!'"$1"'[@]}")' copy + paramkeysstr=${paramkeys[*]} + + for index in $paramkeysstr; + do + current=$1"[$index]" + copy[$index]=${!current} + done + + ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS} +} -- cgit v1.2.3 From 9103a8594d9324d8e1512442ba580e4e91d42c2d Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:31 +0000 Subject: selftests/bpf: Xsk selftests - DRV POLL, NOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 2. AF_XDP DRV/Native mode Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes packets before SKB allocation. Provides better performance than SKB. Driver hook available just after DMA of buffer descriptor. a. nopoll b. poll * Only copy mode is supported because veth does not currently support zero-copy mode Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-4-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/test_xsk.sh | 24 ++++++++++++++++++++++++ tools/testing/selftests/bpf/xdpxceiver.c | 22 +++++++++++++++++++--- tools/testing/selftests/bpf/xdpxceiver.h | 3 ++- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 0b7bafb65f43..aad8065637fd 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -173,6 +173,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 4 +TEST_NAME="DRV NOPOLL" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 5 +TEST_NAME="DRV POLL" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N" "-p") +execxdpxceiver params + +retval=$? 
+test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 3f2a65b6a9f5..9fcd80a38b07 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -27,7 +27,16 @@ * a. nopoll - soft-irq processing * b. poll - using poll() syscall * - * Total tests: 2 + * 2. AF_XDP DRV/Native mode + * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes + * packets before SKB allocation. Provides better performance than SKB. Driver + * hook available just after DMA of buffer descriptor. + * a. nopoll + * b. poll + * - Only copy mode is supported because veth does not currently support + * zero-copy mode + * + * Total tests: 4 * * Flow: * ----- @@ -88,7 +97,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL")) + (ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL")) static void pthread_init_mutex(void) { @@ -311,6 +320,7 @@ static struct option long_options[] = { {"queue", optional_argument, 0, 'q'}, {"poll", no_argument, 0, 'p'}, {"xdp-skb", no_argument, 0, 'S'}, + {"xdp-native", no_argument, 0, 'N'}, {"copy", no_argument, 0, 'c'}, {"debug", optional_argument, 0, 'D'}, {"tx-pkt-count", optional_argument, 0, 'C'}, @@ -326,6 +336,7 @@ static void usage(const char *prog) " -q, --queue=n Use queue n (default 0)\n" " -p, --poll Use poll syscall\n" " -S, --xdp-skb=n Use XDP SKB mode\n" + " -N, --xdp-native=n Enforce XDP DRV (native) mode\n" " -c, --copy Force copy mode\n" " -D, --debug Debug mode - dump packets L2 - L5\n" " -C, --tx-pkt-count=n Number of packets to send\n"; @@ -417,7 +428,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index); + c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index); if (c == -1) break; @@ -448,6 +459,11 @@ static void parse_command_line(int argc, char **argv) opt_xdp_bind_flags |= XDP_COPY; uut = ORDER_CONTENT_VALIDATE_XDP_SKB; break; + case 'N': + opt_xdp_flags |= XDP_FLAGS_DRV_MODE; + opt_xdp_bind_flags |= XDP_COPY; + uut = ORDER_CONTENT_VALIDATE_XDP_DRV; + break; case 'c': opt_xdp_bind_flags |= XDP_COPY; break; diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 5929f2fc1224..12070d66344b 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -38,7 +38,7 @@ #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 #define POLL_TMOUT 1000 -#define NEED_WAKEUP 1 +#define NEED_WAKEUP true typedef __u32 u32; typedef __u16 u16; @@ -46,6 +46,7 @@ typedef __u8 u8; enum TESTS { ORDER_CONTENT_VALIDATE_XDP_SKB = 0, + ORDER_CONTENT_VALIDATE_XDP_DRV = 1, }; u8 uut; -- cgit v1.2.3 From 6674bf66560a6c55aada1e3cd4fca7a3ed204075 Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:32 +0000 Subject: selftests/bpf: Xsk selftests - Socket Teardown - SKB, DRV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 1. AF_XDP SKB mode c. Socket Teardown Create a Tx and a Rx socket, Tx from one socket, Rx on another. 
Destroy both sockets, then repeat multiple times. Only nopoll mode is used 2. AF_XDP DRV/Native mode c. Socket Teardown * Only copy mode is supported because veth does not currently support zero-copy mode Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-5-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/test_xsk.sh | 24 ++++++++++++++++++++++ tools/testing/selftests/bpf/xdpxceiver.c | 35 ++++++++++++++++++++++++++++---- tools/testing/selftests/bpf/xdpxceiver.h | 2 ++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index aad8065637fd..9be9dff25560 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -197,6 +197,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 6 +TEST_NAME="SKB SOCKET TEARDOWN" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S" "-T") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 7 +TEST_NAME="DRV SOCKET TEARDOWN" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N" "-T") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 9fcd80a38b07..e8907109782d 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -26,6 +26,9 @@ * generic XDP path. XDP hook from netif_receive_skb(). * a. nopoll - soft-irq processing * b. poll - using poll() syscall + * c. Socket Teardown + * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy + * both sockets, then repeat multiple times. Only nopoll mode is used * * 2. AF_XDP DRV/Native mode * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes @@ -33,10 +36,11 @@ * hook available just after DMA of buffer descriptor. * a. nopoll * b. poll + * c. Socket Teardown * - Only copy mode is supported because veth does not currently support * zero-copy mode * - * Total tests: 4 + * Total tests: 6 * * Flow: * ----- @@ -97,7 +101,8 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL")) + (ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\ + "NOPOLL", opt_teardown ? 
"Socket Teardown" : "")) static void pthread_init_mutex(void) { @@ -322,6 +327,7 @@ static struct option long_options[] = { {"xdp-skb", no_argument, 0, 'S'}, {"xdp-native", no_argument, 0, 'N'}, {"copy", no_argument, 0, 'c'}, + {"tear-down", no_argument, 0, 'T'}, {"debug", optional_argument, 0, 'D'}, {"tx-pkt-count", optional_argument, 0, 'C'}, {0, 0, 0, 0} @@ -338,6 +344,7 @@ static void usage(const char *prog) " -S, --xdp-skb=n Use XDP SKB mode\n" " -N, --xdp-native=n Enforce XDP DRV (native) mode\n" " -c, --copy Force copy mode\n" + " -T, --tear-down Tear down sockets by repeatedly recreating them\n" " -D, --debug Debug mode - dump packets L2 - L5\n" " -C, --tx-pkt-count=n Number of packets to send\n"; ksft_print_msg(str, prog); @@ -428,7 +435,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index); + c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index); if (c == -1) break; @@ -467,6 +474,9 @@ static void parse_command_line(int argc, char **argv) case 'c': opt_xdp_bind_flags |= XDP_COPY; break; + case 'T': + opt_teardown = 1; + break; case 'D': debug_pkt_dump = 1; break; @@ -871,6 +881,9 @@ static void *worker_testapp_validate(void *arg) ksft_print_msg("Received %d packets on interface %s\n", pkt_counter, ((struct ifobject *)arg)->ifname); + + if (opt_teardown) + ksft_print_msg("Destroying socket\n"); } xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); @@ -916,6 +929,20 @@ static void testapp_validate(void) free(pkt_buf); } + if (!opt_teardown) + print_ksft_result(); +} + +static void testapp_sockets(void) +{ + for (int i = 0; i < MAX_TEARDOWN_ITER; i++) { + pkt_counter = 0; + prev_pkt = -1; + sigvar = 0; + ksft_print_msg("Creating socket\n"); + testapp_validate(); + } + print_ksft_result(); } @@ -982,7 +1009,7 @@ int main(int argc, char **argv) ksft_set_plan(1); - testapp_validate(); + opt_teardown ? testapp_sockets() : testapp_validate(); for (int i = 0; i < MAX_INTERFACES; i++) free(ifdict[i]); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 12070d66344b..58185b914f99 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -21,6 +21,7 @@ #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 #define MAX_SOCKS 1 +#define MAX_TEARDOWN_ITER 10 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define MIN_PKT_SIZE 64 @@ -57,6 +58,7 @@ static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int opt_queue; static int opt_pkt_count; static int opt_poll; +static int opt_teardown; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static u32 pkt_counter; -- cgit v1.2.3 From 7d20441eb05ec6d8dc7b16381c53b3c0b3ad6e8a Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Mon, 7 Dec 2020 21:53:33 +0000 Subject: selftests/bpf: Xsk selftests - Bi-directional Sockets - SKB, DRV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds following tests: 1. AF_XDP SKB mode d. Bi-directional Sockets Configure sockets as bi-directional tx/rx sockets, sets up fill and completion rings on each socket, tx/rx in both directions. Only nopoll mode is used 2. AF_XDP DRV/Native mode d. 
Bi-directional Sockets * Only copy mode is supported because veth does not currently support zero-copy mode Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Tested-by: Yonghong Song Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201207215333.11586-6-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/test_xsk.sh | 24 ++++++++ tools/testing/selftests/bpf/xdpxceiver.c | 100 +++++++++++++++++++++++-------- tools/testing/selftests/bpf/xdpxceiver.h | 4 ++ 3 files changed, 104 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 9be9dff25560..88a7483eaae4 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -221,6 +221,30 @@ retval=$? test_status $retval "${TEST_NAME}" statusList+=($retval) +### TEST 8 +TEST_NAME="SKB BIDIRECTIONAL SOCKETS" + +vethXDPgeneric ${VETH0} ${VETH1} ${NS1} + +params=("-S" "-B") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + +### TEST 9 +TEST_NAME="DRV BIDIRECTIONAL SOCKETS" + +vethXDPnative ${VETH0} ${VETH1} ${NS1} + +params=("-N" "-B") +execxdpxceiver params + +retval=$? +test_status $retval "${TEST_NAME}" +statusList+=($retval) + ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index e8907109782d..014dedaa4dd2 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -29,6 +29,10 @@ * c. Socket Teardown * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy * both sockets, then repeat multiple times. Only nopoll mode is used + * d. Bi-directional sockets + * Configure sockets as bi-directional tx/rx sockets, sets up fill and + * completion rings on each socket, tx/rx in both directions. Only nopoll + * mode is used * * 2. AF_XDP DRV/Native mode * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes @@ -37,10 +41,11 @@ * a. nopoll * b. poll * c. Socket Teardown + * d. Bi-directional sockets * - Only copy mode is supported because veth does not currently support * zero-copy mode * - * Total tests: 6 + * Total tests: 8 * * Flow: * ----- @@ -101,8 +106,9 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\ - "NOPOLL", opt_teardown ? "Socket Teardown" : "")) + (ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\ + "NOPOLL", opt_teardown ? "Socket Teardown" : "",\ + opt_bidi ? "Bi-directional Sockets" : "")) static void pthread_init_mutex(void) { @@ -308,8 +314,13 @@ static int xsk_configure_socket(struct ifobject *ifobject) cfg.xdp_flags = opt_xdp_flags; cfg.bind_flags = opt_xdp_bind_flags; - rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; - txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; + if (!opt_bidi) { + rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; + txr = (ifobject->fv.vector == tx) ? 
&ifobject->xsk->tx : NULL; + } else { + rxr = &ifobject->xsk->rx; + txr = &ifobject->xsk->tx; + } ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname, opt_queue, ifobject->umem->umem, rxr, txr, &cfg); @@ -328,6 +339,7 @@ static struct option long_options[] = { {"xdp-native", no_argument, 0, 'N'}, {"copy", no_argument, 0, 'c'}, {"tear-down", no_argument, 0, 'T'}, + {"bidi", optional_argument, 0, 'B'}, {"debug", optional_argument, 0, 'D'}, {"tx-pkt-count", optional_argument, 0, 'C'}, {0, 0, 0, 0} @@ -345,6 +357,7 @@ static void usage(const char *prog) " -N, --xdp-native=n Enforce XDP DRV (native) mode\n" " -c, --copy Force copy mode\n" " -T, --tear-down Tear down sockets by repeatedly recreating them\n" + " -B, --bidi Bi-directional sockets test\n" " -D, --debug Debug mode - dump packets L2 - L5\n" " -C, --tx-pkt-count=n Number of packets to send\n"; ksft_print_msg(str, prog); @@ -435,7 +448,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index); + c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index); if (c == -1) break; @@ -477,6 +490,9 @@ static void parse_command_line(int argc, char **argv) case 'T': opt_teardown = 1; break; + case 'B': + opt_bidi = 1; + break; case 'D': debug_pkt_dump = 1; break; @@ -802,22 +818,25 @@ static void *worker_testapp_validate(void *arg) struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data)); struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; - void *bufs; + void *bufs = NULL; pthread_attr_setstacksize(&attr, THREAD_STACK); - bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); + if (!bidi_pass) { + bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); - if (strcmp(((struct ifobject *)arg)->nsname, "")) - switch_namespace(((struct ifobject *)arg)->ifdict_index); + if (strcmp(((struct ifobject *)arg)->nsname, "")) + switch_namespace(((struct ifobject *)arg)->ifdict_index); + } if (((struct ifobject *)arg)->fv.vector == tx) { int spinningrxctr = 0; - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); + if (!bidi_pass) + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { spinningrxctr++; @@ -847,7 +866,8 @@ static void *worker_testapp_validate(void *arg) struct pollfd fds[MAX_SOCKS] = { }; int ret; - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); + if (!bidi_pass) + thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname); xsk_populate_fill_ring(((struct ifobject *)arg)->umem); @@ -886,8 +906,10 @@ static void *worker_testapp_validate(void *arg) ksft_print_msg("Destroying socket\n"); } - xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); - (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + if (!opt_bidi || (opt_bidi && bidi_pass)) { + xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); + (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + } pthread_exit(NULL); } @@ -896,11 +918,26 @@ static void testapp_validate(void) pthread_attr_init(&attr); 
pthread_attr_setstacksize(&attr, THREAD_STACK); + if (opt_bidi && bidi_pass) { + pthread_init_mutex(); + if (!switching_notify) { + ksft_print_msg("Switching Tx/Rx vectors\n"); + switching_notify++; + } + } + pthread_mutex_lock(&sync_mutex); /*Spawn RX thread */ - if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) - exit_with_error(errno); + if (!opt_bidi || (opt_bidi && !bidi_pass)) { + if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) + exit_with_error(errno); + } else if (opt_bidi && bidi_pass) { + /*switch Tx/Rx vectors */ + ifdict[0]->fv.vector = rx; + if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[0])) + exit_with_error(errno); + } struct timespec max_wait = { 0, 0 }; @@ -914,8 +951,15 @@ static void testapp_validate(void) pthread_mutex_unlock(&sync_mutex); /*Spawn TX thread */ - if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) - exit_with_error(errno); + if (!opt_bidi || (opt_bidi && !bidi_pass)) { + if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) + exit_with_error(errno); + } else if (opt_bidi && bidi_pass) { + /*switch Tx/Rx vectors */ + ifdict[1]->fv.vector = tx; + if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[1])) + exit_with_error(errno); + } pthread_join(t1, NULL); pthread_join(t0, NULL); @@ -929,18 +973,19 @@ static void testapp_validate(void) free(pkt_buf); } - if (!opt_teardown) + if (!opt_teardown && !opt_bidi) print_ksft_result(); } static void testapp_sockets(void) { - for (int i = 0; i < MAX_TEARDOWN_ITER; i++) { + for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) { pkt_counter = 0; prev_pkt = -1; sigvar = 0; ksft_print_msg("Creating socket\n"); testapp_validate(); + opt_bidi ? bidi_pass++ : bidi_pass; } print_ksft_result(); @@ -1009,7 +1054,14 @@ int main(int argc, char **argv) ksft_set_plan(1); - opt_teardown ? testapp_sockets() : testapp_validate(); + if (!opt_teardown && !opt_bidi) { + testapp_validate(); + } else if (opt_teardown && opt_bidi) { + ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n"); + ksft_exit_xfail(); + } else { + testapp_sockets(); + } for (int i = 0; i < MAX_INTERFACES; i++) free(ifdict[i]); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 58185b914f99..61f595b6f200 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -22,6 +22,7 @@ #define MAX_INTERFACES_NAMESPACE_CHARS 10 #define MAX_SOCKS 1 #define MAX_TEARDOWN_ITER 10 +#define MAX_BIDI_ITER 2 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define MIN_PKT_SIZE 64 @@ -53,12 +54,15 @@ enum TESTS { u8 uut; u8 debug_pkt_dump; u32 num_frames; +u8 switching_notify; +u8 bidi_pass; static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int opt_queue; static int opt_pkt_count; static int opt_poll; static int opt_teardown; +static int opt_bidi; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static u32 pkt_counter; -- cgit v1.2.3 From a5b7b1194a57bc59f289f3e4433a1be81cc3e19d Mon Sep 17 00:00:00 2001 From: Veronika Kabatova Date: Thu, 10 Dec 2020 13:01:34 +0100 Subject: selftests/bpf: Drop tcp-{client,server}.py from Makefile The files don't exist anymore so this breaks generic kselftest builds when using "make install" or "make gen_tar". 
Fixes: 247f0ec361b7 ("selftests/bpf: Drop python client/server in favor of threads") Signed-off-by: Veronika Kabatova Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201210120134.2148482-1-vkabatov@redhat.com --- tools/testing/selftests/bpf/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 944ae17a39ed..50b3495d7ddf 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -75,8 +75,6 @@ TEST_PROGS := test_kmod.sh \ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ - tcp_client.py \ - tcp_server.py \ test_xdp_vlan.sh # Compile but not part of 'make run_tests' -- cgit v1.2.3 From 7535a3526dfe78db02a08ca2fa6bf69f393105dd Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Thu, 10 Dec 2020 11:54:35 +0000 Subject: selftests/bpf: Xsk selftests - adding xdpxceiver to .gitignore This patch adds *xdpxceiver* to selftests/bpf/.gitignore Reported-by: Yonghong Song Suggested-by: Yonghong Song Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201210115435.3995-1-weqaar.a.janjua@intel.com --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 752d8edddc66..f5b7ef93618c 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -36,3 +36,4 @@ test_cpp /runqslower /bench *.ko +xdpxceiver -- cgit v1.2.3 From 41003dd0241c2ceb2461a88a18ff461795f2af57 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 9 Dec 2020 15:29:12 +0100 Subject: selftests/bpf: Make selftest compilation work on clang 11 We can't compile test_core_reloc_module.c selftest with clang 11, compile fails with: CLNG-LLC [test_maps] test_core_reloc_module.o progs/test_core_reloc_module.c:57:21: error: use of unknown builtin \ '__builtin_preserve_type_info' [-Wimplicit-function-declaration] out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx); Skipping these tests if __builtin_preserve_type_info() is not supported by compiler. 
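As a rough sketch of that guard pattern (with placeholder program, section, and variable names rather than the selftest's actual code), the idea is to compile the CO-RE logic only when the builtin is available and otherwise mark the run as skipped:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

/* Placeholder result area; the real test uses its own output struct. */
struct {
	__u32 skip;
	__u64 task_struct_sz;
} result;

SEC("raw_tp/sys_enter")
int probe_type_size(void *ctx)
{
#if __has_builtin(__builtin_preserve_type_info)
	/* Emitted only when clang knows the CO-RE builtin. */
	result.task_struct_sz = bpf_core_type_size(struct task_struct);
#else
	/* Older clang: report "skipped" instead of breaking the build. */
	result.skip = 1;
#endif
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
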
Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201209142912.99145-1-jolsa@kernel.org --- tools/testing/selftests/bpf/progs/test_core_reloc_module.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c index 56363959f7b0..f59f175c7baf 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c @@ -40,6 +40,7 @@ int BPF_PROG(test_core_module_probed, struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx) { +#if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); __u32 real_tgid = (__u32)(pid_tgid >> 32); @@ -61,6 +62,9 @@ int BPF_PROG(test_core_module_probed, out->len_exists = bpf_core_field_exists(read_ctx->len); out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm); +#else + data.skip = true; +#endif return 0; } @@ -70,6 +74,7 @@ int BPF_PROG(test_core_module_direct, struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx) { +#if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); __u32 real_tgid = (__u32)(pid_tgid >> 32); @@ -91,6 +96,9 @@ int BPF_PROG(test_core_module_direct, out->len_exists = bpf_core_field_exists(read_ctx->len); out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm); +#else + data.skip = true; +#endif return 0; } -- cgit v1.2.3 From 092fde0f863b72b67c4d6dc03844f5658fc00a35 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 10 Dec 2020 17:34:07 +0100 Subject: samples/bpf: Fix possible hang in xdpsock with multiple threads Fix a possible hang in xdpsock that can occur when using multiple threads. In this case, one or more of the threads might get stuck in the while-loop in tx_only after the user has signaled the main thread to stop execution. In this case, no more Tx packets will be sent, so a thread might get stuck in the aforementioned while-loop. Fix this by introducing a test inside the while-loop to check if the benchmark has been terminated. If so, return from the function. Fixes: cd9e72b6f210 ("samples/bpf: xdpsock: Add option to specify batch size") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201210163407.22066-1-magnus.karlsson@gmail.com --- samples/bpf/xdpsock_user.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 568f9815bb1b..db0cb73513a5 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1275,6 +1275,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) { complete_tx_only(xsk, batch_size); + if (benchmark_done) + return; } for (i = 0; i < batch_size; i++) { -- cgit v1.2.3 From a67079b03165a17f9aceab3dd26b1638af68e0fc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 10 Dec 2020 17:59:46 -0800 Subject: selftests/bpf: fix bpf_testmod.ko recompilation logic bpf_testmod.ko build rule declared dependency on VMLINUX_BTF, but the variable itself was initialized after the rule was declared, which often caused bpf_testmod.ko to not be re-compiled. Fix by moving VMLINUX_BTF determination sooner. 
Also enforce bpf_testmod.ko recompilation when we detect that vmlinux image changed by removing bpf_testmod/bpf_testmod.ko. This is necessary to generate correct module's split BTF. Without it, Kbuild's module build logic might determine that nothing changed on the kernel side and thus bpf_testmod.ko shouldn't be rebuilt, so won't re-generate module BTF, which often leads to module's BTF with wrong string offsets against vmlinux BTF. Removing .ko file forces Kbuild to re-build the module. Reported-by: Alexei Starovoitov Fixes: 9f7fa225894c ("selftests/bpf: Add bpf_testmod kernel module for testing") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20201211015946.4062098-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 50b3495d7ddf..8b515a17f44b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -116,6 +116,13 @@ INCLUDE_DIR := $(SCRATCH_DIR)/include BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static @@ -140,6 +147,7 @@ $(OUTPUT)/urandom_read: urandom_read.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) + $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation $(Q)$(MAKE) $(submake_extras) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ @@ -147,13 +155,6 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(Q)$(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ - $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ - ../../../../vmlinux \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) - DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) -- cgit v1.2.3 From 89ad7420b25c2b40a4d916f4fd43b9ccacd50500 Mon Sep 17 00:00:00 2001 From: Andrew Delgadillo Date: Fri, 11 Dec 2020 00:43:44 +0000 Subject: selftests/bpf: Drop the need for LLVM's llc LLC is meant for compiler development and debugging. Consequently, it exposes many low level options about its backend. To avoid future bugs introduced by using the raw LLC tool, use clang directly so that all appropriate options are passed to the back end. Additionally, simplify the Makefile by removing the CLANG_NATIVE_BPF_BUILD_RULE as it is not being use, stop passing dwarfris attr since elfutils/libdw now supports the bpf backend (which should work with any recent pahole), and stop passing alu32 since -mcpu=v3 implies alu32. 
Signed-off-by: Andrew Delgadillo Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201211004344.3355074-1-adelg@google.com --- tools/testing/selftests/bpf/Makefile | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8b515a17f44b..8c33e999319a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -19,7 +19,6 @@ ifneq ($(wildcard $(GENHDR)),) endif CLANG ?= clang -LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= @@ -253,31 +252,19 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $1 - input .c file # $2 - output .o file # $3 - CFLAGS -# $4 - LDFLAGS define CLANG_BPF_BUILD_RULE - $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ - -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE - $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ - -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 -endef -# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC -define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)($(CLANG) $3 -O2 -emit-llvm \ - -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -332,8 +319,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $$(INCLUDE_DIR)/vmlinux.h \ $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ - $(TRUNNER_BPF_CFLAGS), \ - $(TRUNNER_BPF_LDFLAGS)) + $(TRUNNER_BPF_CFLAGS)) $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ @@ -401,19 +387,16 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -TRUNNER_BPF_LDFLAGS := -mattr=+alu32 $(eval $(call DEFINE_TEST_RUNNER,test_progs)) # Define test_progs-no_alu32 test runner. TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE -TRUNNER_BPF_LDFLAGS := $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) -TRUNNER_BPF_LDFLAGS := $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc)) endif @@ -424,7 +407,6 @@ TRUNNER_EXTRA_SOURCES := test_maps.c TRUNNER_EXTRA_FILES := TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built) TRUNNER_BPF_CFLAGS := -TRUNNER_BPF_LDFLAGS := $(eval $(call DEFINE_TEST_RUNNER,test_maps)) # Define test_verifier test runner. 
-- cgit v1.2.3 From b4fe9fec51ef48011f11c2da4099f0b530449c92 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 11 Dec 2020 01:07:11 +0000 Subject: selftests/bpf: Silence ima_setup.sh when not running in verbose mode. Currently, ima_setup.sh spews outputs from commands like mkfs and dd on the terminal without taking into account the verbosity level of the test framework. Update test_progs to set the environment variable SELFTESTS_VERBOSE=1 when a verbose output is requested. This environment variable is then used by ima_setup.sh (and can be used by other similar scripts) to obey the verbosity level of the test harness without needing to re-implement command line options for verbosity. In "silent" mode, the script saves the output to a temporary file, the contents of which are echoed back to stderr when the script encounters an error. Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash") Reported-by: Andrii Nakryiko Suggested-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201211010711.3716917-1-kpsingh@kernel.org --- tools/testing/selftests/bpf/ima_setup.sh | 24 ++++++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.c | 10 ++++++++++ 2 files changed, 34 insertions(+) diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh index 2bfc646bc230..8e62581113a3 100755 --- a/tools/testing/selftests/bpf/ima_setup.sh +++ b/tools/testing/selftests/bpf/ima_setup.sh @@ -7,6 +7,8 @@ set -o pipefail IMA_POLICY_FILE="/sys/kernel/security/ima/policy" TEST_BINARY="/bin/true" +VERBOSE="${SELFTESTS_VERBOSE:=0}" +LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)" usage() { @@ -75,6 +77,19 @@ run() exec "${copied_bin_path}" } +catch() +{ + local exit_code="$1" + local log_file="$2" + + if [[ "${exit_code}" -ne 0 ]]; then + cat "${log_file}" >&3 + fi + + rm -f "${log_file}" + exit ${exit_code} +} + main() { [[ $# -ne 2 ]] && usage @@ -96,4 +111,13 @@ main() fi } +trap 'catch "$?" "${LOG_FILE}"' EXIT + +if [[ "${VERBOSE}" -eq 0 ]]; then + # Save the stderr to 3 so that we can output back to + # it incase of an error. + exec 3>&2 1>"${LOG_FILE}" 2>&1 +fi + main "$@" +rm -f "${LOG_FILE}" diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 5ef081bdae4e..7d077d48cadd 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -587,6 +587,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return -EINVAL; } } + + if (env->verbosity > VERBOSE_NONE) { + if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) { + fprintf(stderr, + "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", + errno); + return -1; + } + } + break; case ARG_GET_TEST_CNT: env->get_test_cnt = true; -- cgit v1.2.3 From fe62de310e2b563c0d303a09d06b020077fe86b4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Dec 2020 13:58:24 -0800 Subject: libbpf: Support modules in bpf_program__set_attach_target() API Support finding kernel targets in kernel modules when using bpf_program__set_attach_target() API. This brings it up to par with what libbpf supports when doing declarative SEC()-based target determination. Some minor internal refactoring was needed to make sure vmlinux BTF can be loaded before bpf_object's load phase. 
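For illustration only (not part of the patch), a minimal user-space sketch of the new capability: the object file name "fentry_probe.bpf.o" and program name "handle_fentry_manual" are hypothetical, the BPF program is assumed to be declared with SEC("fentry/placeholder"), and the target function bpf_testmod_test_read is the one exercised by the selftest added in the next patch.

#include <bpf/libbpf.h>

/* Hypothetical names: "fentry_probe.bpf.o" and "handle_fentry_manual" stand
 * in for whatever object/program the application actually uses. */
int attach_to_module_func(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	int err;

	obj = bpf_object__open_file("fentry_probe.bpf.o", NULL);
	if (libbpf_get_error(obj))
		return -1;

	prog = bpf_object__find_program_by_name(obj, "handle_fentry_manual");
	if (!prog)
		goto cleanup;

	/* attach_prog_fd == 0 selects a kernel function; with this patch the
	 * BTF lookup also covers loaded kernel modules. */
	err = bpf_program__set_attach_target(prog, 0, "bpf_testmod_test_read");
	if (err)
		goto cleanup;

	err = bpf_object__load(obj);
	if (err)
		goto cleanup;

	link = bpf_program__attach(prog);
	if (libbpf_get_error(link))
		goto cleanup;

	return 0; /* caller keeps obj and link alive while attached */

cleanup:
	bpf_object__close(obj);
	return -1;
}

Since the patch rejects target changes on an already-loaded object with -EINVAL, bpf_program__set_attach_target() has to be called between open and load, as in the sketch above.
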
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201211215825.3646154-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 64 +++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9be88a90a4aa..6ae748f6ea11 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2518,7 +2518,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) return 0; } -static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) +static bool prog_needs_vmlinux_btf(struct bpf_program *prog) { if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || prog->type == BPF_PROG_TYPE_LSM) @@ -2533,37 +2533,43 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) return false; } -static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) +static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) { - bool need_vmlinux_btf = false; struct bpf_program *prog; - int i, err; + int i; /* CO-RE relocations need kernel BTF */ if (obj->btf_ext && obj->btf_ext->core_relo_info.len) - need_vmlinux_btf = true; + return true; /* Support for typed ksyms needs kernel BTF */ for (i = 0; i < obj->nr_extern; i++) { const struct extern_desc *ext; ext = &obj->externs[i]; - if (ext->type == EXT_KSYM && ext->ksym.type_id) { - need_vmlinux_btf = true; - break; - } + if (ext->type == EXT_KSYM && ext->ksym.type_id) + return true; } bpf_object__for_each_program(prog, obj) { if (!prog->load) continue; - if (libbpf_prog_needs_vmlinux_btf(prog)) { - need_vmlinux_btf = true; - break; - } + if (prog_needs_vmlinux_btf(prog)) + return true; } - if (!need_vmlinux_btf) + return false; +} + +static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) +{ + int err; + + /* btf_vmlinux could be loaded earlier */ + if (obj->btf_vmlinux) + return 0; + + if (!force && !obj_needs_vmlinux_btf(obj)) return 0; obj->btf_vmlinux = libbpf_find_kernel_btf(); @@ -7475,7 +7481,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } err = bpf_object__probe_loading(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? 
: bpf_object__sanitize_maps(obj); @@ -10870,23 +10876,33 @@ int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name) { - int btf_id; + int btf_obj_fd = 0, btf_id = 0, err; if (!prog || attach_prog_fd < 0 || !attach_func_name) return -EINVAL; - if (attach_prog_fd) + if (prog->obj->loaded) + return -EINVAL; + + if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); - else - btf_id = libbpf_find_vmlinux_btf_id(attach_func_name, - prog->expected_attach_type); - - if (btf_id < 0) - return btf_id; + if (btf_id < 0) + return btf_id; + } else { + /* load btf_vmlinux, if not yet */ + err = bpf_object__load_vmlinux_btf(prog->obj, true); + if (err) + return err; + err = find_kernel_btf_id(prog->obj, attach_func_name, + prog->expected_attach_type, + &btf_obj_fd, &btf_id); + if (err) + return err; + } prog->attach_btf_id = btf_id; - prog->attach_btf_obj_fd = 0; + prog->attach_btf_obj_fd = btf_obj_fd; prog->attach_prog_fd = attach_prog_fd; return 0; } -- cgit v1.2.3 From 2e33f831fccd2df83836a8e255755f85d364aaeb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Dec 2020 13:58:25 -0800 Subject: selftests/bpf: Add set_attach_target() API selftest for module target Add test for bpf_program__set_attach_target() API, validating it can find kernel module fentry target. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201211215825.3646154-3-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/module_attach.c | 11 ++++++++++- tools/testing/selftests/bpf/progs/test_module_attach.c | 11 +++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 4b65e9918764..50796b651f72 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -28,10 +28,18 @@ void test_module_attach(void) struct test_module_attach__bss *bss; int err; - skel = test_module_attach__open_and_load(); + skel = test_module_attach__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) return; + err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual, + 0, "bpf_testmod_test_read"); + ASSERT_OK(err, "set_attach_target"); + + err = test_module_attach__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton\n")) + return; + bss = skel->bss; err = test_module_attach__attach(skel); @@ -44,6 +52,7 @@ void test_module_attach(void) ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp"); ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf"); ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry"); + ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual"); ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit"); ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index b563563df172..efd1e287ac17 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -38,6 +38,17 @@ int BPF_PROG(handle_fentry, return 0; } +__u32 fentry_manual_read_sz = 0; + +SEC("fentry/placeholder") +int BPF_PROG(handle_fentry_manual, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + fentry_manual_read_sz 
= len; + return 0; +} + __u32 fexit_read_sz = 0; int fexit_ret = 0; -- cgit v1.2.3 From a4d2a7ad86834092b327082004ead755d2603376 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Mon, 14 Dec 2020 11:38:12 +0000 Subject: libbpf: Expose libbpf ring_buffer epoll_fd This provides a convenient perf ringbuf -> libbpf ringbuf migration path for users of external polling systems. It is analogous to perf_buffer__epoll_fd. Signed-off-by: Brendan Jackman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201214113812.305274-1-jackmanb@google.com --- tools/lib/bpf/libbpf.h | 1 + tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/ringbuf.c | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6909ee81113a..3c35eb401931 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -536,6 +536,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd, ring_buffer_sample_fn sample_cb, void *ctx); LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); +LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); /* Perf buffer APIs */ struct perf_buffer; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7c4126542e2b..1c0fd2dd233a 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -346,6 +346,7 @@ LIBBPF_0.3.0 { btf__parse_split; btf__new_empty_split; btf__new_split; + ring_buffer__epoll_fd; xsk_setup_xdp_prog; xsk_socket__update_xskmap; } LIBBPF_0.2.0; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5c6522c89af1..a8f997d47adf 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -282,3 +282,9 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) } return cnt < 0 ? -errno : res; } + +/* Get an fd that can be used to sleep until data is available in the ring(s) */ +int ring_buffer__epoll_fd(const struct ring_buffer *rb) +{ + return rb->epoll_fd; +} -- cgit v1.2.3 From cd17d38f8b28f808c368121041c0a4fa91757e0d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 9 Dec 2020 17:33:49 -0800 Subject: bpf: Permits pointers on stack for helper calls Currently, when checking stack memory accessed by helper calls, for spilled registers only PTR_TO_BTF_ID and SCALAR_VALUE are allowed. Song discovered an issue where the below bpf program int dump_task(struct bpf_iter__task *ctx) { struct seq_file *seq = ctx->meta->seq; static char info[] = "abc"; BPF_SEQ_PRINTF(seq, "%s\n", info); return 0; } may cause a verifier failure. The verifier output looks like: ; struct seq_file *seq = ctx->meta->seq; 1: (79) r1 = *(u64 *)(r1 +0) ; BPF_SEQ_PRINTF(seq, "%s\n", info); 2: (18) r2 = 0xffff9054400f6000 4: (7b) *(u64 *)(r10 -8) = r2 5: (bf) r4 = r10 ; 6: (07) r4 += -8 ; BPF_SEQ_PRINTF(seq, "%s\n", info); 7: (18) r2 = 0xffff9054400fe000 9: (b4) w3 = 4 10: (b4) w5 = 8 11: (85) call bpf_seq_printf#126 R1_w=ptr_seq_file(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=4,imm=0) R3_w=inv4 R4_w=fp-8 R5_w=inv8 R10=fp0 fp-8_w=map_value last_idx 11 first_idx 0 regs=8 stack=0 before 10: (b4) w5 = 8 regs=8 stack=0 before 9: (b4) w3 = 4 invalid indirect read from stack off -8+0 size 8 Basically, the verifier complains about the map_value pointer at the "fp-8" location. To fix the issue, if env->allow_ptr_leaks is true, let us also permit pointers on the stack to be accessible by the helper. 
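For illustration only (not part of the patch), the pattern the relaxed check accepts can be sketched as a standalone iterator program that calls bpf_seq_printf() directly, so the stack traffic is explicit; the use of vmlinux.h and the string contents are assumptions of the sketch.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	static const char fmt[] = "%s\n";
	static char info[] = "abc";	/* global data, i.e. a map value */
	__u64 args[1];

	/* The address of 'info' (a map_value pointer) is written into a stack
	 * slot; bpf_seq_printf() then reads that slot indirectly, which is the
	 * access the relaxed check now accepts. */
	args[0] = (__u64)(long)info;
	bpf_seq_printf(seq, fmt, sizeof(fmt), args, sizeof(args));
	return 0;
}

The args[] array lives on the BPF stack and holds a pointer into the program's global data (a map value); reading that slot from the helper is exactly the access that used to fail with "invalid indirect read from stack" and is now permitted for privileged programs.
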
Reported-by: Song Liu Suggested-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201210013349.943719-1-yhs@fb.com --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 93def76cf32b..9159c9822ede 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3769,7 +3769,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, goto mark; if (state->stack[spi].slot_type[0] == STACK_SPILL && - state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { + (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || + env->allow_ptr_leaks)) { __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); for (j = 0; j < BPF_REG_SIZE; j++) state->stack[spi].slot_type[j] = STACK_MISC; -- cgit v1.2.3 From b4b638c36b7e7acd847b9c4b9c80f268e45ea30c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 9 Dec 2020 17:33:50 -0800 Subject: selftests/bpf: Add a test for ptr_to_map_value on stack for helper access Change bpf_iter_task.c such that pointer to map_value may appear on the stack for bpf_seq_printf() to access. Without previous verifier patch, the bpf_iter test will fail. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201210013350.943985-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter_task.c | 3 ++- tools/testing/selftests/bpf/verifier/unpriv.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index 4983087852a0..b7f32c160f4e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -11,9 +11,10 @@ int dump_task(struct bpf_iter__task *ctx) { struct seq_file *seq = ctx->meta->seq; struct task_struct *task = ctx->task; + static char info[] = " === END ==="; if (task == (void *)0) { - BPF_SEQ_PRINTF(seq, " === END ===\n"); + BPF_SEQ_PRINTF(seq, "%s\n", info); return 0; } diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 91bb77c24a2e..a3fe0fbaed41 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -108,8 +108,9 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "invalid indirect read from stack off -8+0 size 8", - .result = REJECT, + .errstr_unpriv = "invalid indirect read from stack off -8+0 size 8", + .result_unpriv = REJECT, + .result = ACCEPT, }, { "unpriv: mangle pointer on stack 1", -- cgit v1.2.3
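
A hedged usage sketch (not taken from any patch in this series) of how the ring_buffer__epoll_fd() accessor introduced earlier can feed an application's existing epoll loop; the function name, map fd plumbing, and callback body below are placeholders.

#include <unistd.h>
#include <sys/epoll.h>
#include <bpf/libbpf.h>

static int handle_event(void *ctx, void *data, size_t size)
{
	/* process one ring buffer sample */
	return 0;
}

int poll_with_external_epoll(int ringbuf_map_fd)
{
	struct ring_buffer *rb;
	struct epoll_event ev, events[8];
	int epfd, rb_fd, i, n;

	rb = ring_buffer__new(ringbuf_map_fd, handle_event, NULL, NULL);
	if (!rb)
		return -1;

	rb_fd = ring_buffer__epoll_fd(rb);	/* the new accessor */

	epfd = epoll_create1(0);
	if (epfd < 0)
		goto out;

	ev.events = EPOLLIN;
	ev.data.fd = rb_fd;
	epoll_ctl(epfd, EPOLL_CTL_ADD, rb_fd, &ev);
	/* ... register the application's other fds on epfd the same way ... */

	for (;;) {
		n = epoll_wait(epfd, events, 8, -1);
		if (n < 0)
			break;
		for (i = 0; i < n; i++) {
			if (events[i].data.fd == rb_fd)
				ring_buffer__consume(rb); /* drain without blocking */
			/* else: dispatch the application's own fds */
		}
	}

	close(epfd);
out:
	ring_buffer__free(rb);
	return -1;
}

ring_buffer__consume() drains whatever data is available without blocking, so the external epoll_wait() alone decides when the thread sleeps and wakes.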