From 6648e613226e18897231ab5e42ffc29e63fa3365 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 4 Apr 2024 10:10:01 +0800 Subject: bpf, skmsg: Fix NULL pointer dereference in sk_psock_skb_ingress_enqueue Fix NULL pointer data-races in sk_psock_skb_ingress_enqueue() which syzbot reported [1]. [1] BUG: KCSAN: data-race in sk_psock_drop / sk_psock_skb_ingress_enqueue write to 0xffff88814b3278b8 of 8 bytes by task 10724 on cpu 1: sk_psock_stop_verdict net/core/skmsg.c:1257 [inline] sk_psock_drop+0x13e/0x1f0 net/core/skmsg.c:843 sk_psock_put include/linux/skmsg.h:459 [inline] sock_map_close+0x1a7/0x260 net/core/sock_map.c:1648 unix_release+0x4b/0x80 net/unix/af_unix.c:1048 __sock_release net/socket.c:659 [inline] sock_close+0x68/0x150 net/socket.c:1421 __fput+0x2c1/0x660 fs/file_table.c:422 __fput_sync+0x44/0x60 fs/file_table.c:507 __do_sys_close fs/open.c:1556 [inline] __se_sys_close+0x101/0x1b0 fs/open.c:1541 __x64_sys_close+0x1f/0x30 fs/open.c:1541 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 read to 0xffff88814b3278b8 of 8 bytes by task 10713 on cpu 0: sk_psock_data_ready include/linux/skmsg.h:464 [inline] sk_psock_skb_ingress_enqueue+0x32d/0x390 net/core/skmsg.c:555 sk_psock_skb_ingress_self+0x185/0x1e0 net/core/skmsg.c:606 sk_psock_verdict_apply net/core/skmsg.c:1008 [inline] sk_psock_verdict_recv+0x3e4/0x4a0 net/core/skmsg.c:1202 unix_read_skb net/unix/af_unix.c:2546 [inline] unix_stream_read_skb+0x9e/0xf0 net/unix/af_unix.c:2682 sk_psock_verdict_data_ready+0x77/0x220 net/core/skmsg.c:1223 unix_stream_sendmsg+0x527/0x860 net/unix/af_unix.c:2339 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x140/0x180 net/socket.c:745 ____sys_sendmsg+0x312/0x410 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x1e9/0x280 net/socket.c:2667 __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x46/0x50 net/socket.c:2674 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 value changed: 0xffffffff83d7feb0 -> 0x0000000000000000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 10713 Comm: syz-executor.4 Tainted: G W 6.8.0-syzkaller-08951-gfe46a7dd189e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Prior to this, commit 4cd12c6065df ("bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready()") fixed one NULL pointer similarly due to no protection of saved_data_ready. Here is another different caller causing the same issue because of the same reason. So we should protect it with sk_callback_lock read lock because the writer side in the sk_psock_drop() uses "write_lock_bh(&sk->sk_callback_lock);". To avoid errors that could happen in future, I move those two pairs of lock into the sk_psock_data_ready(), which is suggested by John Fastabend. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: syzbot+aa8c8ec2538929f18f2d@syzkaller.appspotmail.com Signed-off-by: Jason Xing Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=aa8c8ec2538929f18f2d Link: https://lore.kernel.org/all/20240329134037.92124-1-kerneljasonxing@gmail.com Link: https://lore.kernel.org/bpf/20240404021001.94815-1-kerneljasonxing@gmail.com --- net/core/skmsg.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 4d75ef9d24bf..fd20aae30be2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,11 +1226,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) { - read_lock_bh(&sk->sk_callback_lock); + if (psock) sk_psock_data_ready(sk, psock); - read_unlock_bh(&sk->sk_callback_lock); - } rcu_read_unlock(); } } -- cgit v1.2.3 From 5bcf0dcbf9066348058b88a510c57f70f384c92c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 18 Apr 2024 09:18:39 +0200 Subject: xdp: use flags field to disambiguate broadcast redirect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When redirecting a packet using XDP, the bpf_redirect_map() helper will set up the redirect destination information in struct bpf_redirect_info (using the __bpf_xdp_redirect_map() helper function), and the xdp_do_redirect() function will read this information after the XDP program returns and pass the frame on to the right redirect destination. When using the BPF_F_BROADCAST flag to do multicast redirect to a whole map, __bpf_xdp_redirect_map() sets the 'map' pointer in struct bpf_redirect_info to point to the destination map to be broadcast. And xdp_do_redirect() reacts to the value of this map pointer to decide whether it's dealing with a broadcast or a single-value redirect. However, if the destination map is being destroyed before xdp_do_redirect() is called, the map pointer will be cleared out (by bpf_clear_redirect_map()) without waiting for any XDP programs to stop running. This causes xdp_do_redirect() to think that the redirect was to a single target, but the target pointer is also NULL (since broadcast redirects don't have a single target), so this causes a crash when a NULL pointer is passed to dev_map_enqueue(). To fix this, change xdp_do_redirect() to react directly to the presence of the BPF_F_BROADCAST flag in the 'flags' value in struct bpf_redirect_info to disambiguate between a single-target and a broadcast redirect. And only read the 'map' pointer if the broadcast flag is set, aborting if that has been cleared out in the meantime. This prevents the crash, while keeping the atomic (cmpxchg-based) clearing of the map pointer itself, and without adding any more checks in the non-broadcast fast path. Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support") Reported-and-tested-by: syzbot+af9492708df9797198d6@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Reviewed-by: Hangbin Liu Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20240418071840.156411-1-toke@redhat.com Signed-off-by: Martin KaFai Lau --- net/core/filter.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 8adf95765cdd..ae5254f712c9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4360,10 +4360,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (unlikely(!xdpf)) { @@ -4375,11 +4377,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_enqueue_multi(xdpf, dev, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_enqueue(fwd, xdpf, dev); } @@ -4442,9 +4453,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, - void *fwd, - enum bpf_map_type map_type, u32 map_id) + struct bpf_prog *xdp_prog, void *fwd, + enum bpf_map_type map_type, u32 map_id, + u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; @@ -4454,11 +4465,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } @@ -4495,9 +4515,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { @@ -4517,7 +4539,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, return 0; } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; -- cgit v1.2.3