From d3fd203f36d46aa29600a72d57a1b61af80e4a25 Mon Sep 17 00:00:00 2001 From: Baisong Zhong Date: Wed, 2 Nov 2022 16:16:20 +0800 Subject: bpf, test_run: Fix alignment problem in bpf_prog_test_run_skb() We got a syzkaller problem because of an aarch64 alignment fault if KFENCE is enabled. When the size from the user bpf program is an odd number, like 399, 407, etc., it will cause unaligned access to struct skb_shared_info. As seen below: BUG: KFENCE: use-after-free read in __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 Use-after-free read at 0xffff6254fffac077 (in kfence-#213): __lse_atomic_add arch/arm64/include/asm/atomic_lse.h:26 [inline] arch_atomic_add arch/arm64/include/asm/atomic.h:28 [inline] arch_atomic_inc include/linux/atomic-arch-fallback.h:270 [inline] atomic_inc include/asm-generic/atomic-instrumented.h:241 [inline] __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 skb_clone+0xf4/0x214 net/core/skbuff.c:1481 ____bpf_clone_redirect net/core/filter.c:2433 [inline] bpf_clone_redirect+0x78/0x1c0 net/core/filter.c:2420 bpf_prog_d3839dd9068ceb51+0x80/0x330 bpf_dispatcher_nop_func include/linux/bpf.h:728 [inline] bpf_test_run+0x3c0/0x6c0 net/bpf/test_run.c:53 bpf_prog_test_run_skb+0x638/0xa7c net/bpf/test_run.c:594 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 kfence-#213: 0xffff6254fffac000-0xffff6254fffac196, size=407, cache=kmalloc-512 allocated by task 15074 on cpu 0 at 1342.585390s: kmalloc include/linux/slab.h:568 [inline] kzalloc include/linux/slab.h:675 [inline] bpf_test_init.isra.0+0xac/0x290 net/bpf/test_run.c:191 bpf_prog_test_run_skb+0x11c/0xa7c net/bpf/test_run.c:512 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 __arm64_sys_bpf+0x50/0x60 kernel/bpf/syscall.c:4381 To fix the problem, we adjust @size so that (@size + @headroom) is a multiple of SMP_CACHE_BYTES. So we make sure that struct skb_shared_info is aligned to a cache line. Fixes: 1cf1cae963c2 ("bpf: introduce BPF_PROG_TEST_RUN command") Signed-off-by: Baisong Zhong Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20221102081620.1465154-1-zhongbaisong@huawei.com --- net/bpf/test_run.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 13d578ce2a09..fcb3e6c5e03c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -774,6 +774,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, if (user_size > size) return ERR_PTR(-EMSGSIZE); + size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 18acb7fac22ff7b36c7ea5a76b12996e7b7dbaba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:13 +0100 Subject: bpf: Revert ("Fix dispatcher patchable function entry to 5 bytes nop") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because __attribute__((patchable_function_entry)) is only available since GCC-8, this solution fails to build on the minimum required GCC version. Undo these changes so we might try again -- without cluttering up the patches with too many changes.
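For illustration, the compiler dependency that forces this revert can be sketched as follows; this is a minimal, hypothetical guard around the attribute (EXAMPLE_DISPATCHER_ATTRIBUTES and example_dispatcher_func are made-up names), not the kernel's actual macro:

/*
 * patchable_function_entry(5) asks the compiler to emit 5 NOP bytes at the
 * function entry so they can later be rewritten into a direct jump. GCC
 * only understands the attribute from version 8 onward, so any user of it
 * needs a fallback on older compilers.
 */
#if defined(__GNUC__) && __GNUC__ >= 8
#define EXAMPLE_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5)))
#else
#define EXAMPLE_DISPATCHER_ATTRIBUTES /* attribute unavailable before GCC-8 */
#endif

EXAMPLE_DISPATCHER_ATTRIBUTES
unsigned int example_dispatcher_func(const void *ctx)
{
	return 0;
}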
This is an almost complete revert of: dbe69b299884 ("bpf: Fix dispatcher patchable function entry to 5 bytes nop") ceea991a019c ("bpf: Move bpf_dispatcher function out of ftrace locations") (notably the arch/x86/Kconfig hunk is kept). Reported-by: David Laight Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/439d8dc735bb4858875377df67f1b29a@AcuMS.aculab.com Link: https://lore.kernel.org/bpf/20221103120647.728830733@infradead.org --- arch/x86/net/bpf_jit_comp.c | 13 ------------- include/linux/bpf.h | 21 +-------------------- kernel/bpf/dispatcher.c | 6 ------ 3 files changed, 1 insertion(+), 39 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 00127abd89ee..99620428ad78 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -389,18 +388,6 @@ out: return ret; } -int __init bpf_arch_init_dispatcher_early(void *ip) -{ - const u8 *nop_insn = x86_nops[5]; - - if (is_endbr(*(u32 *)ip)) - ip += ENDBR_INSN_SIZE; - - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); - return 0; -} - int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0566705c1d4e..5cd95716b441 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,7 +27,6 @@ #include #include #include -#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -971,8 +970,6 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); -int __init bpf_arch_init_dispatcher_early(void *ip); - #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -986,21 +983,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); }, \ } -#define BPF_DISPATCHER_INIT_CALL(_name) \ - static int __init _name##_init(void) \ - { \ - return bpf_arch_init_dispatcher_early(_name##_func); \ - } \ - early_initcall(_name##_init) - -#ifdef CONFIG_X86_64 -#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) -#else -#define BPF_DISPATCHER_ATTRIBUTES -#endif - #define DEFINE_BPF_DISPATCHER(name) \ - notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -1010,9 +993,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ - BPF_DISPATCHER_INIT(bpf_dispatcher_##name); \ - BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name); - + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 04f0a045dcaa..fa64b80b8bca 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,7 +4,6 @@ #include #include #include -#include /* The BPF dispatcher is a multiway branch code generator. 
The * dispatcher is a mechanism to avoid the performance penalty of an @@ -91,11 +90,6 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n return -ENOTSUPP; } -int __weak __init bpf_arch_init_dispatcher_early(void *ip) -{ - return -ENOTSUPP; -} - static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) { s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; -- cgit v1.2.3 From c86df29d11dfba27c0a1f5039cd6fe387fbf4239 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:14 +0100 Subject: bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dispatcher function is currently abusing the ftrace __fentry__ call location for its own purposes -- this obviously gives trouble when the dispatcher and ftrace are both in use. A previous solution tried using __attribute__((patchable_function_entry())) which works, except it is GCC-8+ only, breaking the build on the earlier still supported compilers. Instead use static_call() -- which has its own annotations and does not conflict with ftrace -- to rewrite the dispatch function. By using: return static_call()(ctx, insni, bpf_func) you get a perfect forwarding tail call as function body (iow a single jmp instruction). By having the default static_call() target be bpf_dispatcher_nop_func() it retains the default behaviour (an indirect call to the argument function). Only once a dispatcher program is attached is the target rewritten to directly call the JIT'ed image. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/Y1/oBlK0yFk5c/Im@hirez.programming.kicks-ass.net Link: https://lore.kernel.org/bpf/20221103120647.796772565@infradead.org --- include/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++++++- kernel/bpf/dispatcher.c | 22 ++++++++-------------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5cd95716b441..74c6f449d81e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,6 +27,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -953,6 +954,10 @@ struct bpf_dispatcher { void *rw_image; u32 image_off; struct bpf_ksym ksym; +#ifdef CONFIG_HAVE_STATIC_CALL + struct static_call_key *sc_key; + void *sc_tramp; +#endif }; static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( @@ -970,6 +975,34 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); + +/* + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn + * indirection with a direct call to the bpf program. If the architecture does + * not have STATIC_CALL, avoid a double-indirection. 
+ */ +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __BPF_DISPATCHER_SC_INIT(_name) \ + .sc_key = &STATIC_CALL_KEY(_name), \ + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), + +#define __BPF_DISPATCHER_SC(name) \ + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) + +#define __BPF_DISPATCHER_CALL(name) \ + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) + +#define __BPF_DISPATCHER_UPDATE(_d, _new) \ + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) + +#else +#define __BPF_DISPATCHER_SC_INIT(name) +#define __BPF_DISPATCHER_SC(name) +#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) +#define __BPF_DISPATCHER_UPDATE(_d, _new) +#endif + #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -981,25 +1014,29 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ + __BPF_DISPATCHER_SC_INIT(_name##_call) \ } #define DEFINE_BPF_DISPATCHER(name) \ + __BPF_DISPATCHER_SC(name); \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ { \ - return bpf_func(ctx, insnsi); \ + return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ BPF_DISPATCHER_INIT(bpf_dispatcher_##name); + #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; + #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index fa64b80b8bca..7dfb8d0d5202 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,6 +4,7 @@ #include #include #include +#include /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -104,17 +105,11 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *b static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) { - void *old, *new, *tmp; - u32 noff; - int err; - - if (!prev_num_progs) { - old = NULL; - noff = 0; - } else { - old = d->image + d->image_off; + void *new, *tmp; + u32 noff = 0; + + if (prev_num_progs) noff = d->image_off ^ (PAGE_SIZE / 2); - } new = d->num_progs ? d->image + noff : NULL; tmp = d->num_progs ? 
d->rw_image + noff : NULL; @@ -128,11 +123,10 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); - if (err || !new) - return; + __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); - d->image_off = noff; + if (new) + d->image_off = noff; } void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, -- cgit v1.2.3 From a679120edfcf3d63f066f53afd425d51b480e533 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 7 Nov 2022 10:07:11 -0700 Subject: bpf: Add explicit cast to 'void *' for __BPF_DISPATCHER_UPDATE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with clang: kernel/bpf/dispatcher.c:126:33: error: pointer type mismatch ('void *' and 'unsigned int (*)(const void *, const struct bpf_insn *, bpf_func_t)' (aka 'unsigned int (*)(const void *, const struct bpf_insn *, unsigned int (*)(const void *, const struct bpf_insn *))')) [-Werror,-Wpointer-type-mismatch] __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/bpf.h:1045:54: note: expanded from macro '__BPF_DISPATCHER_UPDATE' __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) ^~~~ 1 error generated. The warning is pointing out that the type of new ('void *') and &bpf_dispatcher_nop_func are not compatible, which could have side effects coming out of a conditional operator due to promotion rules. Add the explicit cast to 'void *' to make it clear that this is expected, as __BPF_DISPATCHER_UPDATE() expands to a call to __static_call_update(), which expects a 'void *' as its final argument. Fixes: c86df29d11df ("bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace)") Link: https://github.com/ClangBuiltLinux/linux/issues/1755 Reported-by: kernel test robot Reported-by: "kernelci.org bot" Signed-off-by: Nathan Chancellor Acked-by: Björn Töpel Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221107170711.42409-1-nathan@kernel.org Signed-off-by: Martin KaFai Lau --- kernel/bpf/dispatcher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 7dfb8d0d5202..c19719f48ce0 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -123,7 +123,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); + __BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func); if (new) d->image_off = noff; -- cgit v1.2.3 From eb86559a691cea5fa63e57a03ec3dc9c31e97955 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 8 Nov 2022 13:11:31 +0800 Subject: bpf: Fix memory leaks in __check_func_call kmemleak reports this issue: unreferenced object 0xffff88817139d000 (size 2048): comm "test_progs", pid 33246, jiffies 4307381979 (age 45851.820s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [<0000000045f075f0>] kmalloc_trace+0x27/0xa0 [<0000000098b7c90a>] __check_func_call+0x316/0x1230 [<00000000b4c3c403>] check_helper_call+0x172e/0x4700 [<00000000aa3875b7>] do_check+0x21d8/0x45e0 [<000000001147357b>] do_check_common+0x767/0xaf0 [<00000000b5a595b4>] bpf_check+0x43e3/0x5bc0 [<0000000011e391b1>] bpf_prog_load+0xf26/0x1940 [<0000000007f765c0>] __sys_bpf+0xd2c/0x3650 [<00000000839815d6>] __x64_sys_bpf+0x75/0xc0 [<00000000946ee250>] do_syscall_64+0x3b/0x90 [<0000000000506b7f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The root cause here is: In function prepare_func_exit(), the callee is not released in the abnormal scenario after "state->curframe--;". To fix, move "state->curframe--;" to the very bottom of the function, right when we free callee and reset frame[] pointer to NULL, as Andrii suggested. In addition, function __check_func_call() has a similar problem. In the abnormal scenario before "state->curframe++;", the callee should also be released by free_func_state(). Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Fixes: fd978bf7fd31 ("bpf: Add reference tracking to verifier") Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1667884291-15666-1-git-send-email-wangyufen@huawei.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 225666307bba..264b3dc714cc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6745,11 +6745,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* Transfer references to the callee */ err = copy_reference_state(callee, caller); if (err) - return err; + goto err_out; err = set_callee_state_cb(env, caller, callee, *insn_idx); if (err) - return err; + goto err_out; clear_caller_saved_regs(env, caller->regs); @@ -6766,6 +6766,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn print_verifier_state(env, callee, true); } return 0; + +err_out: + free_func_state(callee); + state->frame[state->curframe + 1] = NULL; + return err; } int map_set_for_each_callback_args(struct bpf_verifier_env *env, @@ -6979,8 +6984,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return -EINVAL; } - state->curframe--; - caller = state->frame[state->curframe]; + caller = state->frame[state->curframe - 1]; if (callee->in_callback_fn) { /* enforce R0 return value range [0, 1]. */ struct tnum range = callee->callback_ret_range; @@ -7019,7 +7023,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) } /* clear everything in the callee */ free_func_state(callee); - state->frame[state->curframe + 1] = NULL; + state->frame[state->curframe--] = NULL; return 0; } -- cgit v1.2.3 From 0811664da064c6d7ca64c02f5579f758a007e52d Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 8 Nov 2022 20:19:45 +0800 Subject: selftests/bpf: Fix casting error when cross-compiling test_verifier for 32-bit platforms When cross-compiling test_verifier for 32-bit platforms, the casting error is shown below: test_verifier.c:1263:27: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] 1263 | info.xlated_prog_insns = (__u64)*buf; | ^ cc1: all warnings being treated as errors Fix it by adding zero-extension for it.
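As a rough standalone illustration (not the selftest code; ptr_to_u64 is a made-up helper name for this sketch): on a 32-bit target a pointer is only 4 bytes wide, so casting it straight to a 64-bit integer triggers the warning above, while going through unsigned long first makes the zero-extension explicit:

#include <stdint.h>

static uint64_t ptr_to_u64(const void *ptr)
{
	/*
	 * sizeof(void *) == 4 on 32-bit targets, so (uint64_t)ptr would be a
	 * pointer-to-wider-integer cast and trips -Werror=pointer-to-int-cast.
	 * unsigned long matches the pointer width on the kernel's supported
	 * ABIs, so the intermediate cast zero-extends cleanly on 32 and 64 bit.
	 */
	return (uint64_t)(unsigned long)ptr;
}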
Fixes: 933ff53191eb ("selftests/bpf: specify expected instructions in test_verifier tests") Signed-off-by: Pu Lehui Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221108121945.4104644-1-pulehui@huaweicloud.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2dbcbf363c18..b605a70d4f6b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1260,7 +1260,7 @@ static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) bzero(&info, sizeof(info)); info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)*buf; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { perror("second bpf_obj_get_info_by_fd failed"); goto out_free_buf; -- cgit v1.2.3 From 5704bc7e8991164b14efb748b5afa0715c25fac3 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 8 Nov 2022 09:58:57 +0800 Subject: selftests/bpf: Fix test_progs compilation failure in 32-bit arch test_progs fails to compile on a 32-bit arch; the log is as follows: test_progs.c:1013:52: error: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t' {aka 'unsigned int'} [-Werror=format=] 1013 | sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", | ~~^ | | | long int | %d 1014 | strlen(msg->test_log.log_buf), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | size_t {aka unsigned int} Fix it. Fixes: 91b2c0afd00c ("selftests/bpf: Add parallelism to test_progs") Signed-off-by: Yang Jihong Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221108015857.132457-1-yangjihong1@huawei.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_progs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0e9a47f97890..3fef451d8831 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1010,7 +1010,7 @@ static inline const char *str_msg(const struct msg *msg, char *buf) msg->subtest_done.have_log); break; case MSG_TEST_LOG: - sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", + sprintf(buf, "MSG_TEST_LOG (cnt: %zu, last: %d)", strlen(msg->test_log.log_buf), msg->test_log.is_last); break; -- cgit v1.2.3 From 8678ea06852cd1f819b870c773d43df888d15d46 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 10 Nov 2022 09:56:13 +0100 Subject: maccess: Fix writing offset in case of fault in strncpy_from_kernel_nofault() If a page fault occurs while copying the first byte, this function resets one byte before dst. As a consequence, an address could be modified, leading to kernel crashes in case the modified address was accessed later.
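A minimal userspace sketch of the failure mode, assuming a hypothetical read_byte_nofault() helper standing in for __get_kernel_nofault() (this is not the kernel implementation): when the very first read faults, dst has not been advanced yet, so terminating at dst[-1] writes one byte before the caller's buffer, while dst[0] stays inside it:

/* Hypothetical stand-in for __get_kernel_nofault(): "faults" on a NULL source. */
static int read_byte_nofault(const char *src, char *out)
{
	if (!src)
		return 0;
	*out = *src;
	return 1;
}

static long strncpy_nofault_sketch(char *dst, const char *src, long count)
{
	const char *start = src;

	if (count <= 0)
		return 0;

	do {
		char c;

		if (!read_byte_nofault(src, &c))
			goto efault;
		*dst++ = c;	/* dst only advances after a successful read */
		src++;
	} while (dst[-1] && src - start < count);

	dst[-1] = '\0';
	return src - start;

efault:
	dst[0] = '\0';	/* the fix: terminate at the first slot that was never written */
	return -14;	/* -EFAULT */
}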
Fixes: b58294ead14c ("maccess: allow architectures to provide kernel probing directly") Signed-off-by: Alban Crequy Signed-off-by: Andrii Nakryiko Tested-by: Francis Laniel Reviewed-by: Andrew Morton Cc: [5.8] Link: https://lore.kernel.org/bpf/20221110085614.111213-2-albancrequy@linux.microsoft.com --- mm/maccess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/maccess.c b/mm/maccess.c index 5f4d240f67ec..074f6b086671 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -97,7 +97,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) return src - unsafe_addr; Efault: pagefault_enable(); - dst[-1] = '\0'; + dst[0] = '\0'; return -EFAULT; } -- cgit v1.2.3 From 9cd094829dae949a755c18533479c20e74415ab2 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 10 Nov 2022 09:56:14 +0100 Subject: selftests: bpf: Add a test when bpf_probe_read_kernel_str() returns EFAULT This commit tests the previous fix of bpf_probe_read_kernel_str(). The BPF helper bpf_probe_read_kernel_str should return -EFAULT when given a bad source pointer and the target buffer should only be modified to make the string NULL terminated. bpf_probe_read_kernel_str() was previously inserting a NULL before the beginning of the dst buffer. This test should ensure that the implementation stays correct from now on. Without the fix, this test will fail as follows: $ cd tools/testing/selftests/bpf $ make $ sudo ./test_progs --name=varlen ... test_varlen:FAIL:check got 0 != exp 66 Signed-off-by: Alban Crequy Signed-off-by: Francis Laniel Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221110085614.111213-3-albancrequy@linux.microsoft.com Changes v1 to v2: - add ack tag - fix my email - rebase on bpf tree and tag for bpf tree --- tools/testing/selftests/bpf/prog_tests/varlen.c | 7 +++++++ tools/testing/selftests/bpf/progs/test_varlen.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c index dd324b4933db..4d7056f8f177 100644 --- a/tools/testing/selftests/bpf/prog_tests/varlen.c +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -63,6 +63,13 @@ void test_varlen(void) CHECK_VAL(data->total4, size1 + size2); CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", "doesn't match!\n"); + + CHECK_VAL(bss->ret_bad_read, -EFAULT); + CHECK_VAL(data->payload_bad[0], 0x42); + CHECK_VAL(data->payload_bad[1], 0x42); + CHECK_VAL(data->payload_bad[2], 0); + CHECK_VAL(data->payload_bad[3], 0x42); + CHECK_VAL(data->payload_bad[4], 0x42); cleanup: test_varlen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 3987ff174f1f..20eb7d422c41 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -19,6 +19,7 @@ __u64 payload1_len1 = 0; __u64 payload1_len2 = 0; __u64 total1 = 0; char payload1[MAX_LEN + MAX_LEN] = {}; +__u64 ret_bad_read = 0; /* .data */ int payload2_len1 = -1; @@ -36,6 +37,8 @@ int payload4_len2 = -1; int total4= -1; char payload4[MAX_LEN + MAX_LEN] = { 1 }; +char payload_bad[5] = { 0x42, 0x42, 0x42, 0x42, 0x42 }; + SEC("raw_tp/sys_enter") int handler64_unsigned(void *regs) { @@ -61,6 +64,8 @@ int handler64_unsigned(void *regs) total1 = payload - (void *)payload1; + ret_bad_read = bpf_probe_read_kernel_str(payload_bad + 2, 1, (void *) -1); + return 0; } -- cgit v1.2.3 From
4b45cd81f737d79d0fbfc0d320a1e518e7f0bbf0 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Thu, 10 Nov 2022 07:21:28 -0500 Subject: bpf: Initialize same number of free nodes for each pcpu_freelist pcpu_freelist_populate() initializes nr_elems / num_possible_cpus() + 1 free nodes for some CPUs, and then possibly one CPU with fewer nodes, followed by remaining cpus with 0 nodes. For example, when nr_elems == 256 and num_possible_cpus() == 32, CPU 0~27 each gets 9 free nodes, CPU 28 gets 4 free nodes, CPU 29~31 get 0 free nodes, while in fact each CPU should get 8 nodes equally. This patch initializes nr_elems / num_possible_cpus() free nodes for each CPU firstly, then allocates the remaining free nodes by one for each CPU until no free nodes left. Fixes: e19494edab82 ("bpf: introduce percpu_freelist") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221110122128.105214-1-xukuohai@huawei.com --- kernel/bpf/percpu_freelist.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index b6e7f5c5b9ab..034cf87b54e9 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -100,22 +100,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems) { struct pcpu_freelist_head *head; - int i, cpu, pcpu_entries; + unsigned int cpu, cpu_idx, i, j, n, m; - pcpu_entries = nr_elems / num_possible_cpus() + 1; - i = 0; + n = nr_elems / num_possible_cpus(); + m = nr_elems % num_possible_cpus(); + cpu_idx = 0; for_each_possible_cpu(cpu) { -again: head = per_cpu_ptr(s->freelist, cpu); - /* No locking required as this is not visible yet. */ - pcpu_freelist_push_node(head, buf); - i++; - buf += elem_size; - if (i == nr_elems) - break; - if (i % pcpu_entries) - goto again; + j = n + (cpu_idx < m ? 1 : 0); + for (i = 0; i < j; i++) { + /* No locking required as this is not visible yet. */ + pcpu_freelist_push_node(head, buf); + buf += elem_size; + } + cpu_idx++; } } -- cgit v1.2.3 From 1f6e04a1c7b85da3b765ca9f46029e5d1826d839 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 11 Nov 2022 07:56:20 -0500 Subject: bpf: Fix offset calculation error in __copy_map_value and zero_map_value Function __copy_map_value and zero_map_value miscalculated copy offset, resulting in possible copy of unwanted data to user or kernel. Fix it. 
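The arithmetic can be sketched in isolation (copy_skipping_fields below is an illustrative stand-in, not the kernel's __copy_map_value): the copy has to resume right after each special field's absolute offset, so the cursor must be set to off + size rather than merely advanced by size:

#include <stdint.h>
#include <string.h>

/*
 * Copy value_size bytes from src to dst while skipping cnt special fields
 * located at ascending absolute offsets off[] with sizes sz[].
 */
static void copy_skipping_fields(uint8_t *dst, const uint8_t *src,
				 uint32_t value_size,
				 const uint32_t *off, const uint32_t *sz,
				 uint32_t cnt)
{
	uint32_t curr_off = 0;

	for (uint32_t i = 0; i < cnt; i++) {
		memcpy(dst + curr_off, src + curr_off, off[i] - curr_off);
		/* Buggy variant: curr_off += sz[i]; loses the gap up to off[i]. */
		curr_off = off[i] + sz[i];
	}
	memcpy(dst + curr_off, src + curr_off, value_size - curr_off);
}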
Fixes: cc48755808c6 ("bpf: Add zero_map_value to zero map value with special fields") Fixes: 4d7d7f69f4b1 ("bpf: Adapt copy_map_value for multiple offset case") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20221111125620.754855-1-xukuohai@huaweicloud.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 74c6f449d81e..c1bd1bd10506 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -315,7 +315,7 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b u32 next_off = map->off_arr->field_off[i]; memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } @@ -344,7 +344,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) u32 next_off = map->off_arr->field_off[i]; memset(dst + curr_off, 0, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memset(dst + curr_off, 0, map->value_size - curr_off); } -- cgit v1.2.3
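To make the off-by-gap concrete, here is a small worked example with a hypothetical layout (value_size = 32, one special field of 8 bytes at offset 8): both versions copy the prefix [0, 8), but the old update resumes the tail copy at byte 8 and drags the special field along, whereas the fixed update resumes at byte 16:

#include <stdio.h>

int main(void)
{
	unsigned int field_off = 8, field_sz = 8, value_size = 32;
	unsigned int old_off = 0, new_off = 0;

	printf("prefix copy: [0, %u)\n", field_off);

	old_off += field_sz;		/* old: 0 + 8 = 8  (wrong) */
	new_off = field_off + field_sz;	/* new: 8 + 8 = 16 (right) */

	printf("old tail copy: [%u, %u)  <- includes the special field\n",
	       old_off, value_size);
	printf("new tail copy: [%u, %u)\n", new_off, value_size);
	return 0;
}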