From dc7d7447b56bcc9cf79a9c22e4edad200a298e4c Mon Sep 17 00:00:00 2001
From: Xu Kuohai
Date: Tue, 16 Apr 2024 14:42:07 +0800
Subject: bpf, arm64: Fix incorrect runtime stats

When __bpf_prog_enter() returns zero, the arm64 register x20 that stores
the prog start time is not set to zero, causing incorrect runtime stats.

To fix it, assign the return value of __bpf_prog_enter() to the x20
register immediately upon its return.

Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64")
Reported-by: Ivan Babrou
Signed-off-by: Xu Kuohai
Signed-off-by: Daniel Borkmann
Tested-by: Ivan Babrou
Acked-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20240416064208.2919073-2-xukuohai@huaweicloud.com
---
 arch/arm64/net/bpf_jit_comp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..13eac43c632d 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1844,15 +1844,15 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
 
         emit_call(enter_prog, ctx);
 
+        /* save return value to callee saved register x20 */
+        emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
+
         /* if (__bpf_prog_enter(prog) == 0)
          *        goto skip_exec_of_prog;
          */
         branch = ctx->image + ctx->idx;
         emit(A64_NOP, ctx);
 
-        /* save return value to callee saved register x20 */
-        emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
-
         emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
         if (!p->jited)
                 emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
--
cgit v1.2.3


From 10541b374aa05c8118cc6a529a615882e53f261b Mon Sep 17 00:00:00 2001
From: Xu Kuohai
Date: Tue, 16 Apr 2024 14:42:08 +0800
Subject: riscv, bpf: Fix incorrect runtime stats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When __bpf_prog_enter() returns zero, the s1 register is not set to zero,
resulting in incorrect runtime stats. Fix it by setting s1 immediately upon
the return of __bpf_prog_enter().

Fixes: 49b5e77ae3e2 ("riscv, bpf: Add bpf trampoline support for RV64")
Signed-off-by: Xu Kuohai
Signed-off-by: Daniel Borkmann
Reviewed-by: Pu Lehui
Acked-by: Björn Töpel
Link: https://lore.kernel.org/bpf/20240416064208.2919073-3-xukuohai@huaweicloud.com
---
 arch/riscv/net/bpf_jit_comp64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 1adf2f39ce59..ec9d692838fc 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -722,6 +722,9 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
         if (ret)
                 return ret;
 
+        /* store prog start time */
+        emit_mv(RV_REG_S1, RV_REG_A0, ctx);
+
         /* if (__bpf_prog_enter(prog) == 0)
          *        goto skip_exec_of_prog;
          */
@@ -729,9 +732,6 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
         /* nop reserved for conditional jump */
         emit(rv_nop(), ctx);
 
-        /* store prog start time */
-        emit_mv(RV_REG_S1, RV_REG_A0, ctx);
-
         /* arg1: &args_off */
         emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
         if (!p->jited)
--
cgit v1.2.3
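
Both runtime-stats fixes above address the same pattern: the generated
trampoline must latch the value returned by __bpf_prog_enter() into a
callee-saved register (x20 on arm64, s1 on riscv) before the conditional
branch that may skip the program, so that __bpf_prog_exit() sees 0 rather
than a stale start time on the skip path. The following is a minimal
user-space C sketch of that control flow, not kernel code; prog_enter(),
prog_exit() and the fake clock are hypothetical stand-ins for the real
helpers and the callee-saved register.

  #include <stdio.h>

  static unsigned long long clk = 1000;         /* fake clock          */
  static unsigned long long run_cnt, run_time;  /* the "runtime stats" */

  /* stand-in for __bpf_prog_enter(): start time, or 0 when the prog is skipped */
  static unsigned long long prog_enter(int runnable)
  {
          return runnable ? clk : 0;
  }

  /* stand-in for __bpf_prog_exit(): only account a run for a real start time */
  static void prog_exit(unsigned long long start)
  {
          if (start) {
                  run_cnt++;
                  run_time += clk - start;
          }
  }

  int main(void)
  {
          /* models the callee-saved register; holds stale garbage initially */
          unsigned long long saved_start = 0xdeadbeef;

          /* fixed ordering: latch the return value before the skip branch */
          saved_start = prog_enter(0);          /* this prog is skipped */
          if (saved_start)
                  clk += 5;                     /* prog->bpf_func() would run here */
          prog_exit(saved_start);

          printf("run_cnt=%llu run_time=%llu\n", run_cnt, run_time); /* 0 0 */
          return 0;
  }

With the old emission order, the skip path would leave 0xdeadbeef in
saved_start and prog_exit() would account a bogus run, which is exactly the
incorrect-stats symptom described above.
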
From c6f48506ba30c722dd9d89aa6a40eb1926277dff Mon Sep 17 00:00:00 2001
From: Puranjay Mohan
Date: Fri, 19 Apr 2024 18:28:32 +0000
Subject: arm32, bpf: Reimplement sign-extension mov instruction

The current implementation of the mov instruction with sign extension has
the following problems:

1. It clobbers the source register if it is not stacked because it sign
   extends the source and then moves it to the destination.

2. If the dst_reg is stacked, the current code doesn't write the value
   back in case of a 64-bit mov.

3. There is room for improvement by emitting fewer instructions.

The steps for fixing this and the instructions emitted by the JIT are
explained below with examples in all combinations; a short C model of the
resulting semantics follows the case list.

Case A: offset == 32:
=====================

Case A.1: src and dst are stacked registers:
--------------------------------------------

1. Load src_lo into tmp_lo
2. Store tmp_lo into dst_lo
3. Sign extend tmp_lo into tmp_hi
4. Store tmp_hi to dst_hi

Example: r3 = (s32)r3

  r3 is a stacked register

  ldr     r6, [r11, #-16]   // Load r3_lo into tmp_lo
  // str to dst_lo is not emitted because src_lo == dst_lo
  asr     r7, r6, #31       // Sign extend tmp_lo into tmp_hi
  str     r7, [r11, #-12]   // Store tmp_hi into r3_hi

Case A.2: src is stacked but dst is not:
----------------------------------------

1. Load src_lo into dst_lo
2. Sign extend dst_lo into dst_hi

Example: r6 = (s32)r3

  r6 maps to {ARM_R5, ARM_R4} and r3 is stacked

  ldr     r4, [r11, #-16]   // Load r3_lo into r6_lo
  asr     r5, r4, #31       // Sign extend r6_lo into r6_hi

Case A.3: src is not stacked but dst is stacked:
------------------------------------------------

1. Store src_lo into dst_lo
2. Sign extend src_lo into tmp_hi
3. Store tmp_hi to dst_hi

Example: r3 = (s32)r6

  r3 is stacked and r6 maps to {ARM_R5, ARM_R4}

  str     r4, [r11, #-16]   // Store r6_lo to r3_lo
  asr     r7, r4, #31       // Sign extend r6_lo into tmp_hi
  str     r7, [r11, #-12]   // Store tmp_hi to r3_hi

Case A.4: Both src and dst are not stacked:
-------------------------------------------

1. Mov src_lo into dst_lo
2. Sign extend src_lo into dst_hi

Example: (bf) r6 = (s32)r6

  r6 maps to {ARM_R5, ARM_R4}

  // Mov not emitted because dst == src
  asr     r5, r4, #31       // Sign extend r6_lo into r6_hi

Case B: offset != 32:
=====================

Case B.1: src and dst are stacked registers:
--------------------------------------------

1. Load src_lo into tmp_lo
2. Sign extend tmp_lo according to offset.
3. Store tmp_lo into dst_lo
4. Sign extend tmp_lo into tmp_hi
5. Store tmp_hi to dst_hi

Example: r9 = (s8)r3

  r9 and r3 are both stacked registers

  ldr     r6, [r11, #-16]   // Load r3_lo into tmp_lo
  lsl     r6, r6, #24       // Sign extend tmp_lo
  asr     r6, r6, #24       // ..
  str     r6, [r11, #-56]   // Store tmp_lo to r9_lo
  asr     r7, r6, #31       // Sign extend tmp_lo to tmp_hi
  str     r7, [r11, #-52]   // Store tmp_hi to r9_hi

Case B.2: src is stacked but dst is not:
----------------------------------------

1. Load src_lo into dst_lo
2. Sign extend dst_lo according to offset.
3. Sign extend dst_lo into dst_hi

Example: r6 = (s8)r3

  r6 maps to {ARM_R5, ARM_R4} and r3 is stacked

  ldr     r4, [r11, #-16]   // Load r3_lo to r6_lo
  lsl     r4, r4, #24       // Sign extend r6_lo
  asr     r4, r4, #24       // ..
  asr     r5, r4, #31       // Sign extend r6_lo into r6_hi

Case B.3: src is not stacked but dst is stacked:
------------------------------------------------

1. Sign extend src_lo into tmp_lo according to offset.
2. Store tmp_lo into dst_lo.
3. Sign extend tmp_lo into tmp_hi.
4. Store tmp_hi to dst_hi.

Example: r3 = (s8)r1

  r3 is stacked and r1 maps to {ARM_R3, ARM_R2}

  lsl     r6, r2, #24       // Sign extend r1_lo to tmp_lo
  asr     r6, r6, #24       // ..
  str     r6, [r11, #-16]   // Store tmp_lo to r3_lo
  asr     r7, r6, #31       // Sign extend tmp_lo to tmp_hi
  str     r7, [r11, #-12]   // Store tmp_hi to r3_hi

Case B.4: Both src and dst are not stacked:
-------------------------------------------

1. Sign extend src_lo into dst_lo according to offset.
2. Sign extend dst_lo into dst_hi.

Example: r6 = (s8)r1

  r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2}

  lsl     r4, r2, #24       // Sign extend r1_lo to r6_lo
  asr     r4, r4, #24       // ..
  asr     r5, r4, #31       // Sign extend r6_lo to r6_hi
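
The promised C model: all of the cases above compute the same result and
differ only in where the low and high words live. Below is a small
stand-alone C model of BPF_MOVSX on a target that splits each 64-bit BPF
register into lo/hi 32-bit words, mirroring the lsl/asr #(32 - off) and
asr #31 sequences; movsx64() is a hypothetical helper used purely for
illustration, not the JIT code.

  #include <stdint.h>
  #include <stdio.h>

  /* dst = (s<off>)src: sign extend within the low word, then replicate the
   * sign bit into the high word. */
  static void movsx64(uint32_t *dst_lo, uint32_t *dst_hi, uint32_t src_lo, int off)
  {
          uint32_t lo = src_lo;

          if (off != 32) {
                  /* lsl #(32 - off); asr #(32 - off) */
                  lo = (uint32_t)((int32_t)(lo << (32 - off)) >> (32 - off));
          }
          *dst_lo = lo;
          *dst_hi = (uint32_t)((int32_t)lo >> 31);   /* asr #31 */
  }

  int main(void)
  {
          uint32_t lo, hi;

          movsx64(&lo, &hi, 0x000000f0, 8);          /* r = (s8)0xf0        */
          printf("0x%08x%08x\n", (unsigned)hi, (unsigned)lo);
          movsx64(&lo, &hi, 0x80000000, 32);         /* r = (s32)0x80000000 */
          printf("0x%08x%08x\n", (unsigned)hi, (unsigned)lo);
          return 0;
  }

The first call prints 0xfffffffffffffff0 and the second 0xffffffff80000000,
matching what the emitted instruction sequences above leave in
{dst_hi, dst_lo}.
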
Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction")
Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com
Signed-off-by: Puranjay Mohan
Signed-off-by: Daniel Borkmann
Reviewed-by: Russell King (Oracle)
Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com
Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org
---
 arch/arm/net/bpf_jit_32.c | 56 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 1d672457d02f..72b5cd697f5d 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -871,16 +871,11 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
 }
 
 /* dst = src (4 bytes)*/
-static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
-                                  struct jit_ctx *ctx) {
+static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) {
         const s8 *tmp = bpf2a32[TMP_REG_1];
         s8 rt;
 
         rt = arm_bpf_get_reg32(src, tmp[0], ctx);
-        if (off && off != 32) {
-                emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
-                emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
-        }
         arm_bpf_put_reg32(dst, rt, ctx);
 }
 
@@ -889,15 +884,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
                                   const s8 src[],
                                   struct jit_ctx *ctx) {
         if (!is64) {
-                emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
+                emit_a32_mov_r(dst_lo, src_lo, ctx);
                 if (!ctx->prog->aux->verifier_zext)
                         /* Zero out high 4 bytes */
                         emit_a32_mov_i(dst_hi, 0, ctx);
         } else if (__LINUX_ARM_ARCH__ < 6 &&
                    ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
                 /* complete 8 byte move */
-                emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
-                emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
+                emit_a32_mov_r(dst_lo, src_lo, ctx);
+                emit_a32_mov_r(dst_hi, src_hi, ctx);
         } else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
                 const u8 *tmp = bpf2a32[TMP_REG_1];
 
@@ -917,17 +912,52 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
 static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
                                       struct jit_ctx *ctx) {
         const s8 *tmp = bpf2a32[TMP_REG_1];
-        const s8 *rt;
+        s8 rs;
+        s8 rd;
 
-        rt = arm_bpf_get_reg64(dst, tmp, ctx);
+        if (is_stacked(dst_lo))
+                rd = tmp[1];
+        else
+                rd = dst_lo;
+        rs = arm_bpf_get_reg32(src_lo, rd, ctx);
+        /* rs may be one of src[1], dst[1], or tmp[1] */
+
+        /* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign
+         * extension only happens in the upper 64 bits.
+         */
+        if (off != 32) {
+                /* Sign extend rs into rd */
+                emit(ARM_LSL_I(rd, rs, 32 - off), ctx);
+                emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
+        } else {
+                rd = rs;
+        }
+
+        /* Write rd to dst_lo
+         *
+         * Optimization:
+         * Assume:
+         * 1. dst == src and stacked.
+         * 2. off == 32
+         *
+         * In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32.
+         * So, we don't need to write rd back to dst_lo as they have the same value.
+         * This saves us one str instruction.
+         */
+        if (dst_lo != src_lo || off != 32)
+                arm_bpf_put_reg32(dst_lo, rd, ctx);
 
-        emit_a32_mov_r(dst_lo, src_lo, off, ctx);
         if (!is64) {
                 if (!ctx->prog->aux->verifier_zext)
                         /* Zero out high 4 bytes */
                         emit_a32_mov_i(dst_hi, 0, ctx);
         } else {
-                emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
+                if (is_stacked(dst_hi)) {
+                        emit(ARM_ASR_I(tmp[0], rd, 31), ctx);
+                        arm_bpf_put_reg32(dst_hi, tmp[0], ctx);
+                } else {
+                        emit(ARM_ASR_I(dst_hi, rd, 31), ctx);
+                }
         }
 }
--
cgit v1.2.3


From 66e13b615a0ce76b785d780ecc9776ba71983629 Mon Sep 17 00:00:00 2001
From: Puranjay Mohan
Date: Wed, 24 Apr 2024 10:02:08 +0000
Subject: bpf: verifier: prevent userspace memory access

With BPF_PROBE_MEM, BPF allows dereferencing an untrusted pointer. To
thwart invalid memory accesses, the JITs add an exception table entry for
all such accesses. But in case the src_reg + offset is a userspace address,
the BPF program might read that memory if the user has mapped it.

Make the verifier add guard instructions around such memory accesses and
skip the load if the address falls into the userspace region.

The JITs need to implement bpf_arch_uaddress_limit() to define where the
userspace addresses end for that architecture; otherwise TASK_SIZE is taken
as the default.

The implementation is as follows:

  REG_AX = SRC_REG
  if(offset)
        REG_AX += offset;
  REG_AX >>= 32;
  if (REG_AX <= (uaddress_limit >> 32))
        DST_REG = 0;
  else
        DST_REG = *(size *)(SRC_REG + offset);

Comparing just the upper 32 bits of the load address with the upper 32 bits
of uaddress_limit implies that the values are being aligned down to a 4GB
boundary before comparison.

The above means that all loads with address <= uaddress_limit + 4GB are
skipped. This is acceptable because there is a large hole (much larger than
4GB) between userspace and kernel space memory, so a correctly functioning
BPF program should not access this 4GB of memory above userspace. A plain C
model of this check follows the JIT listings below.

Let's analyze what this patch does to the following fentry program
dereferencing an untrusted pointer:

  SEC("fentry/tcp_v4_connect")
  int BPF_PROG(fentry_tcp_v4_connect, struct sock *sk)
  {
        *(volatile long *)sk;
        return 0;
  }

    BPF Program before               |   BPF Program after
    ------------------               |   -----------------

  0: (79) r1 = *(u64 *)(r1 +0)           0: (79) r1 = *(u64 *)(r1 +0)
  -----------------------------------------------------------------------
  1: (79) r1 = *(u64 *)(r1 +0) --\       1: (bf) r11 = r1
  ----------------------------\   \      2: (77) r11 >>= 32
  2: (b7) r0 = 0               \   \     3: (b5) if r11 <= 0x8000 goto pc+2
  3: (95) exit                  \   \->  4: (79) r1 = *(u64 *)(r1 +0)
                                 \       5: (05) goto pc+1
                                  \      6: (b7) r1 = 0
                                   \--------------------------------------
                                         7: (b7) r0 = 0
                                         8: (95) exit

As you can see from above, in the best case (off=0), 5 extra instructions
are emitted.

Now, we analyze the same program after it has gone through the x86-64,
ARM64, and RISC-V JITs. We follow the single load instruction that has the
untrusted pointer and see what instrumentation has been added around it.

x86-64 JIT
==========

JIT's Instrumentation
     (upstream)
---------------------

   0:   nopl   0x0(%rax,%rax,1)
   5:   xchg   %ax,%ax
   7:   push   %rbp
   8:   mov    %rsp,%rbp
   b:   mov    0x0(%rdi),%rdi
  ---------------------------------
   f:   movabs $0x800000000000,%r11
  19:   cmp    %r11,%rdi
  1c:   jb     0x000000000000002a
  1e:   mov    %rdi,%r11
  21:   add    $0x0,%r11
  28:   jae    0x000000000000002e
  2a:   xor    %edi,%edi
  2c:   jmp    0x0000000000000032
  2e:   mov    0x0(%rdi),%rdi
  ---------------------------------
  32:   xor    %eax,%eax
  34:   leave
  35:   ret

The x86-64 JIT already emits some instructions to protect against user
memory access. This patch doesn't make any changes for the x86-64 JIT.
ARM64 JIT
=========

 No Instrumentation                       Verifier's Instrumentation
    (upstream)                                  (This patch)
 -----------------                        --------------------------

 0:   add     x9, x30, #0x0               0:   add     x9, x30, #0x0
 4:   nop                                 4:   nop
 8:   paciasp                             8:   paciasp
 c:   stp     x29, x30, [sp, #-16]!       c:   stp     x29, x30, [sp, #-16]!
 10:  mov     x29, sp                     10:  mov     x29, sp
 14:  stp     x19, x20, [sp, #-16]!       14:  stp     x19, x20, [sp, #-16]!
 18:  stp     x21, x22, [sp, #-16]!       18:  stp     x21, x22, [sp, #-16]!
 1c:  stp     x25, x26, [sp, #-16]!       1c:  stp     x25, x26, [sp, #-16]!
 20:  stp     x27, x28, [sp, #-16]!       20:  stp     x27, x28, [sp, #-16]!
 24:  mov     x25, sp                     24:  mov     x25, sp
 28:  mov     x26, #0x0                   28:  mov     x26, #0x0
 2c:  sub     x27, x25, #0x0              2c:  sub     x27, x25, #0x0
 30:  sub     sp, sp, #0x0                30:  sub     sp, sp, #0x0
 34:  ldr     x0, [x0]                    34:  ldr     x0, [x0]
--------------------------------------------------------------------------------
 38:  ldr     x0, [x0] ----------\        38:  add     x9, x0, #0x0
-----------------------------------\\     3c:  lsr     x9, x9, #32
 3c:  mov     x7, #0x0              \\    40:  cmp     x9, #0x10, lsl #12
 40:  mov     sp, sp                 \\   44:  b.ls    0x0000000000000050
 44:  ldp     x27, x28, [sp], #16     \\->48:  ldr     x0, [x0]
 48:  ldp     x25, x26, [sp], #16      \  4c:  b       0x0000000000000054
 4c:  ldp     x21, x22, [sp], #16       \ 50:  mov     x0, #0x0
 50:  ldp     x19, x20, [sp], #16        \---------------------------------------
 54:  ldp     x29, x30, [sp], #16         54:  mov     x7, #0x0
 58:  add     x0, x7, #0x0                58:  mov     sp, sp
 5c:  autiasp                             5c:  ldp     x27, x28, [sp], #16
 60:  ret                                 60:  ldp     x25, x26, [sp], #16
 64:  nop                                 64:  ldp     x21, x22, [sp], #16
 68:  ldr     x10, 0x0000000000000070     68:  ldp     x19, x20, [sp], #16
 6c:  br      x10                         6c:  ldp     x29, x30, [sp], #16
                                          70:  add     x0, x7, #0x0
                                          74:  autiasp
                                          78:  ret
                                          7c:  nop
                                          80:  ldr     x10, 0x0000000000000088
                                          84:  br      x10

There are 6 extra instructions added in ARM64 in the best case. This will
become 7 in the worst case (off != 0).

RISC-V JIT (RISCV_ISA_C Disabled)
==========

 No Instrumentation              Verifier's Instrumentation
    (upstream)                          (This patch)
 -----------------               --------------------------

 0:   nop                          0:   nop
 4:   nop                          4:   nop
 8:   li     a6, 33                8:   li     a6, 33
 c:   addi   sp, sp, -16           c:   addi   sp, sp, -16
 10:  sd     s0, 8(sp)             10:  sd     s0, 8(sp)
 14:  addi   s0, sp, 16            14:  addi   s0, sp, 16
 18:  ld     a0, 0(a0)             18:  ld     a0, 0(a0)
---------------------------------------------------------------
 1c:  ld     a0, 0(a0) --\         1c:  mv     t0, a0
--------------------------\    \   20:  srli   t0, t0, 32
 20:  li     a5, 0         \    \  24:  lui    t1, 4096
 24:  ld     s0, 8(sp)      \    \ 28:  sext.w t1, t1
 28:  addi   sp, sp, 16      \    \2c:  bgeu   t1, t0, 12
 2c:  sext.w a0, a5           \    \-> 30:  ld     a0, 0(a0)
 30:  ret                      \       34:  j      8
                                \      38:  li     a0, 0
                                 \------------------------------
                                       3c:  li     a5, 0
                                       40:  ld     s0, 8(sp)
                                       44:  addi   sp, sp, 16
                                       48:  sext.w a0, a5
                                       4c:  ret

There are 7 extra instructions added in RISC-V.
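
Independent of the architecture, the inserted guard follows the pseudo code
shown earlier: compare the upper 32 bits of src_reg + off against the upper
32 bits of the limit and skip the load when the address is not clearly above
it. Below is a stand-alone C model of that check; guarded_load() is
hypothetical, and 0x800000000000 is simply the illustrative limit value that
appears in the listings above.

  #include <stdint.h>
  #include <stdio.h>

  /* returns 0 when the load is skipped, 1 when it would be performed */
  static int guarded_load(uint64_t src, int32_t off, uint64_t uaddress_limit)
  {
          uint64_t ax = src;                  /* REG_AX = SRC_REG               */

          if (off)
                  ax += off;                  /* REG_AX += offset               */
          ax >>= 32;                          /* align both sides down to 4GB   */
          if (ax <= (uaddress_limit >> 32))
                  return 0;                   /* DST_REG = 0, load skipped      */
          return 1;                           /* DST_REG = *(size *)(src + off) */
  }

  int main(void)
  {
          uint64_t limit = 0x800000000000ULL; /* illustrative uaddress_limit    */

          printf("%d\n", guarded_load(0x00007fffdeadbeefULL, 0, limit)); /* 0: user   */
          printf("%d\n", guarded_load(0xffff888012345678ULL, 0, limit)); /* 1: kernel */
          return 0;
  }
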
Fixes: 800834285361 ("bpf, arm64: Add BPF exception tables")
Reported-by: Breno Leitao
Suggested-by: Alexei Starovoitov
Acked-by: Ilya Leoshkevich
Signed-off-by: Puranjay Mohan
Link: https://lore.kernel.org/r/20240424100210.11982-2-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov
---
 arch/x86/net/bpf_jit_comp.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index df5fac428408..b520b66ad14b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3473,3 +3473,9 @@ bool bpf_jit_supports_ptr_xchg(void)
 {
         return true;
 }
+
+/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
+u64 bpf_arch_uaddress_limit(void)
+{
+        return 0;
+}
--
cgit v1.2.3


From b599d7d26d6ad1fc9975218574bc2ca6d0293cfd Mon Sep 17 00:00:00 2001
From: Puranjay Mohan
Date: Wed, 24 Apr 2024 10:02:09 +0000
Subject: bpf, x86: Fix PROBE_MEM runtime load check

When a load is marked PROBE_MEM - e.g. due to PTR_UNTRUSTED access - the
address being loaded from is not necessarily valid. The BPF JIT sets up
exception handlers for each such load that catch page faults and zero out
the destination register.

If the address for the load is outside kernel address space, the load will
escape the exception handling and crash the kernel. To prevent this from
happening, the JIT emits instructions to verify that addr is > end of
userspace addresses.

x86 has a legacy vsyscall ABI where a page at address 0xffffffffff600000 is
mapped with user accessible permissions. The addresses in this page are
considered userspace addresses by the fault handler. Therefore, a BPF
program accessing this page will crash the kernel.

This patch fixes the runtime checks to also check that the PROBE_MEM
address is below VSYSCALL_ADDR.
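
The updated check folds both bounds into a single unsigned comparison: the
JIT subtracts VSYSCALL_ADDR from the address and compares the result against
TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR, which (modulo 2^64) holds exactly
when TASK_SIZE_MAX + PAGE_SIZE < addr < VSYSCALL_ADDR. Here is a small C
model of that trick; the constants are illustrative 4-level-paging values,
not taken from kernel headers.

  #include <stdint.h>
  #include <stdio.h>

  #define VSYSCALL_ADDR_ILL  0xffffffffff600000ULL
  #define TASK_MAX_PAGE_ILL  0x0000800000000000ULL   /* TASK_SIZE_MAX + PAGE_SIZE */

  /* 1 if addr lies in (TASK_MAX_PAGE_ILL, VSYSCALL_ADDR_ILL), i.e. the load may run */
  static int probe_mem_addr_ok(uint64_t addr)
  {
          uint64_t limit = TASK_MAX_PAGE_ILL - VSYSCALL_ADDR_ILL; /* wraps on purpose */

          return (addr - VSYSCALL_ADDR_ILL) > limit;              /* the single 'ja'  */
  }

  int main(void)
  {
          printf("%d\n", probe_mem_addr_ok(0x00007fffdeadbeefULL)); /* 0: user page     */
          printf("%d\n", probe_mem_addr_ok(0xffffffffff600123ULL)); /* 0: vsyscall page */
          printf("%d\n", probe_mem_addr_ok(0xffff888012345678ULL)); /* 1: kernel        */
          return 0;
  }

This mirrors the sub/cmp/ja sequence emitted in the diff below, where
VSYSCALL_ADDR and the precomputed limit are loaded into r10 at runtime.
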
Example BPF program:

  SEC("fentry/tcp_v4_connect")
  int BPF_PROG(fentry_tcp_v4_connect, struct sock *sk)
  {
        *(volatile unsigned long *)&sk->sk_tsq_flags;
        return 0;
  }

BPF Assembly:

  0: (79) r1 = *(u64 *)(r1 +0)
  1: (79) r1 = *(u64 *)(r1 +344)
  2: (b7) r0 = 0
  3: (95) exit

x86-64 JIT
==========

  BEFORE                                    AFTER
  ------                                    -----
   0:   nopl   0x0(%rax,%rax,1)              0:   nopl   0x0(%rax,%rax,1)
   5:   xchg   %ax,%ax                       5:   xchg   %ax,%ax
   7:   push   %rbp                          7:   push   %rbp
   8:   mov    %rsp,%rbp                     8:   mov    %rsp,%rbp
   b:   mov    0x0(%rdi),%rdi                b:   mov    0x0(%rdi),%rdi
  -------------------------------------------------------------------------------
   f:   movabs $0x100000000000000,%r11       f:   movabs $0xffffffffff600000,%r10
  19:   add    $0x2a0,%rdi                  19:   mov    %rdi,%r11
  20:   cmp    %r11,%rdi                    1c:   add    $0x2a0,%r11
  23:   jae    0x0000000000000029           23:   sub    %r10,%r11
  25:   xor    %edi,%edi                    26:   movabs $0x100000000a00000,%r10
  27:   jmp    0x000000000000002d           30:   cmp    %r10,%r11
  29:   mov    0x0(%rdi),%rdi               33:   ja     0x0000000000000039
  --------------------------------\         35:   xor    %edi,%edi
  2d:   xor    %eax,%eax           \        37:   jmp    0x0000000000000040
  2f:   leave                       \       39:   mov    0x2a0(%rdi),%rdi
  30:   ret                          \--------------------------------------------
                                            40:   xor    %eax,%eax
                                            42:   leave
                                            43:   ret

Signed-off-by: Puranjay Mohan
Link: https://lore.kernel.org/r/20240424100210.11982-3-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov
---
 arch/x86/net/bpf_jit_comp.c | 57 ++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b520b66ad14b..59cbc94b6e69 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1807,36 +1807,41 @@ populate_extable:
             if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
                 BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
                 /* Conservatively check that src_reg + insn->off is a kernel address:
-                 *   src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
-                 * src_reg is used as scratch for src_reg += insn->off and restored
-                 * after emit_ldx if necessary
+                 *   src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+                 *   and
+                 *   src_reg + insn->off < VSYSCALL_ADDR
                  */
-                u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+                u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
                 u8 *end_of_jmp;
 
-                /* At end of these emitted checks, insn->off will have been added
-                 * to src_reg, so no need to do relative load with insn->off offset
-                 */
-                insn_off = 0;
+                /* movabsq r10, VSYSCALL_ADDR */
+                emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+                               (u32)(long)VSYSCALL_ADDR);
 
-                /* movabsq r11, limit */
-                EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
-                EMIT((u32)limit, 4);
-                EMIT(limit >> 32, 4);
+                /* mov src_reg, r11 */
+                EMIT_mov(AUX_REG, src_reg);
 
                 if (insn->off) {
-                    /* add src_reg, insn->off */
-                    maybe_emit_1mod(&prog, src_reg, true);
-                    EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+                    /* add r11, insn->off */
+                    maybe_emit_1mod(&prog, AUX_REG, true);
+                    EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
                 }
 
-                /* cmp src_reg, r11 */
-                maybe_emit_mod(&prog, src_reg, AUX_REG, true);
-                EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+                /* sub r11, r10 */
+                maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+                EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+                /* movabsq r10, limit */
+                emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+                               (u32)(long)limit);
+
+                /* cmp r10, r11 */
+                maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+                EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
 
-                /* if unsigned '>=', goto load */
-                EMIT2(X86_JAE, 0);
+                /* if unsigned '>', goto load */
+                EMIT2(X86_JA, 0);
                 end_of_jmp = prog;
 
                 /* xor dst_reg, dst_reg */
@@ -1862,18 +1867,6 @@ populate_extable:
             /* populate jmp_offset for JMP above */
             start_of_ldx[-1] = prog - start_of_ldx;
 
-            if (insn->off && src_reg != dst_reg) {
-                /* sub src_reg, insn->off
-                 * Restore src_reg after "add src_reg, insn->off" in prev
-                 * if statement. But if src_reg == dst_reg, emit_ldx
-                 * above already clobbered src_reg, so no need to restore.
-                 * If add src_reg, insn->off was unnecessary, no need to
-                 * restore either.
-                 */
-                maybe_emit_1mod(&prog, src_reg, true);
-                EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
-            }
-
             if (!bpf_prog->aux->extable)
                 break;
--
cgit v1.2.3