author    Linus Torvalds <torvalds@linux-foundation.org> 2023-05-04 12:40:16 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-05-04 12:40:16 -0700
commit 611c9d88302cb9ac3b0f58f4a06c0ffb98345bd2 (patch)
tree   3aeefa3eb0499df16ec08965da68d9fa38f1befa /arch/loongarch
parent Merge tag 'csky-for-linus-6.4' of https://github.com/c-sky/csky-linux (diff)
parent tools/perf: Add basic support for LoongArch (diff)
Merge tag 'loongarch-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
Pull LoongArch updates from Huacai Chen:

 - Better backtraces for humanization
 - Relay BCE exceptions to userland as SIGSEGV
 - Provide kernel fpu functions
 - Optimize memory ops (memset/memcpy/memmove)
 - Optimize checksum and crc32(c) calculation
 - Add ARCH_HAS_FORTIFY_SOURCE selection
 - Add function error injection support
 - Add ftrace with direct call support
 - Add basic perf tools support

* tag 'loongarch-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (24 commits)
  tools/perf: Add basic support for LoongArch
  LoongArch: ftrace: Add direct call trampoline samples support
  LoongArch: ftrace: Add direct call support
  LoongArch: ftrace: Implement ftrace_find_callable_addr() to simplify code
  LoongArch: ftrace: Fix build error if DYNAMIC_FTRACE_WITH_REGS is not set
  LoongArch: ftrace: Abstract DYNAMIC_FTRACE_WITH_ARGS accesses
  LoongArch: Add support for function error injection
  LoongArch: Add ARCH_HAS_FORTIFY_SOURCE selection
  LoongArch: crypto: Add crc32 and crc32c hw acceleration
  LoongArch: Add checksum optimization for 64-bit system
  LoongArch: Optimize memory ops (memset/memcpy/memmove)
  LoongArch: Provide kernel fpu functions
  LoongArch: Relay BCE exceptions to userland as SIGSEGV with si_code=SEGV_BNDERR
  LoongArch: Tweak the BADV and CPUCFG.PRID lines in show_regs()
  LoongArch: Humanize the ESTAT line when showing registers
  LoongArch: Humanize the ECFG line when showing registers
  LoongArch: Humanize the EUEN line when showing registers
  LoongArch: Humanize the PRMD line when showing registers
  LoongArch: Humanize the CRMD line when showing registers
  LoongArch: Fix format of CSR lines during show_regs()
  ...
Diffstat (limited to 'arch/loongarch')
-rw-r--r-- arch/loongarch/Kconfig | 5
-rw-r--r-- arch/loongarch/Makefile | 2
-rw-r--r-- arch/loongarch/crypto/Kconfig | 14
-rw-r--r-- arch/loongarch/crypto/Makefile | 6
-rw-r--r-- arch/loongarch/crypto/crc32-loongarch.c | 304
-rw-r--r-- arch/loongarch/include/asm/checksum.h | 66
-rw-r--r-- arch/loongarch/include/asm/fpu.h | 3
-rw-r--r-- arch/loongarch/include/asm/ftrace.h | 37
-rw-r--r-- arch/loongarch/include/asm/inst.h | 26
-rw-r--r-- arch/loongarch/include/asm/loongarch.h | 57
-rw-r--r-- arch/loongarch/include/asm/ptrace.h | 5
-rw-r--r-- arch/loongarch/kernel/Makefile | 2
-rw-r--r-- arch/loongarch/kernel/ftrace_dyn.c | 128
-rw-r--r-- arch/loongarch/kernel/genex.S | 1
-rw-r--r-- arch/loongarch/kernel/irq.c | 2
-rw-r--r-- arch/loongarch/kernel/kfpu.c | 43
-rw-r--r-- arch/loongarch/kernel/mcount_dyn.S | 13
-rw-r--r-- arch/loongarch/kernel/perf_event.c | 2
-rw-r--r-- arch/loongarch/kernel/time.c | 2
-rw-r--r-- arch/loongarch/kernel/traps.c | 318
-rw-r--r-- arch/loongarch/lib/Makefile | 4
-rw-r--r-- arch/loongarch/lib/clear_user.S | 136
-rw-r--r-- arch/loongarch/lib/copy_user.S | 251
-rw-r--r-- arch/loongarch/lib/csum.c | 141
-rw-r--r-- arch/loongarch/lib/error-inject.c | 10
-rw-r--r-- arch/loongarch/lib/memcpy.S | 147
-rw-r--r-- arch/loongarch/lib/memmove.S | 120
-rw-r--r-- arch/loongarch/lib/memset.S | 116
28 files changed, 1666 insertions(+), 295 deletions(-)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index f44f6ea54e46..d38b066fc931 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -10,6 +10,7 @@ config LOONGARCH
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -93,6 +94,7 @@ config LOONGARCH
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
@@ -100,6 +102,7 @@ config LOONGARCH
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GENERIC_VDSO
@@ -118,6 +121,8 @@ config LOONGARCH
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
+ select HAVE_SAMPLE_FTRACE_DIRECT
+ select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_SETUP_PER_CPU_AREA if NUMA
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index f71edf574101..a27e264bdaa5 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -115,6 +115,8 @@ endif
libs-y += arch/loongarch/lib/
libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+drivers-y += arch/loongarch/crypto/
+
# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/loongarch/power/
diff --git a/arch/loongarch/crypto/Kconfig b/arch/loongarch/crypto/Kconfig
new file mode 100644
index 000000000000..200a6e8b43b1
--- /dev/null
+++ b/arch/loongarch/crypto/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (loongarch)"
+
+config CRYPTO_CRC32_LOONGARCH
+ tristate "CRC32c and CRC32"
+ select CRC32
+ select CRYPTO_HASH
+ help
+ CRC32c and CRC32 CRC algorithms
+
+ Architecture: LoongArch with CRC32 instructions
+
+endmenu
diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile
new file mode 100644
index 000000000000..d22613d27ce9
--- /dev/null
+++ b/arch/loongarch/crypto/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for LoongArch crypto files..
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o
diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c
new file mode 100644
index 000000000000..1f2a2c3839bc
--- /dev/null
+++ b/arch/loongarch/crypto/crc32-loongarch.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions
+ *
+ * Module based on mips/crypto/crc32-mips.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/module.h>
+#include <crypto/internal/hash.h>
+
+#include <asm/cpu-features.h>
+#include <asm/unaligned.h>
+
+#define _CRC32(crc, value, size, type) \
+do { \
+ __asm__ __volatile__( \
+ #type ".w." #size ".w" " %0, %1, %0\n\t"\
+ : "+r" (crc) \
+ : "r" (value) \
+ : "memory"); \
+} while (0)
+
+#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
+#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
+
+static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+ u32 crc = crc_;
+
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32(crc, value, b);
+ }
+
+ return crc;
+}
+
+static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+ u32 crc = crc_;
+
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32C(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32C(crc, value, w);
+ p += sizeof(u32);
+ len -= sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32C(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32C(crc, value, b);
+ }
+
+ return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE 1
+#define CHKSUM_DIGEST_SIZE 4
+
+struct chksum_ctx {
+ u32 key;
+};
+
+struct chksum_desc_ctx {
+ u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = mctx->key;
+
+ return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+ if (keylen != sizeof(mctx->key))
+ return -EINVAL;
+
+ mctx->key = get_unaligned_le32(key);
+
+ return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = crc32_loongarch_hw(ctx->crc, data, length);
+ return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length);
+ return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ put_unaligned_le32(ctx->crc, out);
+ return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ put_unaligned_le32(~ctx->crc, out);
+ return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+ put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out);
+ return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+ put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out);
+ return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+ return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
+{
+ struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+ return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+ struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+ mctx->key = 0;
+ return 0;
+}
+
+static int chksumc_cra_init(struct crypto_tfm *tfm)
+{
+ struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+ mctx->key = ~0;
+ return 0;
+}
+
+static struct shash_alg crc32_alg = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .setkey = chksum_setkey,
+ .init = chksum_init,
+ .update = chksum_update,
+ .final = chksum_final,
+ .finup = chksum_finup,
+ .digest = chksum_digest,
+ .descsize = sizeof(struct chksum_desc_ctx),
+ .base = {
+ .cra_name = "crc32",
+ .cra_driver_name = "crc32-loongarch",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_alignmask = 0,
+ .cra_ctxsize = sizeof(struct chksum_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = chksum_cra_init,
+ }
+};
+
+static struct shash_alg crc32c_alg = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .setkey = chksum_setkey,
+ .init = chksum_init,
+ .update = chksumc_update,
+ .final = chksumc_final,
+ .finup = chksumc_finup,
+ .digest = chksumc_digest,
+ .descsize = sizeof(struct chksum_desc_ctx),
+ .base = {
+ .cra_name = "crc32c",
+ .cra_driver_name = "crc32c-loongarch",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_alignmask = 0,
+ .cra_ctxsize = sizeof(struct chksum_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = chksumc_cra_init,
+ }
+};
+
+static int __init crc32_mod_init(void)
+{
+ int err;
+
+ if (!cpu_has(CPU_FEATURE_CRC32))
+ return 0;
+
+ err = crypto_register_shash(&crc32_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_shash(&crc32c_alg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+ if (!cpu_has(CPU_FEATURE_CRC32))
+ return;
+
+ crypto_unregister_shash(&crc32_alg);
+ crypto_unregister_shash(&crc32c_alg);
+}
+
+module_init(crc32_mod_init);
+module_exit(crc32_mod_exit);
+
+MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>");
+MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
+MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions");
+MODULE_LICENSE("GPL v2");
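[Note: the two shash algorithms registered above are reached through the generic crypto API, so existing "crc32"/"crc32c" users pick up the hardware driver by priority once the module is loaded. A minimal sketch of an in-kernel caller, illustrative only and not part of this series (error handling trimmed, helper name hypothetical):

    #include <crypto/hash.h>
    #include <linux/err.h>

    /* One-shot CRC32C of a buffer via the shash API (hypothetical helper). */
    static int example_crc32c(const u8 *data, unsigned int len, u8 out[4])
    {
        struct crypto_shash *tfm;
        int err;

        tfm = crypto_alloc_shash("crc32c", 0, 0);   /* may bind crc32c-loongarch */
        if (IS_ERR(tfm))
            return PTR_ERR(tfm);

        {
            SHASH_DESC_ON_STACK(desc, tfm);

            desc->tfm = tfm;
            err = crypto_shash_digest(desc, data, len, out);
        }

        crypto_free_shash(tfm);
        return err;
    }
]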
diff --git a/arch/loongarch/include/asm/checksum.h b/arch/loongarch/include/asm/checksum.h
new file mode 100644
index 000000000000..cabbf6af44c4
--- /dev/null
+++ b/arch/loongarch/include/asm/checksum.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2016 ARM Ltd.
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#ifndef __ASM_CHECKSUM_H
+#define __ASM_CHECKSUM_H
+
+#include <linux/bitops.h>
+#include <linux/in6.h>
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
+
+/*
+ * turns a 32-bit partial checksum (e.g. from csum_partial) into a
+ * 1's complement 16-bit checksum.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+ u32 tmp = (__force u32)sum;
+
+ /*
+ * swap the two 16-bit halves of sum
+ * if there is a carry from adding the two 16-bit halves,
+ * it will carry from the lower half into the upper half,
+ * giving us the correct sum in the upper half.
+ */
+ return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16);
+}
+#define csum_fold csum_fold
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries. ihl is the number
+ * of 32-bit words and is always >= 5.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ u64 sum;
+ __uint128_t tmp;
+ int n = ihl; /* we want it signed */
+
+ tmp = *(const __uint128_t *)iph;
+ iph += 16;
+ n -= 4;
+ tmp += ((tmp >> 64) | (tmp << 64));
+ sum = tmp >> 64;
+ do {
+ sum += *(const u32 *)iph;
+ iph += 4;
+ } while (--n > 0);
+
+ sum += ror64(sum, 32);
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+#define ip_fast_csum ip_fast_csum
+
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+#include <asm-generic/checksum.h>
+
+#endif /* __ASM_CHECKSUM_H */
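[Aside on csum_fold() above: adding the 32-bit partial sum to itself rotated by 16 bits leaves the ones'-complement 16-bit fold, end-around carry included, in the upper half, so no conditional carry handling is needed. A stand-alone user-space check of that identity (illustrative, not kernel code):

    #include <assert.h>
    #include <stdint.h>

    static uint16_t fold_ref(uint32_t sum)      /* textbook fold */
    {
        sum = (sum >> 16) + (sum & 0xffff);
        sum += sum >> 16;
        return (uint16_t)~sum;
    }

    static uint16_t fold_rol(uint32_t sum)      /* the rotate-by-16 trick */
    {
        uint32_t rot = (sum << 16) | (sum >> 16);

        return (uint16_t)(~(sum + rot) >> 16);
    }

    int main(void)
    {
        uint32_t v[] = { 0, 1, 0xffff, 0x10000, 0x0001fffe, 0xffff0001, 0xffffffff };

        for (unsigned int i = 0; i < sizeof(v) / sizeof(v[0]); i++)
            assert(fold_ref(v[i]) == fold_rol(v[i]));
        return 0;
    }
]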
diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index 358b254d9c1d..192f8e35d912 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -21,6 +21,9 @@
struct sigcontext;
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+
extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 3418d32d4fc7..23e2ba78dcb0 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -54,9 +54,46 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
return &fregs->regs;
}
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
+{
+ return instruction_pointer(&fregs->regs);
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip)
+{
+ regs_set_return_value(&fregs->regs, ip);
+}
+
+#define ftrace_regs_get_argument(fregs, n) \
+ regs_get_kernel_argument(&(fregs)->regs, n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+ kernel_stack_pointer(&(fregs)->regs)
+#define ftrace_regs_return_value(fregs) \
+ regs_return_value(&(fregs)->regs)
+#define ftrace_regs_set_return_value(fregs, ret) \
+ regs_set_return_value(&(fregs)->regs, ret)
+#define ftrace_override_function_with_return(fregs) \
+ override_function_with_return(&(fregs)->regs)
+#define ftrace_regs_query_register_offset(name) \
+ regs_query_register_offset(name)
+
#define ftrace_graph_func ftrace_graph_func
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static inline void
+__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+ regs->regs[13] = addr; /* t1 */
+}
+
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+ __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index a04fe755d719..b09887ffcd15 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -121,6 +121,8 @@ enum reg2bstrd_op {
};
enum reg3_op {
+ asrtle_op = 0x02,
+ asrtgt_op = 0x03,
addw_op = 0x20,
addd_op = 0x21,
subw_op = 0x22,
@@ -176,6 +178,30 @@ enum reg3_op {
amord_op = 0x70c7,
amxorw_op = 0x70c8,
amxord_op = 0x70c9,
+ fldgts_op = 0x70e8,
+ fldgtd_op = 0x70e9,
+ fldles_op = 0x70ea,
+ fldled_op = 0x70eb,
+ fstgts_op = 0x70ec,
+ fstgtd_op = 0x70ed,
+ fstles_op = 0x70ee,
+ fstled_op = 0x70ef,
+ ldgtb_op = 0x70f0,
+ ldgth_op = 0x70f1,
+ ldgtw_op = 0x70f2,
+ ldgtd_op = 0x70f3,
+ ldleb_op = 0x70f4,
+ ldleh_op = 0x70f5,
+ ldlew_op = 0x70f6,
+ ldled_op = 0x70f7,
+ stgtb_op = 0x70f8,
+ stgth_op = 0x70f9,
+ stgtw_op = 0x70fa,
+ stgtd_op = 0x70fb,
+ stleb_op = 0x70fc,
+ stleh_op = 0x70fd,
+ stlew_op = 0x70fe,
+ stled_op = 0x70ff,
};
enum reg3sa2_op {
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 83da5d29e2d1..b3323ab5b78d 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -311,8 +311,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ECFG_VS_WIDTH 3
#define CSR_ECFG_VS (_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT)
#define CSR_ECFG_IM_SHIFT 0
-#define CSR_ECFG_IM_WIDTH 13
-#define CSR_ECFG_IM (_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT)
+#define CSR_ECFG_IM_WIDTH 14
+#define CSR_ECFG_IM (_ULCAST_(0x3fff) << CSR_ECFG_IM_SHIFT)
#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
#define CSR_ESTAT_ESUBCODE_SHIFT 22
@@ -322,8 +322,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ESTAT_EXC_WIDTH 6
#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
#define CSR_ESTAT_IS_SHIFT 0
-#define CSR_ESTAT_IS_WIDTH 15
-#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)
+#define CSR_ESTAT_IS_WIDTH 14
+#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
#define LOONGARCH_CSR_ERA 0x6 /* ERA */
@@ -1090,7 +1090,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define ECFGF_IPI (_ULCAST_(1) << ECFGB_IPI)
#define ECFGF(hwirq) (_ULCAST_(1) << hwirq)
-#define ESTATF_IP 0x00001fff
+#define ESTATF_IP 0x00003fff
#define LOONGARCH_IOCSR_FEATURES 0x8
#define IOCSRF_TEMP BIT_ULL(0)
@@ -1397,7 +1397,7 @@ __BUILD_CSR_OP(tlbidx)
#define EXSUBCODE_ADEF 0 /* Fetch Instruction */
#define EXSUBCODE_ADEM 1 /* Access Memory*/
#define EXCCODE_ALE 9 /* Unalign Access */
-#define EXCCODE_OOB 10 /* Out of bounds */
+#define EXCCODE_BCE 10 /* Bounds Check Error */
#define EXCCODE_SYS 11 /* System call */
#define EXCCODE_BP 12 /* Breakpoint */
#define EXCCODE_INE 13 /* Inst. Not Exist */
@@ -1408,33 +1408,38 @@ __BUILD_CSR_OP(tlbidx)
#define EXCCODE_FPE 18 /* Floating Point Exception */
#define EXCSUBCODE_FPE 0 /* Floating Point Exception */
#define EXCSUBCODE_VFPE 1 /* Vector Exception */
-#define EXCCODE_WATCH 19 /* Watch address reference */
+#define EXCCODE_WATCH 19 /* WatchPoint Exception */
+ #define EXCSUBCODE_WPEF 0 /* ... on Instruction Fetch */
+ #define EXCSUBCODE_WPEM 1 /* ... on Memory Accesses */
#define EXCCODE_BTDIS 20 /* Binary Trans. Disabled */
#define EXCCODE_BTE 21 /* Binary Trans. Exception */
-#define EXCCODE_PSI 22 /* Guest Privileged Error */
-#define EXCCODE_HYP 23 /* Hypercall */
+#define EXCCODE_GSPR 22 /* Guest Privileged Error */
+#define EXCCODE_HVC 23 /* Hypercall */
#define EXCCODE_GCM 24 /* Guest CSR modified */
#define EXCSUBCODE_GCSC 0 /* Software caused */
#define EXCSUBCODE_GCHC 1 /* Hardware caused */
#define EXCCODE_SE 25 /* Security */
-#define EXCCODE_INT_START 64
-#define EXCCODE_SIP0 64
-#define EXCCODE_SIP1 65
-#define EXCCODE_IP0 66
-#define EXCCODE_IP1 67
-#define EXCCODE_IP2 68
-#define EXCCODE_IP3 69
-#define EXCCODE_IP4 70
-#define EXCCODE_IP5 71
-#define EXCCODE_IP6 72
-#define EXCCODE_IP7 73
-#define EXCCODE_PMC 74 /* Performance Counter */
-#define EXCCODE_TIMER 75
-#define EXCCODE_IPI 76
-#define EXCCODE_NMI 77
-#define EXCCODE_INT_END 78
-#define EXCCODE_INT_NUM (EXCCODE_INT_END - EXCCODE_INT_START)
+/* Interrupt numbers */
+#define INT_SWI0 0 /* Software Interrupts */
+#define INT_SWI1 1
+#define INT_HWI0 2 /* Hardware Interrupts */
+#define INT_HWI1 3
+#define INT_HWI2 4
+#define INT_HWI3 5
+#define INT_HWI4 6
+#define INT_HWI5 7
+#define INT_HWI6 8
+#define INT_HWI7 9
+#define INT_PCOV 10 /* Performance Counter Overflow */
+#define INT_TI 11 /* Timer */
+#define INT_IPI 12
+#define INT_NMI 13
+
+/* ExcCodes corresponding to interrupts */
+#define EXCCODE_INT_NUM (INT_NMI + 1)
+#define EXCCODE_INT_START 64
+#define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1)
/* FPU register names */
#define LOONGARCH_FCSR0 $r0
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index d761db943335..35f0958163ac 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -154,6 +154,11 @@ static inline long regs_return_value(struct pt_regs *regs)
return regs->regs[4];
}
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val)
+{
+ regs->regs[4] = val;
+}
+
#define instruction_pointer(regs) ((regs)->csr_era)
#define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 78d4e3384305..9a72d91cd104 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_EFI) += efi.o
-obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
+obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o
obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 4a3ef8516ccc..73858c9029cc 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -30,19 +30,12 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
return 0;
}
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-
#ifdef CONFIG_MODULES
-static inline int __get_mod(struct module **mod, unsigned long addr)
+static bool reachable_by_bl(unsigned long addr, unsigned long pc)
{
- preempt_disable();
- *mod = __module_text_address(addr);
- preempt_enable();
+ long offset = (long)addr - (long)pc;
- if (WARN_ON(!(*mod)))
- return -EINVAL;
-
- return 0;
+ return offset >= -SZ_128M && offset < SZ_128M;
}
static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
@@ -58,51 +51,88 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
return NULL;
}
-static unsigned long get_plt_addr(struct module *mod, unsigned long addr)
+/*
+ * Find the address the callsite must branch to in order to reach '*addr'.
+ *
+ * Due to the limited range of 'bl' instruction, modules may be placed too far
+ * away to branch directly and we must use a PLT.
+ *
+ * Returns true when '*addr' contains a reachable target address, or has been
+ * modified to contain a PLT address. Returns false otherwise.
+ */
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
{
+ unsigned long pc = rec->ip + LOONGARCH_INSN_SIZE;
struct plt_entry *plt;
- plt = get_ftrace_plt(mod, addr);
+ /*
+ * If a custom trampoline is unreachable, rely on the ftrace_regs_caller
+ * trampoline which knows how to indirectly reach that trampoline through
+ * ops->direct_call.
+ */
+ if (*addr != FTRACE_ADDR && *addr != FTRACE_REGS_ADDR && !reachable_by_bl(*addr, pc))
+ *addr = FTRACE_REGS_ADDR;
+
+ /*
+ * When the target is within range of the 'bl' instruction, use 'addr'
+ * as-is and branch to that directly.
+ */
+ if (reachable_by_bl(*addr, pc))
+ return true;
+
+ /*
+ * 'mod' is only set at module load time, but if we end up
+ * dealing with an out-of-range condition, we can assume it
+ * is due to a module being loaded far away from the kernel.
+ *
+ * NOTE: __module_text_address() must be called with preemption
+ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * retains its validity throughout the remainder of this code.
+ */
+ if (!mod) {
+ preempt_disable();
+ mod = __module_text_address(pc);
+ preempt_enable();
+ }
+
+ if (WARN_ON(!mod))
+ return false;
+
+ plt = get_ftrace_plt(mod, *addr);
if (!plt) {
- pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
- return -EINVAL;
+ pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
+ return false;
}
- return (unsigned long)plt;
+ *addr = (unsigned long)plt;
+ return true;
+}
+#else /* !CONFIG_MODULES */
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
+{
+ return true;
}
#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
{
u32 old, new;
unsigned long pc;
- long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
-#ifdef CONFIG_MODULES
- offset = (long)pc - (long)addr;
-
- if (offset < -SZ_128M || offset >= SZ_128M) {
- int ret;
- struct module *mod;
-
- ret = __get_mod(&mod, pc);
- if (ret)
- return ret;
-
- addr = get_plt_addr(mod, addr);
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
- old_addr = get_plt_addr(mod, old_addr);
- }
-#endif
+ if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
+ return -EINVAL;
new = larch_insn_gen_bl(pc, addr);
old = larch_insn_gen_bl(pc, old_addr);
return ftrace_modify_code(pc, old, new, true);
}
-
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -153,24 +183,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
u32 old, new;
unsigned long pc;
- long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
-#ifdef CONFIG_MODULES
- offset = (long)pc - (long)addr;
-
- if (offset < -SZ_128M || offset >= SZ_128M) {
- int ret;
- struct module *mod;
-
- ret = __get_mod(&mod, pc);
- if (ret)
- return ret;
-
- addr = get_plt_addr(mod, addr);
- }
-#endif
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
old = larch_insn_gen_nop();
new = larch_insn_gen_bl(pc, addr);
@@ -182,24 +199,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long ad
{
u32 old, new;
unsigned long pc;
- long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
-#ifdef CONFIG_MODULES
- offset = (long)pc - (long)addr;
-
- if (offset < -SZ_128M || offset >= SZ_128M) {
- int ret;
- struct module *mod;
-
- ret = __get_mod(&mod, pc);
- if (ret)
- return ret;
-
- addr = get_plt_addr(mod, addr);
- }
-#endif
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
new = larch_insn_gen_nop();
old = larch_insn_gen_bl(pc, addr);
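[For context, the ±128 MiB window tested by reachable_by_bl() follows from the 'bl' encoding: a 26-bit signed offset counted in 4-byte instructions. This reasoning is an editorial note, not taken from the patch:

    /* 'bl' immediate: 26 signed bits, in units of one 4-byte instruction. */
    #define BL_OFFS_BITS   26
    #define BL_MAX_BACK    (-(1L << (BL_OFFS_BITS - 1)) * 4)        /* -128 MiB     */
    #define BL_MAX_FWD     (((1L << (BL_OFFS_BITS - 1)) - 1) * 4)   /* +128 MiB - 4 */

Targets outside that window go through a module PLT, or fall back to the ftrace_regs_caller trampoline when no PLT applies.]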
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 44ff1ff64260..78f066384657 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -82,6 +82,7 @@ SYM_FUNC_END(except_vec_cex)
BUILD_HANDLER ade ade badv
BUILD_HANDLER ale ale badv
+ BUILD_HANDLER bce bce none
BUILD_HANDLER bp bp none
BUILD_HANDLER fpe fpe fcsr
BUILD_HANDLER fpu fpu none
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 0524bf1169b7..883e5066ae44 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -92,7 +92,7 @@ static int __init get_ipi_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
- return irq_create_mapping(d, EXCCODE_IPI - EXCCODE_INT_START);
+ return irq_create_mapping(d, INT_IPI);
return -EINVAL;
}
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
new file mode 100644
index 000000000000..5c46ae8c6cac
--- /dev/null
+++ b/arch/loongarch/kernel/kfpu.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <asm/fpu.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+
+ WARN_ON(this_cpu_read(in_kernel_fpu));
+
+ this_cpu_write(in_kernel_fpu, true);
+
+ if (!is_fpu_owner())
+ enable_fpu();
+ else
+ _save_fp(&current->thread.fpu);
+
+ write_fcsr(LOONGARCH_FCSR0, 0);
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ WARN_ON(!this_cpu_read(in_kernel_fpu));
+
+ if (!is_fpu_owner())
+ disable_fpu();
+ else
+ _restore_fp(&current->thread.fpu);
+
+ this_cpu_write(in_kernel_fpu, false);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
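[kernel_fpu_begin()/kernel_fpu_end() give occasional kernel-side FPU users a safe bracket: preemption is disabled, the current task's FPU state is preserved, and the FPU stays usable until the matching end call. A rough usage sketch with a hypothetical caller; note such code must live in a translation unit built with hard-float compiler flags, and the region must neither sleep nor run long since preemption stays off:

    #include <asm/fpu.h>

    /* Hypothetical example: scale a small sample buffer with FP math. */
    static void scale_samples(float *samples, int n, float gain)
    {
        int i;

        kernel_fpu_begin();     /* park the task's FP state, enable the FPU */
        for (i = 0; i < n; i++)
            samples[i] *= gain;
        kernel_fpu_end();       /* restore the task's FPU state/ownership   */
    }
]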
diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index bbabf06244c2..c7d961fc72c2 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -42,7 +42,6 @@
.if \allregs
PTR_S tp, sp, PT_R2
PTR_S t0, sp, PT_R12
- PTR_S t1, sp, PT_R13
PTR_S t2, sp, PT_R14
PTR_S t3, sp, PT_R15
PTR_S t4, sp, PT_R16
@@ -64,6 +63,8 @@
PTR_S zero, sp, PT_R0
.endif
PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */
+ move t1, zero
+ PTR_S t1, sp, PT_R13
PTR_ADDI t8, sp, PT_SIZE
PTR_S t8, sp, PT_R3
.endm
@@ -104,8 +105,12 @@ ftrace_common_return:
PTR_L a7, sp, PT_R11
PTR_L fp, sp, PT_R22
PTR_L t0, sp, PT_ERA
+ PTR_L t1, sp, PT_R13
PTR_ADDI sp, sp, PT_SIZE
+ bnez t1, .Ldirect
jr t0
+.Ldirect:
+ jr t1
SYM_CODE_END(ftrace_common)
SYM_CODE_START(ftrace_caller)
@@ -147,3 +152,9 @@ SYM_CODE_START(return_to_handler)
jr ra
SYM_CODE_END(return_to_handler)
#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_CODE_START(ftrace_stub_direct_tramp)
+ jr t0
+SYM_CODE_END(ftrace_stub_direct_tramp)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
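[The new tail of ftrace_common pairs with arch_ftrace_set_direct_caller() above: the PT_R13 (t1) slot is zeroed when the frame is built and only a direct-call registration writes it, so a non-zero value means "tail-call the direct trampoline instead of returning to the traced function". An illustrative C model of that dispatch (not kernel code):

    typedef void (*tramp_fn)(void);

    /* saved_t1 is the PT_R13 slot, saved_era the PT_ERA slot of the frame. */
    static tramp_fn pick_return_target(unsigned long saved_t1, unsigned long saved_era)
    {
        return (tramp_fn)(saved_t1 ? saved_t1 : saved_era);
    }
]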
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 707bd32e5c4f..ff28f99b47d7 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -461,7 +461,7 @@ static int get_pmc_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
- return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START);
+ return irq_create_mapping(d, INT_PCOV);
return -EINVAL;
}
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index 4351f69d9950..f377e50f3c66 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -133,7 +133,7 @@ static int get_timer_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
- return irq_create_mapping(d, EXCCODE_TIMER - EXCCODE_INT_START);
+ return irq_create_mapping(d, INT_TI);
return -EINVAL;
}
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index de8ebe20b666..8db26e4ca447 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -3,6 +3,7 @@
* Author: Huacai Chen <chenhuacai@loongson.cn>
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/compiler.h>
@@ -35,6 +36,7 @@
#include <asm/break.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
+#include <asm/inst.h>
#include <asm/loongarch.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
@@ -50,6 +52,7 @@
extern asmlinkage void handle_ade(void);
extern asmlinkage void handle_ale(void);
+extern asmlinkage void handle_bce(void);
extern asmlinkage void handle_sys(void);
extern asmlinkage void handle_bp(void);
extern asmlinkage void handle_ri(void);
@@ -153,53 +156,210 @@ static void show_code(unsigned int *pc, bool user)
pr_cont("\n");
}
-static void __show_regs(const struct pt_regs *regs)
+static void print_bool_fragment(const char *key, unsigned long val, bool first)
{
- const int field = 2 * sizeof(unsigned long);
- unsigned int excsubcode;
- unsigned int exccode;
- int i;
+ /* e.g. "+PG", "-DA" */
+ pr_cont("%s%c%s", first ? "" : " ", val ? '+' : '-', key);
+}
- show_regs_print_info(KERN_DEFAULT);
+static void print_plv_fragment(const char *key, int val)
+{
+ /* e.g. "PLV0", "PPLV3" */
+ pr_cont("%s%d", key, val);
+}
- /*
- * Saved main processor registers
- */
- for (i = 0; i < 32; ) {
- if ((i % 4) == 0)
- printk("$%2d :", i);
- pr_cont(" %0*lx", field, regs->regs[i]);
+static void print_memory_type_fragment(const char *key, unsigned long val)
+{
+ const char *humanized_type;
- i++;
- if ((i % 4) == 0)
- pr_cont("\n");
+ switch (val) {
+ case 0:
+ humanized_type = "SUC";
+ break;
+ case 1:
+ humanized_type = "CC";
+ break;
+ case 2:
+ humanized_type = "WUC";
+ break;
+ default:
+ pr_cont(" %s=Reserved(%lu)", key, val);
+ return;
}
+ /* e.g. " DATM=WUC" */
+ pr_cont(" %s=%s", key, humanized_type);
+}
+
+static void print_intr_fragment(const char *key, unsigned long val)
+{
+ /* e.g. "LIE=0-1,3,5-7" */
+ pr_cont("%s=%*pbl", key, EXCCODE_INT_NUM, &val);
+}
+
+static void print_crmd(unsigned long x)
+{
+ printk(" CRMD: %08lx (", x);
+ print_plv_fragment("PLV", (int) FIELD_GET(CSR_CRMD_PLV, x));
+ print_bool_fragment("IE", FIELD_GET(CSR_CRMD_IE, x), false);
+ print_bool_fragment("DA", FIELD_GET(CSR_CRMD_DA, x), false);
+ print_bool_fragment("PG", FIELD_GET(CSR_CRMD_PG, x), false);
+ print_memory_type_fragment("DACF", FIELD_GET(CSR_CRMD_DACF, x));
+ print_memory_type_fragment("DACM", FIELD_GET(CSR_CRMD_DACM, x));
+ print_bool_fragment("WE", FIELD_GET(CSR_CRMD_WE, x), false);
+ pr_cont(")\n");
+}
+
+static void print_prmd(unsigned long x)
+{
+ printk(" PRMD: %08lx (", x);
+ print_plv_fragment("PPLV", (int) FIELD_GET(CSR_PRMD_PPLV, x));
+ print_bool_fragment("PIE", FIELD_GET(CSR_PRMD_PIE, x), false);
+ print_bool_fragment("PWE", FIELD_GET(CSR_PRMD_PWE, x), false);
+ pr_cont(")\n");
+}
+
+static void print_euen(unsigned long x)
+{
+ printk(" EUEN: %08lx (", x);
+ print_bool_fragment("FPE", FIELD_GET(CSR_EUEN_FPEN, x), true);
+ print_bool_fragment("SXE", FIELD_GET(CSR_EUEN_LSXEN, x), false);
+ print_bool_fragment("ASXE", FIELD_GET(CSR_EUEN_LASXEN, x), false);
+ print_bool_fragment("BTE", FIELD_GET(CSR_EUEN_LBTEN, x), false);
+ pr_cont(")\n");
+}
+
+static void print_ecfg(unsigned long x)
+{
+ printk(" ECFG: %08lx (", x);
+ print_intr_fragment("LIE", FIELD_GET(CSR_ECFG_IM, x));
+ pr_cont(" VS=%d)\n", (int) FIELD_GET(CSR_ECFG_VS, x));
+}
+
+static const char *humanize_exc_name(unsigned int ecode, unsigned int esubcode)
+{
+ /*
+ * LoongArch users and developers are probably more familiar with
+ * those names found in the ISA manual, so we are going to print out
+ * the latter. This will require some mapping.
+ */
+ switch (ecode) {
+ case EXCCODE_RSV: return "INT";
+ case EXCCODE_TLBL: return "PIL";
+ case EXCCODE_TLBS: return "PIS";
+ case EXCCODE_TLBI: return "PIF";
+ case EXCCODE_TLBM: return "PME";
+ case EXCCODE_TLBNR: return "PNR";
+ case EXCCODE_TLBNX: return "PNX";
+ case EXCCODE_TLBPE: return "PPI";
+ case EXCCODE_ADE:
+ switch (esubcode) {
+ case EXSUBCODE_ADEF: return "ADEF";
+ case EXSUBCODE_ADEM: return "ADEM";
+ }
+ break;
+ case EXCCODE_ALE: return "ALE";
+ case EXCCODE_BCE: return "BCE";
+ case EXCCODE_SYS: return "SYS";
+ case EXCCODE_BP: return "BRK";
+ case EXCCODE_INE: return "INE";
+ case EXCCODE_IPE: return "IPE";
+ case EXCCODE_FPDIS: return "FPD";
+ case EXCCODE_LSXDIS: return "SXD";
+ case EXCCODE_LASXDIS: return "ASXD";
+ case EXCCODE_FPE:
+ switch (esubcode) {
+ case EXCSUBCODE_FPE: return "FPE";
+ case EXCSUBCODE_VFPE: return "VFPE";
+ }
+ break;
+ case EXCCODE_WATCH:
+ switch (esubcode) {
+ case EXCSUBCODE_WPEF: return "WPEF";
+ case EXCSUBCODE_WPEM: return "WPEM";
+ }
+ break;
+ case EXCCODE_BTDIS: return "BTD";
+ case EXCCODE_BTE: return "BTE";
+ case EXCCODE_GSPR: return "GSPR";
+ case EXCCODE_HVC: return "HVC";
+ case EXCCODE_GCM:
+ switch (esubcode) {
+ case EXCSUBCODE_GCSC: return "GCSC";
+ case EXCSUBCODE_GCHC: return "GCHC";
+ }
+ break;
/*
- * Saved csr registers
+ * The manual did not mention the EXCCODE_SE case, but print out it
+ * nevertheless.
*/
- printk("era : %0*lx %pS\n", field, regs->csr_era,
- (void *) regs->csr_era);
- printk("ra : %0*lx %pS\n", field, regs->regs[1],
- (void *) regs->regs[1]);
+ case EXCCODE_SE: return "SE";
+ }
- printk("CSR crmd: %08lx ", regs->csr_crmd);
- printk("CSR prmd: %08lx ", regs->csr_prmd);
- printk("CSR euen: %08lx ", regs->csr_euen);
- printk("CSR ecfg: %08lx ", regs->csr_ecfg);
- printk("CSR estat: %08lx ", regs->csr_estat);
+ return "???";
+}
- pr_cont("\n");
+static void print_estat(unsigned long x)
+{
+ unsigned int ecode = FIELD_GET(CSR_ESTAT_EXC, x);
+ unsigned int esubcode = FIELD_GET(CSR_ESTAT_ESUBCODE, x);
+
+ printk("ESTAT: %08lx [%s] (", x, humanize_exc_name(ecode, esubcode));
+ print_intr_fragment("IS", FIELD_GET(CSR_ESTAT_IS, x));
+ pr_cont(" ECode=%d EsubCode=%d)\n", (int) ecode, (int) esubcode);
+}
+
+static void __show_regs(const struct pt_regs *regs)
+{
+ const int field = 2 * sizeof(unsigned long);
+ unsigned int exccode = FIELD_GET(CSR_ESTAT_EXC, regs->csr_estat);
+
+ show_regs_print_info(KERN_DEFAULT);
+
+ /* Print saved GPRs except $zero (substituting with PC/ERA) */
+#define GPR_FIELD(x) field, regs->regs[x]
+ printk("pc %0*lx ra %0*lx tp %0*lx sp %0*lx\n",
+ field, regs->csr_era, GPR_FIELD(1), GPR_FIELD(2), GPR_FIELD(3));
+ printk("a0 %0*lx a1 %0*lx a2 %0*lx a3 %0*lx\n",
+ GPR_FIELD(4), GPR_FIELD(5), GPR_FIELD(6), GPR_FIELD(7));
+ printk("a4 %0*lx a5 %0*lx a6 %0*lx a7 %0*lx\n",
+ GPR_FIELD(8), GPR_FIELD(9), GPR_FIELD(10), GPR_FIELD(11));
+ printk("t0 %0*lx t1 %0*lx t2 %0*lx t3 %0*lx\n",
+ GPR_FIELD(12), GPR_FIELD(13), GPR_FIELD(14), GPR_FIELD(15));
+ printk("t4 %0*lx t5 %0*lx t6 %0*lx t7 %0*lx\n",
+ GPR_FIELD(16), GPR_FIELD(17), GPR_FIELD(18), GPR_FIELD(19));
+ printk("t8 %0*lx u0 %0*lx s9 %0*lx s0 %0*lx\n",
+ GPR_FIELD(20), GPR_FIELD(21), GPR_FIELD(22), GPR_FIELD(23));
+ printk("s1 %0*lx s2 %0*lx s3 %0*lx s4 %0*lx\n",
+ GPR_FIELD(24), GPR_FIELD(25), GPR_FIELD(26), GPR_FIELD(27));
+ printk("s5 %0*lx s6 %0*lx s7 %0*lx s8 %0*lx\n",
+ GPR_FIELD(28), GPR_FIELD(29), GPR_FIELD(30), GPR_FIELD(31));
+
+ /* The slot for $zero is reused as the syscall restart flag */
+ if (regs->regs[0])
+ printk("syscall restart flag: %0*lx\n", GPR_FIELD(0));
+
+ if (user_mode(regs)) {
+ printk(" ra: %0*lx\n", GPR_FIELD(1));
+ printk(" ERA: %0*lx\n", field, regs->csr_era);
+ } else {
+ printk(" ra: %0*lx %pS\n", GPR_FIELD(1), (void *) regs->regs[1]);
+ printk(" ERA: %0*lx %pS\n", field, regs->csr_era, (void *) regs->csr_era);
+ }
+#undef GPR_FIELD
- exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
- excsubcode = ((regs->csr_estat) & CSR_ESTAT_ESUBCODE) >> CSR_ESTAT_ESUBCODE_SHIFT;
- printk("ExcCode : %x (SubCode %x)\n", exccode, excsubcode);
+ /* Print saved important CSRs */
+ print_crmd(regs->csr_crmd);
+ print_prmd(regs->csr_prmd);
+ print_euen(regs->csr_euen);
+ print_ecfg(regs->csr_ecfg);
+ print_estat(regs->csr_estat);
if (exccode >= EXCCODE_TLBL && exccode <= EXCCODE_ALE)
- printk("BadVA : %0*lx\n", field, regs->csr_badvaddr);
+ printk(" BADV: %0*lx\n", field, regs->csr_badvaddr);
- printk("PrId : %08x (%s)\n", read_cpucfg(LOONGARCH_CPUCFG0),
- cpu_family_string());
+ printk(" PRID: %08x (%s, %s)\n", read_cpucfg(LOONGARCH_CPUCFG0),
+ cpu_family_string(), cpu_full_name_string());
}
void show_regs(struct pt_regs *regs)
@@ -430,6 +590,95 @@ static void bug_handler(struct pt_regs *regs)
}
}
+asmlinkage void noinstr do_bce(struct pt_regs *regs)
+{
+ bool user = user_mode(regs);
+ unsigned long era = exception_era(regs);
+ u64 badv = 0, lower = 0, upper = ULONG_MAX;
+ union loongarch_instruction insn;
+ irqentry_state_t state = irqentry_enter(regs);
+
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_enable();
+
+ current->thread.trap_nr = read_csr_excode();
+
+ die_if_kernel("Bounds check error in kernel code", regs);
+
+ /*
+ * Pull out the address that failed bounds checking, and the lower /
+ * upper bound, by minimally looking at the faulting instruction word
+ * and reading from the correct register.
+ */
+ if (__get_inst(&insn.word, (u32 *)era, user))
+ goto bad_era;
+
+ switch (insn.reg3_format.opcode) {
+ case asrtle_op:
+ if (insn.reg3_format.rd != 0)
+ break; /* not asrtle */
+ badv = regs->regs[insn.reg3_format.rj];
+ upper = regs->regs[insn.reg3_format.rk];
+ break;
+
+ case asrtgt_op:
+ if (insn.reg3_format.rd != 0)
+ break; /* not asrtgt */
+ badv = regs->regs[insn.reg3_format.rj];
+ lower = regs->regs[insn.reg3_format.rk];
+ break;
+
+ case ldleb_op:
+ case ldleh_op:
+ case ldlew_op:
+ case ldled_op:
+ case stleb_op:
+ case stleh_op:
+ case stlew_op:
+ case stled_op:
+ case fldles_op:
+ case fldled_op:
+ case fstles_op:
+ case fstled_op:
+ badv = regs->regs[insn.reg3_format.rj];
+ upper = regs->regs[insn.reg3_format.rk];
+ break;
+
+ case ldgtb_op:
+ case ldgth_op:
+ case ldgtw_op:
+ case ldgtd_op:
+ case stgtb_op:
+ case stgth_op:
+ case stgtw_op:
+ case stgtd_op:
+ case fldgts_op:
+ case fldgtd_op:
+ case fstgts_op:
+ case fstgtd_op:
+ badv = regs->regs[insn.reg3_format.rj];
+ lower = regs->regs[insn.reg3_format.rk];
+ break;
+ }
+
+ force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper);
+
+out:
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_disable();
+
+ irqentry_exit(regs, state);
+ return;
+
+bad_era:
+ /*
+ * Cannot pull out the instruction word, hence cannot provide more
+ * info than a regular SIGSEGV in this case.
+ */
+ force_sig(SIGSEGV);
+ goto out;
+}
+
asmlinkage void noinstr do_bp(struct pt_regs *regs)
{
bool user = user_mode(regs);
@@ -792,11 +1041,12 @@ void __init trap_init(void)
long i;
/* Set interrupt vector handler */
- for (i = EXCCODE_INT_START; i < EXCCODE_INT_END; i++)
+ for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++)
set_handler(i * VECSIZE, handle_vint, VECSIZE);
set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE);
set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE);
+ set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE);
set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE);
set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE);
set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE);
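[Because do_bce() decodes the faulting asrt/bounds-checked access, userland now receives a SIGSEGV whose siginfo carries the offending address and the violated bound. A hedged user-space sketch of observing this; it assumes a LoongArch toolchain, the asrtgt.d mnemonic follows the ISA manual, and everything else is illustrative:

    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static void handler(int sig, siginfo_t *si, void *ucontext)
    {
        /* Expect si_code == SEGV_BNDERR and si_addr == the checked address. */
        printf("SIGSEGV code=%d addr=%p\n", si->si_code, si->si_addr);
        _exit(0);
    }

    int main(void)
    {
        struct sigaction sa = { .sa_sigaction = handler, .sa_flags = SA_SIGINFO };
        unsigned long addr = 0x1000, lower = 0x2000;

        sigaction(SIGSEGV, &sa, NULL);
        /* asrtgt.d asserts addr > lower; here it does not hold, so BCE fires. */
        __asm__ volatile("asrtgt.d %0, %1" : : "r"(addr), "r"(lower));
        return 1;
    }
]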
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index 40bde632900f..d60d4e096cfa 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -4,4 +4,6 @@
#
lib-y += delay.o memset.o memcpy.o memmove.o \
- clear_user.o copy_user.o dump_tlb.o unaligned.o
+ clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 2dc48e61a2c8..fd1d62b244f2 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -13,7 +13,14 @@
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
- addi.d a0, a1, (\to) * (-8)
+ sub.d a0, a2, a0
+ addi.d a0, a0, (\to) * (-8)
+ jr ra
+.endr
+
+.irp to, 0, 2, 4
+.L_fixup_handle_s\to\():
+ addi.d a0, a1, -\to
jr ra
.endr
@@ -44,7 +51,7 @@ SYM_FUNC_START(__clear_user_generic)
2: move a0, a1
jr ra
- _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 1b, .L_fixup_handle_s0
SYM_FUNC_END(__clear_user_generic)
/*
@@ -54,12 +61,21 @@ SYM_FUNC_END(__clear_user_generic)
* a1: size
*/
SYM_FUNC_START(__clear_user_fast)
- beqz a1, 10f
+ sltui t0, a1, 9
+ bnez t0, .Lsmall
- ori a2, zero, 64
- blt a1, a2, 9f
+ add.d a2, a0, a1
+0: st.d zero, a0, 0
+
+ /* align up address */
+ addi.d a0, a0, 8
+ bstrins.d a0, zero, 2, 0
+
+ addi.d a3, a2, -64
+ bgeu a0, a3, .Llt64
/* set 64 bytes at a time */
+.Lloop64:
1: st.d zero, a0, 0
2: st.d zero, a0, 8
3: st.d zero, a0, 16
@@ -68,24 +84,95 @@ SYM_FUNC_START(__clear_user_fast)
6: st.d zero, a0, 40
7: st.d zero, a0, 48
8: st.d zero, a0, 56
-
addi.d a0, a0, 64
- addi.d a1, a1, -64
- bge a1, a2, 1b
-
- beqz a1, 10f
+ bltu a0, a3, .Lloop64
/* set the remaining bytes */
-9: st.b zero, a0, 0
- addi.d a0, a0, 1
- addi.d a1, a1, -1
- bgt a1, zero, 9b
+.Llt64:
+ addi.d a3, a2, -32
+ bgeu a0, a3, .Llt32
+9: st.d zero, a0, 0
+10: st.d zero, a0, 8
+11: st.d zero, a0, 16
+12: st.d zero, a0, 24
+ addi.d a0, a0, 32
+
+.Llt32:
+ addi.d a3, a2, -16
+ bgeu a0, a3, .Llt16
+13: st.d zero, a0, 0
+14: st.d zero, a0, 8
+ addi.d a0, a0, 16
+
+.Llt16:
+ addi.d a3, a2, -8
+ bgeu a0, a3, .Llt8
+15: st.d zero, a0, 0
+
+.Llt8:
+16: st.d zero, a2, -8
/* return */
-10: move a0, a1
+ move a0, zero
+ jr ra
+
+ .align 4
+.Lsmall:
+ pcaddi t0, 4
+ slli.d a2, a1, 4
+ add.d t0, t0, a2
+ jr t0
+
+ .align 4
+ move a0, zero
+ jr ra
+
+ .align 4
+17: st.b zero, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 4
+18: st.h zero, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 4
+19: st.h zero, a0, 0
+20: st.b zero, a0, 2
+ move a0, zero
+ jr ra
+
+ .align 4
+21: st.w zero, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 4
+22: st.w zero, a0, 0
+23: st.b zero, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 4
+24: st.w zero, a0, 0
+25: st.h zero, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 4
+26: st.w zero, a0, 0
+27: st.w zero, a0, 3
+ move a0, zero
+ jr ra
+
+ .align 4
+28: st.d zero, a0, 0
+ move a0, zero
jr ra
/* fixup and ex_table */
+ _asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 2b, .L_fixup_handle_1
_asm_extable 3b, .L_fixup_handle_2
@@ -95,4 +182,23 @@ SYM_FUNC_START(__clear_user_fast)
_asm_extable 7b, .L_fixup_handle_6
_asm_extable 8b, .L_fixup_handle_7
_asm_extable 9b, .L_fixup_handle_0
+ _asm_extable 10b, .L_fixup_handle_1
+ _asm_extable 11b, .L_fixup_handle_2
+ _asm_extable 12b, .L_fixup_handle_3
+ _asm_extable 13b, .L_fixup_handle_0
+ _asm_extable 14b, .L_fixup_handle_1
+ _asm_extable 15b, .L_fixup_handle_0
+ _asm_extable 16b, .L_fixup_handle_1
+ _asm_extable 17b, .L_fixup_handle_s0
+ _asm_extable 18b, .L_fixup_handle_s0
+ _asm_extable 19b, .L_fixup_handle_s0
+ _asm_extable 20b, .L_fixup_handle_s2
+ _asm_extable 21b, .L_fixup_handle_s0
+ _asm_extable 22b, .L_fixup_handle_s0
+ _asm_extable 23b, .L_fixup_handle_s4
+ _asm_extable 24b, .L_fixup_handle_s0
+ _asm_extable 25b, .L_fixup_handle_s4
+ _asm_extable 26b, .L_fixup_handle_s0
+ _asm_extable 27b, .L_fixup_handle_s4
+ _asm_extable 28b, .L_fixup_handle_s0
SYM_FUNC_END(__clear_user_fast)
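[For reference, the reworked fixup handlers follow one rule (stated here as an editorial aid): when the store carrying fixup index 'to' faults, a0 holds the current destination, a2 holds the destination end, and the 'to' preceding 8-byte stores of that block already succeeded, so the handler returns

    bytes_not_cleared = (a2 - a0) - 8 * to

The new .L_fixup_handle_s{0,2,4} handlers serve the small-size stubs, where the whole requested size (a1) minus the few bytes already written is still outstanding. copy_user.S below uses the same scheme.]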
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 55ac6020a1ad..b21f6d5d38f5 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -13,7 +13,14 @@
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
- addi.d a0, a2, (\to) * (-8)
+ sub.d a0, a2, a0
+ addi.d a0, a0, (\to) * (-8)
+ jr ra
+.endr
+
+.irp to, 0, 2, 4
+.L_fixup_handle_s\to\():
+ addi.d a0, a2, -\to
jr ra
.endr
@@ -47,8 +54,8 @@ SYM_FUNC_START(__copy_user_generic)
3: move a0, a2
jr ra
- _asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_0
+ _asm_extable 1b, .L_fixup_handle_s0
+ _asm_extable 2b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_generic)
/*
@@ -59,65 +66,209 @@ SYM_FUNC_END(__copy_user_generic)
* a2: n
*/
SYM_FUNC_START(__copy_user_fast)
- beqz a2, 19f
+ sltui t0, a2, 9
+ bnez t0, .Lsmall
- ori a3, zero, 64
- blt a2, a3, 17f
+ add.d a3, a1, a2
+ add.d a2, a0, a2
+0: ld.d t0, a1, 0
+1: st.d t0, a0, 0
- /* copy 64 bytes at a time */
-1: ld.d t0, a1, 0
-2: ld.d t1, a1, 8
-3: ld.d t2, a1, 16
-4: ld.d t3, a1, 24
-5: ld.d t4, a1, 32
-6: ld.d t5, a1, 40
-7: ld.d t6, a1, 48
-8: ld.d t7, a1, 56
-9: st.d t0, a0, 0
-10: st.d t1, a0, 8
-11: st.d t2, a0, 16
-12: st.d t3, a0, 24
-13: st.d t4, a0, 32
-14: st.d t5, a0, 40
-15: st.d t6, a0, 48
-16: st.d t7, a0, 56
+ /* align up destination address */
+ andi t1, a0, 7
+ sub.d t0, zero, t1
+ addi.d t0, t0, 8
+ add.d a1, a1, t0
+ add.d a0, a0, t0
- addi.d a0, a0, 64
- addi.d a1, a1, 64
- addi.d a2, a2, -64
- bge a2, a3, 1b
+ addi.d a4, a3, -64
+ bgeu a1, a4, .Llt64
- beqz a2, 19f
+ /* copy 64 bytes at a time */
+.Lloop64:
+2: ld.d t0, a1, 0
+3: ld.d t1, a1, 8
+4: ld.d t2, a1, 16
+5: ld.d t3, a1, 24
+6: ld.d t4, a1, 32
+7: ld.d t5, a1, 40
+8: ld.d t6, a1, 48
+9: ld.d t7, a1, 56
+ addi.d a1, a1, 64
+10: st.d t0, a0, 0
+11: st.d t1, a0, 8
+12: st.d t2, a0, 16
+13: st.d t3, a0, 24
+14: st.d t4, a0, 32
+15: st.d t5, a0, 40
+16: st.d t6, a0, 48
+17: st.d t7, a0, 56
+ addi.d a0, a0, 64
+ bltu a1, a4, .Lloop64
/* copy the remaining bytes */
-17: ld.b t0, a1, 0
-18: st.b t0, a0, 0
- addi.d a0, a0, 1
- addi.d a1, a1, 1
- addi.d a2, a2, -1
- bgt a2, zero, 17b
+.Llt64:
+ addi.d a4, a3, -32
+ bgeu a1, a4, .Llt32
+18: ld.d t0, a1, 0
+19: ld.d t1, a1, 8
+20: ld.d t2, a1, 16
+21: ld.d t3, a1, 24
+ addi.d a1, a1, 32
+22: st.d t0, a0, 0
+23: st.d t1, a0, 8
+24: st.d t2, a0, 16
+25: st.d t3, a0, 24
+ addi.d a0, a0, 32
+
+.Llt32:
+ addi.d a4, a3, -16
+ bgeu a1, a4, .Llt16
+26: ld.d t0, a1, 0
+27: ld.d t1, a1, 8
+ addi.d a1, a1, 16
+28: st.d t0, a0, 0
+29: st.d t1, a0, 8
+ addi.d a0, a0, 16
+
+.Llt16:
+ addi.d a4, a3, -8
+ bgeu a1, a4, .Llt8
+30: ld.d t0, a1, 0
+31: st.d t0, a0, 0
+
+.Llt8:
+32: ld.d t0, a3, -8
+33: st.d t0, a2, -8
/* return */
-19: move a0, a2
+ move a0, zero
+ jr ra
+
+ .align 5
+.Lsmall:
+ pcaddi t0, 8
+ slli.d a3, a2, 5
+ add.d t0, t0, a3
+ jr t0
+
+ .align 5
+ move a0, zero
+ jr ra
+
+ .align 5
+34: ld.b t0, a1, 0
+35: st.b t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+36: ld.h t0, a1, 0
+37: st.h t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+38: ld.h t0, a1, 0
+39: ld.b t1, a1, 2
+40: st.h t0, a0, 0
+41: st.b t1, a0, 2
+ move a0, zero
+ jr ra
+
+ .align 5
+42: ld.w t0, a1, 0
+43: st.w t0, a0, 0
+ move a0, zero
+ jr ra
+
+ .align 5
+44: ld.w t0, a1, 0
+45: ld.b t1, a1, 4
+46: st.w t0, a0, 0
+47: st.b t1, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 5
+48: ld.w t0, a1, 0
+49: ld.h t1, a1, 4
+50: st.w t0, a0, 0
+51: st.h t1, a0, 4
+ move a0, zero
+ jr ra
+
+ .align 5
+52: ld.w t0, a1, 0
+53: ld.w t1, a1, 3
+54: st.w t0, a0, 0
+55: st.w t1, a0, 3
+ move a0, zero
+ jr ra
+
+ .align 5
+56: ld.d t0, a1, 0
+57: st.d t0, a0, 0
+ move a0, zero
jr ra
/* fixup and ex_table */
+ _asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_1
- _asm_extable 3b, .L_fixup_handle_2
- _asm_extable 4b, .L_fixup_handle_3
- _asm_extable 5b, .L_fixup_handle_4
- _asm_extable 6b, .L_fixup_handle_5
- _asm_extable 7b, .L_fixup_handle_6
- _asm_extable 8b, .L_fixup_handle_7
+ _asm_extable 2b, .L_fixup_handle_0
+ _asm_extable 3b, .L_fixup_handle_0
+ _asm_extable 4b, .L_fixup_handle_0
+ _asm_extable 5b, .L_fixup_handle_0
+ _asm_extable 6b, .L_fixup_handle_0
+ _asm_extable 7b, .L_fixup_handle_0
+ _asm_extable 8b, .L_fixup_handle_0
_asm_extable 9b, .L_fixup_handle_0
- _asm_extable 10b, .L_fixup_handle_1
- _asm_extable 11b, .L_fixup_handle_2
- _asm_extable 12b, .L_fixup_handle_3
- _asm_extable 13b, .L_fixup_handle_4
- _asm_extable 14b, .L_fixup_handle_5
- _asm_extable 15b, .L_fixup_handle_6
- _asm_extable 16b, .L_fixup_handle_7
- _asm_extable 17b, .L_fixup_handle_0
+ _asm_extable 10b, .L_fixup_handle_0
+ _asm_extable 11b, .L_fixup_handle_1
+ _asm_extable 12b, .L_fixup_handle_2
+ _asm_extable 13b, .L_fixup_handle_3
+ _asm_extable 14b, .L_fixup_handle_4
+ _asm_extable 15b, .L_fixup_handle_5
+ _asm_extable 16b, .L_fixup_handle_6
+ _asm_extable 17b, .L_fixup_handle_7
_asm_extable 18b, .L_fixup_handle_0
+ _asm_extable 19b, .L_fixup_handle_0
+ _asm_extable 20b, .L_fixup_handle_0
+ _asm_extable 21b, .L_fixup_handle_0
+ _asm_extable 22b, .L_fixup_handle_0
+ _asm_extable 23b, .L_fixup_handle_1
+ _asm_extable 24b, .L_fixup_handle_2
+ _asm_extable 25b, .L_fixup_handle_3
+ _asm_extable 26b, .L_fixup_handle_0
+ _asm_extable 27b, .L_fixup_handle_0
+ _asm_extable 28b, .L_fixup_handle_0
+ _asm_extable 29b, .L_fixup_handle_1
+ _asm_extable 30b, .L_fixup_handle_0
+ _asm_extable 31b, .L_fixup_handle_0
+ _asm_extable 32b, .L_fixup_handle_0
+ _asm_extable 33b, .L_fixup_handle_1
+ _asm_extable 34b, .L_fixup_handle_s0
+ _asm_extable 35b, .L_fixup_handle_s0
+ _asm_extable 36b, .L_fixup_handle_s0
+ _asm_extable 37b, .L_fixup_handle_s0
+ _asm_extable 38b, .L_fixup_handle_s0
+ _asm_extable 39b, .L_fixup_handle_s0
+ _asm_extable 40b, .L_fixup_handle_s0
+ _asm_extable 41b, .L_fixup_handle_s2
+ _asm_extable 42b, .L_fixup_handle_s0
+ _asm_extable 43b, .L_fixup_handle_s0
+ _asm_extable 44b, .L_fixup_handle_s0
+ _asm_extable 45b, .L_fixup_handle_s0
+ _asm_extable 46b, .L_fixup_handle_s0
+ _asm_extable 47b, .L_fixup_handle_s4
+ _asm_extable 48b, .L_fixup_handle_s0
+ _asm_extable 49b, .L_fixup_handle_s0
+ _asm_extable 50b, .L_fixup_handle_s0
+ _asm_extable 51b, .L_fixup_handle_s4
+ _asm_extable 52b, .L_fixup_handle_s0
+ _asm_extable 53b, .L_fixup_handle_s0
+ _asm_extable 54b, .L_fixup_handle_s0
+ _asm_extable 55b, .L_fixup_handle_s4
+ _asm_extable 56b, .L_fixup_handle_s0
+ _asm_extable 57b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_fast)
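[The new .Lsmall path (here and in clear_user.S/memcpy.S) dispatches sizes 0-8 through a computed jump: pcaddi takes the PC, slli.d scales the size by the stub size (32 bytes here), and jr lands in a fixed-size stub; note that n == 7 uses two overlapping 4-byte accesses instead of 4+2+1. A C analogue of those stubs, illustrative only:

    #include <linux/types.h>

    static void small_copy(void *dst, const void *src, unsigned long n)
    {
        switch (n) {            /* n is 0..8; larger sizes take the main path */
        case 1: *(u8  *)dst = *(const u8  *)src; break;
        case 2: *(u16 *)dst = *(const u16 *)src; break;
        case 3: *(u16 *)dst = *(const u16 *)src;
                *((u8 *)dst + 2) = *((const u8 *)src + 2); break;
        case 4: *(u32 *)dst = *(const u32 *)src; break;
        case 5: *(u32 *)dst = *(const u32 *)src;
                *((u8 *)dst + 4) = *((const u8 *)src + 4); break;
        case 6: *(u32 *)dst = *(const u32 *)src;
                *(u16 *)((u8 *)dst + 4) = *(const u16 *)((const u8 *)src + 4); break;
        case 7: *(u32 *)dst = *(const u32 *)src;
                *(u32 *)((u8 *)dst + 3) = *(const u32 *)((const u8 *)src + 3); break;
        case 8: *(u64 *)dst = *(const u64 *)src; break;
        }
    }
]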
diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c
new file mode 100644
index 000000000000..a5e84b403c3b
--- /dev/null
+++ b/arch/loongarch/lib/csum.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2019-2020 Arm Ltd.
+
+#include <linux/compiler.h>
+#include <linux/kasan-checks.h>
+#include <linux/kernel.h>
+
+#include <net/checksum.h>
+
+static u64 accumulate(u64 sum, u64 data)
+{
+ sum += data;
+ if (sum < data)
+ sum += 1;
+ return sum;
+}
+
+/*
+ * We over-read the buffer and this makes KASAN unhappy. Instead, disable
+ * instrumentation and call kasan explicitly.
+ */
+unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
+{
+ unsigned int offset, shift, sum;
+ const u64 *ptr;
+ u64 data, sum64 = 0;
+
+ if (unlikely(len == 0))
+ return 0;
+
+ offset = (unsigned long)buff & 7;
+ /*
+ * This is to all intents and purposes safe, since rounding down cannot
+ * result in a different page or cache line being accessed, and @buff
+ * should absolutely not be pointing to anything read-sensitive. We do,
+ * however, have to be careful not to piss off KASAN, which means using
+ * unchecked reads to accommodate the head and tail, for which we'll
+ * compensate with an explicit check up-front.
+ */
+ kasan_check_read(buff, len);
+ ptr = (u64 *)(buff - offset);
+ len = len + offset - 8;
+
+ /*
+ * Head: zero out any excess leading bytes. Shifting back by the same
+ * amount should be at least as fast as any other way of handling the
+ * odd/even alignment, and means we can ignore it until the very end.
+ */
+ shift = offset * 8;
+ data = *ptr++;
+ data = (data >> shift) << shift;
+
+ /*
+ * Body: straightforward aligned loads from here on (the paired loads
+ * underlying the quadword type still only need dword alignment). The
+ * main loop strictly excludes the tail, so the second loop will always
+ * run at least once.
+ */
+ while (unlikely(len > 64)) {
+ __uint128_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = *(__uint128_t *)ptr;
+ tmp2 = *(__uint128_t *)(ptr + 2);
+ tmp3 = *(__uint128_t *)(ptr + 4);
+ tmp4 = *(__uint128_t *)(ptr + 6);
+
+ len -= 64;
+ ptr += 8;
+
+ /* This is the "don't dump the carry flag into a GPR" idiom */
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ tmp2 += (tmp2 >> 64) | (tmp2 << 64);
+ tmp3 += (tmp3 >> 64) | (tmp3 << 64);
+ tmp4 += (tmp4 >> 64) | (tmp4 << 64);
+ tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
+ tmp3 += (tmp3 >> 64) | (tmp3 << 64);
+ tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ tmp1 = ((tmp1 >> 64) << 64) | sum64;
+ tmp1 += (tmp1 >> 64) | (tmp1 << 64);
+ sum64 = tmp1 >> 64;
+ }
+ while (len > 8) {
+ __uint128_t tmp;
+
+ sum64 = accumulate(sum64, data);
+ tmp = *(__uint128_t *)ptr;
+
+ len -= 16;
+ ptr += 2;
+
+ data = tmp >> 64;
+ sum64 = accumulate(sum64, tmp);
+ }
+ if (len > 0) {
+ sum64 = accumulate(sum64, data);
+ data = *ptr;
+ len -= 8;
+ }
+ /*
+ * Tail: zero any over-read bytes similarly to the head, again
+ * preserving odd/even alignment.
+ */
+ shift = len * -8;
+ data = (data << shift) >> shift;
+ sum64 = accumulate(sum64, data);
+
+ /* Finally, folding */
+ sum64 += (sum64 >> 32) | (sum64 << 32);
+ sum = sum64 >> 32;
+ sum += (sum >> 16) | (sum << 16);
+ if (offset & 1)
+ return (u16)swab32(sum);
+
+ return sum >> 16;
+}
+
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ __uint128_t src, dst;
+ u64 sum = (__force u64)csum;
+
+ src = *(const __uint128_t *)saddr->s6_addr;
+ dst = *(const __uint128_t *)daddr->s6_addr;
+
+ sum += (__force u32)htonl(len);
+ sum += (u32)proto << 24;
+ src += (src >> 64) | (src << 64);
+ dst += (dst >> 64) | (dst << 64);
+
+ sum = accumulate(sum, src >> 64);
+ sum = accumulate(sum, dst >> 64);
+
+ sum += ((sum >> 32) | (sum << 32));
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
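Aside: do_csum() above leans on two reusable ideas, end-around-carry accumulation of wide words (accumulate()) and folding the wide sum down to a 16-bit ones'-complement result. The user-space sketch below restates both in portable C and checks them against a byte-at-a-time reference; the helper names (accumulate64, csum16_ref, csum16_wide) and the test data are illustrative only, not part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 64-bit ones'-complement add: fold the carry back into the sum. */
static uint64_t accumulate64(uint64_t sum, uint64_t data)
{
        sum += data;
        return sum + (sum < data);
}

/* Reference: RFC 1071 style 16-bit sum over little-endian words. */
static uint16_t csum16_ref(const uint8_t *p, size_t len)
{
        uint32_t sum = 0;

        for (size_t i = 0; i < len; i++)
                sum += (uint32_t)p[i] << (8 * (i & 1));
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return sum;
}

/* Same value computed 8 bytes at a time, then folded 64 -> 32 -> 16. */
static uint16_t csum16_wide(const uint8_t *p, size_t len)
{
        uint64_t sum = 0, word;
        uint32_t s32;

        while (len >= 8) {
                memcpy(&word, p, 8);
                sum = accumulate64(sum, word);
                p += 8;
                len -= 8;
        }
        word = 0;
        memcpy(&word, p, len);                  /* zero-padded tail, like do_csum() */
        sum = accumulate64(sum, word);

        sum += (sum >> 32) | (sum << 32);       /* fold 64 -> 32, result in the high half */
        s32 = sum >> 32;
        s32 += (s32 >> 16) | (s32 << 16);       /* fold 32 -> 16, result in the high half */
        return s32 >> 16;
}

int main(void)
{
        uint8_t buf[37];

        for (size_t i = 0; i < sizeof(buf); i++)
                buf[i] = (uint8_t)(i * 37 + 1);
        /* both functions should print the same 16-bit value */
        printf("%04x %04x\n", csum16_ref(buf, sizeof(buf)), csum16_wide(buf, sizeof(buf)));
        return 0;
}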
diff --git a/arch/loongarch/lib/error-inject.c b/arch/loongarch/lib/error-inject.c
new file mode 100644
index 000000000000..afc9e1c7c973
--- /dev/null
+++ b/arch/loongarch/lib/error-inject.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ instruction_pointer_set(regs, regs->regs[1]);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
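For context: override_function_with_return() is the arch hook behind CONFIG_FUNCTION_ERROR_INJECTION. On LoongArch, $r1 is the return-address register (ra), so pointing the instruction pointer at regs->regs[1] makes the probed function return immediately with whatever value the injector has placed in the return-value register. A hedged sketch of how a function opts in follows; example_prepare() and struct widget are hypothetical, while ALLOW_ERROR_INJECTION() is the existing macro from <linux/error-injection.h>.

#include <linux/errno.h>
#include <linux/error-injection.h>

struct widget;                                  /* illustrative only */

int example_prepare(struct widget *w)
{
        /*
         * Real work would go here.  With error injection enabled, a
         * bpf_override_return() program or the kprobe-based injector
         * can force an early return (e.g. -ENOMEM) through
         * override_function_with_return() without running the body.
         */
        return 0;
}
ALLOW_ERROR_INJECTION(example_prepare, ERRNO);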
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index 3b7e1dec7109..39ce6621c704 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -44,6 +44,66 @@ SYM_FUNC_START(__memcpy_generic)
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)
+ .align 5
+SYM_FUNC_START_NOALIGN(__memcpy_small)
+ pcaddi t0, 8
+ slli.d a2, a2, 5
+ add.d t0, t0, a2
+ jr t0
+
+ .align 5
+0: jr ra
+
+ .align 5
+1: ld.b t0, a1, 0
+ st.b t0, a0, 0
+ jr ra
+
+ .align 5
+2: ld.h t0, a1, 0
+ st.h t0, a0, 0
+ jr ra
+
+ .align 5
+3: ld.h t0, a1, 0
+ ld.b t1, a1, 2
+ st.h t0, a0, 0
+ st.b t1, a0, 2
+ jr ra
+
+ .align 5
+4: ld.w t0, a1, 0
+ st.w t0, a0, 0
+ jr ra
+
+ .align 5
+5: ld.w t0, a1, 0
+ ld.b t1, a1, 4
+ st.w t0, a0, 0
+ st.b t1, a0, 4
+ jr ra
+
+ .align 5
+6: ld.w t0, a1, 0
+ ld.h t1, a1, 4
+ st.w t0, a0, 0
+ st.h t1, a0, 4
+ jr ra
+
+ .align 5
+7: ld.w t0, a1, 0
+ ld.w t1, a1, 3
+ st.w t0, a0, 0
+ st.w t1, a0, 3
+ jr ra
+
+ .align 5
+8: ld.d t0, a1, 0
+ st.d t0, a0, 0
+ jr ra
+SYM_FUNC_END(__memcpy_small)
+_ASM_NOKPROBE(__memcpy_small)
+
/*
* void *__memcpy_fast(void *dst, const void *src, size_t n)
*
@@ -52,14 +112,27 @@ _ASM_NOKPROBE(__memcpy_generic)
* a2: n
*/
SYM_FUNC_START(__memcpy_fast)
- move a3, a0
- beqz a2, 3f
+ sltui t0, a2, 9
+ bnez t0, __memcpy_small
+
+ add.d a3, a1, a2
+ add.d a2, a0, a2
+ ld.d a6, a1, 0
+ ld.d a7, a3, -8
+
+ /* align up destination address */
+ andi t1, a0, 7
+ sub.d t0, zero, t1
+ addi.d t0, t0, 8
+ add.d a1, a1, t0
+ add.d a5, a0, t0
- ori a4, zero, 64
- blt a2, a4, 2f
+ addi.d a4, a3, -64
+ bgeu a1, a4, .Llt64
/* copy 64 bytes at a time */
-1: ld.d t0, a1, 0
+.Lloop64:
+ ld.d t0, a1, 0
ld.d t1, a1, 8
ld.d t2, a1, 16
ld.d t3, a1, 24
@@ -67,32 +140,54 @@ SYM_FUNC_START(__memcpy_fast)
ld.d t5, a1, 40
ld.d t6, a1, 48
ld.d t7, a1, 56
- st.d t0, a0, 0
- st.d t1, a0, 8
- st.d t2, a0, 16
- st.d t3, a0, 24
- st.d t4, a0, 32
- st.d t5, a0, 40
- st.d t6, a0, 48
- st.d t7, a0, 56
-
- addi.d a0, a0, 64
addi.d a1, a1, 64
- addi.d a2, a2, -64
- bge a2, a4, 1b
-
- beqz a2, 3f
+ st.d t0, a5, 0
+ st.d t1, a5, 8
+ st.d t2, a5, 16
+ st.d t3, a5, 24
+ st.d t4, a5, 32
+ st.d t5, a5, 40
+ st.d t6, a5, 48
+ st.d t7, a5, 56
+ addi.d a5, a5, 64
+ bltu a1, a4, .Lloop64
/* copy the remaining bytes */
-2: ld.b t0, a1, 0
- st.b t0, a0, 0
- addi.d a0, a0, 1
- addi.d a1, a1, 1
- addi.d a2, a2, -1
- bgt a2, zero, 2b
+.Llt64:
+ addi.d a4, a3, -32
+ bgeu a1, a4, .Llt32
+ ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ ld.d t2, a1, 16
+ ld.d t3, a1, 24
+ addi.d a1, a1, 32
+ st.d t0, a5, 0
+ st.d t1, a5, 8
+ st.d t2, a5, 16
+ st.d t3, a5, 24
+ addi.d a5, a5, 32
+
+.Llt32:
+ addi.d a4, a3, -16
+ bgeu a1, a4, .Llt16
+ ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ addi.d a1, a1, 16
+ st.d t0, a5, 0
+ st.d t1, a5, 8
+ addi.d a5, a5, 16
+
+.Llt16:
+ addi.d a4, a3, -8
+ bgeu a1, a4, .Llt8
+ ld.d t0, a1, 0
+ st.d t0, a5, 0
+
+.Llt8:
+ st.d a6, a0, 0
+ st.d a7, a2, -8
/* return */
-3: move a0, a3
jr ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)
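The rewritten __memcpy_fast avoids byte-at-a-time edge handling: for n >= 9 it loads the first and last 8 source bytes up front (a6/a7), copies an aligned middle in 64/32/16/8-byte blocks, and stores those two saved dwords last so they cover whatever the block loop left at the misaligned edges; sizes 0-8 go through __memcpy_small's computed jump. A rough C restatement of the edge-saving idea (memcpy_fast_sketch is a made-up name, not the kernel routine, and it uses 8-byte strides where the assembly uses up to 64):

#include <stdint.h>
#include <string.h>

/* Assumes n >= 9 and non-overlapping buffers, like the fast path itself. */
static void *memcpy_fast_sketch(void *dst, const void *src, size_t n)
{
        uint8_t *d = dst;
        const uint8_t *s = src;
        uint64_t head, tail, w;

        memcpy(&head, s, 8);                    /* first 8 bytes, saved up front */
        memcpy(&tail, s + n - 8, 8);            /* last 8 bytes, saved up front */

        /* copy 8 bytes at a time starting from the first aligned slot */
        size_t off = 8 - ((uintptr_t)d & 7);
        for (; off + 8 <= n; off += 8) {
                memcpy(&w, s + off, 8);
                memcpy(d + off, &w, 8);
        }

        /* the saved dwords cover the unaligned head and the short tail */
        memcpy(d, &head, 8);
        memcpy(d + n - 8, &tail, 8);
        return dst;
}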
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
index b796c3d6da05..45b725ba7867 100644
--- a/arch/loongarch/lib/memmove.S
+++ b/arch/loongarch/lib/memmove.S
@@ -11,23 +11,9 @@
#include <asm/regdef.h>
SYM_FUNC_START(memmove)
- blt a0, a1, 1f /* dst < src, memcpy */
- blt a1, a0, 3f /* src < dst, rmemcpy */
+ blt a0, a1, memcpy /* dst < src, memcpy */
+ blt a1, a0, rmemcpy /* src < dst, rmemcpy */
jr ra /* dst == src, return */
-
- /* if (src - dst) < 64, copy 1 byte at a time */
-1: ori a3, zero, 64
- sub.d t0, a1, a0
- blt t0, a3, 2f
- b memcpy
-2: b __memcpy_generic
-
- /* if (dst - src) < 64, copy 1 byte at a time */
-3: ori a3, zero, 64
- sub.d t0, a0, a1
- blt t0, a3, 4f
- b rmemcpy
-4: b __rmemcpy_generic
SYM_FUNC_END(memmove)
_ASM_NOKPROBE(memmove)
@@ -76,50 +62,80 @@ _ASM_NOKPROBE(__rmemcpy_generic)
* a2: n
*/
SYM_FUNC_START(__rmemcpy_fast)
- move a3, a0
- beqz a2, 3f
+ sltui t0, a2, 9
+ bnez t0, __memcpy_small
- add.d a0, a0, a2
- add.d a1, a1, a2
+ add.d a3, a1, a2
+ add.d a2, a0, a2
+ ld.d a6, a1, 0
+ ld.d a7, a3, -8
+
+ /* align up destination address */
+ andi t1, a2, 7
+ sub.d a3, a3, t1
+ sub.d a5, a2, t1
- ori a4, zero, 64
- blt a2, a4, 2f
+ addi.d a4, a1, 64
+ bgeu a4, a3, .Llt64
/* copy 64 bytes at a time */
-1: ld.d t0, a1, -8
- ld.d t1, a1, -16
- ld.d t2, a1, -24
- ld.d t3, a1, -32
- ld.d t4, a1, -40
- ld.d t5, a1, -48
- ld.d t6, a1, -56
- ld.d t7, a1, -64
- st.d t0, a0, -8
- st.d t1, a0, -16
- st.d t2, a0, -24
- st.d t3, a0, -32
- st.d t4, a0, -40
- st.d t5, a0, -48
- st.d t6, a0, -56
- st.d t7, a0, -64
-
- addi.d a0, a0, -64
- addi.d a1, a1, -64
- addi.d a2, a2, -64
- bge a2, a4, 1b
-
- beqz a2, 3f
+.Lloop64:
+ ld.d t0, a3, -8
+ ld.d t1, a3, -16
+ ld.d t2, a3, -24
+ ld.d t3, a3, -32
+ ld.d t4, a3, -40
+ ld.d t5, a3, -48
+ ld.d t6, a3, -56
+ ld.d t7, a3, -64
+ addi.d a3, a3, -64
+ st.d t0, a5, -8
+ st.d t1, a5, -16
+ st.d t2, a5, -24
+ st.d t3, a5, -32
+ st.d t4, a5, -40
+ st.d t5, a5, -48
+ st.d t6, a5, -56
+ st.d t7, a5, -64
+ addi.d a5, a5, -64
+ bltu a4, a3, .Lloop64
/* copy the remaining bytes */
-2: ld.b t0, a1, -1
- st.b t0, a0, -1
- addi.d a0, a0, -1
- addi.d a1, a1, -1
- addi.d a2, a2, -1
- bgt a2, zero, 2b
+.Llt64:
+ addi.d a4, a1, 32
+ bgeu a4, a3, .Llt32
+ ld.d t0, a3, -8
+ ld.d t1, a3, -16
+ ld.d t2, a3, -24
+ ld.d t3, a3, -32
+ addi.d a3, a3, -32
+ st.d t0, a5, -8
+ st.d t1, a5, -16
+ st.d t2, a5, -24
+ st.d t3, a5, -32
+ addi.d a5, a5, -32
+
+.Llt32:
+ addi.d a4, a1, 16
+ bgeu a4, a3, .Llt16
+ ld.d t0, a3, -8
+ ld.d t1, a3, -16
+ addi.d a3, a3, -16
+ st.d t0, a5, -8
+ st.d t1, a5, -16
+ addi.d a5, a5, -16
+
+.Llt16:
+ addi.d a4, a1, 8
+ bgeu a4, a3, .Llt8
+ ld.d t0, a3, -8
+ st.d t0, a5, -8
+
+.Llt8:
+ st.d a6, a0, 0
+ st.d a7, a2, -8
/* return */
-3: move a0, a3
jr ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)
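memmove itself is now a pure dispatcher: with both copy directions handling arbitrary lengths and alignments efficiently, the old "less than 64 bytes, fall back to the generic byte loop" special cases are gone, and memmove only picks a direction. Roughly, in C (the *_sketch names are illustrative):

#include <stddef.h>
#include <stdint.h>

static void *fmemcpy_sketch(void *dst, const void *src, size_t n)
{
        uint8_t *d = dst;
        const uint8_t *s = src;

        for (size_t i = 0; i < n; i++)          /* lowest address first */
                d[i] = s[i];
        return dst;
}

static void *rmemcpy_sketch(void *dst, const void *src, size_t n)
{
        uint8_t *d = dst;
        const uint8_t *s = src;

        while (n--)                             /* highest address first */
                d[n] = s[n];
        return dst;
}

static void *memmove_sketch(void *dst, const void *src, size_t n)
{
        if ((uintptr_t)dst < (uintptr_t)src)
                return fmemcpy_sketch(dst, src, n);     /* forward copy is overlap-safe here */
        if ((uintptr_t)dst > (uintptr_t)src)
                return rmemcpy_sketch(dst, src, n);     /* backward copy is overlap-safe here */
        return dst;                                     /* dst == src: nothing to do */
}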
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index a9eb732ab2ad..b39c6194e3ae 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -56,39 +56,107 @@ _ASM_NOKPROBE(__memset_generic)
* a2: n
*/
SYM_FUNC_START(__memset_fast)
- move a3, a0
- beqz a2, 3f
-
- ori a4, zero, 64
- blt a2, a4, 2f
-
/* fill a1 to 64 bits */
fill_to_64 a1
- /* set 64 bytes at a time */
-1: st.d a1, a0, 0
- st.d a1, a0, 8
- st.d a1, a0, 16
- st.d a1, a0, 24
- st.d a1, a0, 32
- st.d a1, a0, 40
- st.d a1, a0, 48
- st.d a1, a0, 56
+ sltui t0, a2, 9
+ bnez t0, .Lsmall
- addi.d a0, a0, 64
- addi.d a2, a2, -64
- bge a2, a4, 1b
+ add.d a2, a0, a2
+ st.d a1, a0, 0
- beqz a2, 3f
+ /* align up address */
+ addi.d a3, a0, 8
+ bstrins.d a3, zero, 2, 0
+
+ addi.d a4, a2, -64
+ bgeu a3, a4, .Llt64
+
+ /* set 64 bytes at a time */
+.Lloop64:
+ st.d a1, a3, 0
+ st.d a1, a3, 8
+ st.d a1, a3, 16
+ st.d a1, a3, 24
+ st.d a1, a3, 32
+ st.d a1, a3, 40
+ st.d a1, a3, 48
+ st.d a1, a3, 56
+ addi.d a3, a3, 64
+ bltu a3, a4, .Lloop64
/* set the remaining bytes */
-2: st.b a1, a0, 0
- addi.d a0, a0, 1
- addi.d a2, a2, -1
- bgt a2, zero, 2b
+.Llt64:
+ addi.d a4, a2, -32
+ bgeu a3, a4, .Llt32
+ st.d a1, a3, 0
+ st.d a1, a3, 8
+ st.d a1, a3, 16
+ st.d a1, a3, 24
+ addi.d a3, a3, 32
+
+.Llt32:
+ addi.d a4, a2, -16
+ bgeu a3, a4, .Llt16
+ st.d a1, a3, 0
+ st.d a1, a3, 8
+ addi.d a3, a3, 16
+
+.Llt16:
+ addi.d a4, a2, -8
+ bgeu a3, a4, .Llt8
+ st.d a1, a3, 0
+
+.Llt8:
+ st.d a1, a2, -8
/* return */
-3: move a0, a3
+ jr ra
+
+ .align 4
+.Lsmall:
+ pcaddi t0, 4
+ slli.d a2, a2, 4
+ add.d t0, t0, a2
+ jr t0
+
+ .align 4
+0: jr ra
+
+ .align 4
+1: st.b a1, a0, 0
+ jr ra
+
+ .align 4
+2: st.h a1, a0, 0
+ jr ra
+
+ .align 4
+3: st.h a1, a0, 0
+ st.b a1, a0, 2
+ jr ra
+
+ .align 4
+4: st.w a1, a0, 0
+ jr ra
+
+ .align 4
+5: st.w a1, a0, 0
+ st.b a1, a0, 4
+ jr ra
+
+ .align 4
+6: st.w a1, a0, 0
+ st.h a1, a0, 4
+ jr ra
+
+ .align 4
+7: st.w a1, a0, 0
+ st.w a1, a0, 3
+ jr ra
+
+ .align 4
+8: st.d a1, a0, 0
jr ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)
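__memset_fast follows the same pattern as the new memcpy: replicate the fill byte across 64 bits (fill_to_64), store one dword at the very start and one at the very end unconditionally, and let an aligned 64/32/16/8-byte loop fill the middle, so no byte loop is needed for n >= 9; the 0-8 byte cases jump through a 16-byte-per-entry table. A hedged C sketch of the large-size path (memset_fast_sketch is a made-up name):

#include <stdint.h>
#include <string.h>

/* Assumes n >= 9; smaller sizes take the computed-jump path in the patch. */
static void *memset_fast_sketch(void *dst, int c, size_t n)
{
        uint64_t pat = 0x0101010101010101ull * (uint8_t)c;      /* fill_to_64 equivalent */
        uint8_t *d = dst;
        uint8_t *end = d + n;
        uint8_t *p;

        memcpy(d, &pat, 8);                     /* head dword, covers a misaligned start */
        memcpy(end - 8, &pat, 8);               /* tail dword, covers the last < 8 bytes */

        /* fill the aligned middle; the assembly uses up to 64-byte strides */
        for (p = (uint8_t *)(((uintptr_t)d + 8) & ~(uintptr_t)7); p < end - 8; p += 8)
                memcpy(p, &pat, 8);
        return dst;
}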