aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/crypto/aria-avx.h
diff options
context:
space:
mode:
authorGravatar Taehee Yoo <ap420073@gmail.com> 2023-01-01 09:12:52 +0000
committerGravatar Herbert Xu <herbert@gondor.apana.org.au> 2023-01-06 17:15:47 +0800
commitc970d42001f2dffd587cc38458fea4130796be43 (patch)
tree20c1189e2f1e6f7958c132d6d63206f1550851fd /arch/x86/crypto/aria-avx.h
parentcrypto: x86/aria - implement aria-avx2 (diff)
downloadlinux-c970d42001f2dffd587cc38458fea4130796be43.tar.gz
linux-c970d42001f2dffd587cc38458fea4130796be43.tar.bz2
linux-c970d42001f2dffd587cc38458fea4130796be43.zip
crypto: x86/aria - implement aria-avx512
aria-avx512 implementation uses AVX512 and GFNI. It supports 64way parallel processing. So, byteslicing code is changed to support 64way parallel. And it exports some aria-avx2 functions such as encrypt() and decrypt(). AVX and AVX2 have 16 registers. They should use memory to store/load state because of lack of registers. But AVX512 supports 32 registers. So, it doesn't require store/load in the s-box layer. It means that it can reduce overhead of store/load in the s-box layer. Also code become much simpler. Benchmark with modprobe tcrypt mode=610 num_mb=8192, i3-12100: ARIA-AVX512(128bit and 256bit) testing speed of multibuffer ecb(aria) (ecb-aria-avx512) encryption tcrypt: 1 operation in 1504 cycles (1024 bytes) tcrypt: 1 operation in 4595 cycles (4096 bytes) tcrypt: 1 operation in 1763 cycles (1024 bytes) tcrypt: 1 operation in 5540 cycles (4096 bytes) testing speed of multibuffer ecb(aria) (ecb-aria-avx512) decryption tcrypt: 1 operation in 1502 cycles (1024 bytes) tcrypt: 1 operation in 4615 cycles (4096 bytes) tcrypt: 1 operation in 1759 cycles (1024 bytes) tcrypt: 1 operation in 5554 cycles (4096 bytes) ARIA-AVX2 with GFNI(128bit and 256bit) testing speed of multibuffer ecb(aria) (ecb-aria-avx2) encryption tcrypt: 1 operation in 2003 cycles (1024 bytes) tcrypt: 1 operation in 5867 cycles (4096 bytes) tcrypt: 1 operation in 2358 cycles (1024 bytes) tcrypt: 1 operation in 7295 cycles (4096 bytes) testing speed of multibuffer ecb(aria) (ecb-aria-avx2) decryption tcrypt: 1 operation in 2004 cycles (1024 bytes) tcrypt: 1 operation in 5956 cycles (4096 bytes) tcrypt: 1 operation in 2409 cycles (1024 bytes) tcrypt: 1 operation in 7564 cycles (4096 bytes) Signed-off-by: Taehee Yoo <ap420073@gmail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/aria-avx.h')
-rw-r--r--arch/x86/crypto/aria-avx.h8
1 files changed, 8 insertions, 0 deletions
diff --git a/arch/x86/crypto/aria-avx.h b/arch/x86/crypto/aria-avx.h
index 9f2d847c460a..6e1b2d8a31ed 100644
--- a/arch/x86/crypto/aria-avx.h
+++ b/arch/x86/crypto/aria-avx.h
@@ -10,6 +10,9 @@
#define ARIA_AESNI_AVX2_PARALLEL_BLOCKS 32
#define ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_AVX2_PARALLEL_BLOCKS)
+#define ARIA_GFNI_AVX512_PARALLEL_BLOCKS 64
+#define ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_GFNI_AVX512_PARALLEL_BLOCKS)
+
asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst,
@@ -49,6 +52,11 @@ struct aria_avx_ops {
void (*aria_decrypt_32way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_ctr_crypt_32way)(const void *ctx, u8 *dst, const u8 *src,
u8 *keystream, u8 *iv);
+ void (*aria_encrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
+ void (*aria_decrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
+ void (*aria_ctr_crypt_64way)(const void *ctx, u8 *dst, const u8 *src,
+ u8 *keystream, u8 *iv);
+
};
#endif