From c970d42001f2dffd587cc38458fea4130796be43 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 1 Jan 2023 09:12:52 +0000 Subject: crypto: x86/aria - implement aria-avx512 aria-avx512 implementation uses AVX512 and GFNI. It supports 64way parallel processing. So, byteslicing code is changed to support 64way parallel. And it exports some aria-avx2 functions such as encrypt() and decrypt(). AVX and AVX2 have 16 registers. They should use memory to store/load state because of lack of registers. But AVX512 supports 32 registers. So, it doesn't require store/load in the s-box layer. It means that it can reduce overhead of store/load in the s-box layer. Also code become much simpler. Benchmark with modprobe tcrypt mode=610 num_mb=8192, i3-12100: ARIA-AVX512(128bit and 256bit) testing speed of multibuffer ecb(aria) (ecb-aria-avx512) encryption tcrypt: 1 operation in 1504 cycles (1024 bytes) tcrypt: 1 operation in 4595 cycles (4096 bytes) tcrypt: 1 operation in 1763 cycles (1024 bytes) tcrypt: 1 operation in 5540 cycles (4096 bytes) testing speed of multibuffer ecb(aria) (ecb-aria-avx512) decryption tcrypt: 1 operation in 1502 cycles (1024 bytes) tcrypt: 1 operation in 4615 cycles (4096 bytes) tcrypt: 1 operation in 1759 cycles (1024 bytes) tcrypt: 1 operation in 5554 cycles (4096 bytes) ARIA-AVX2 with GFNI(128bit and 256bit) testing speed of multibuffer ecb(aria) (ecb-aria-avx2) encryption tcrypt: 1 operation in 2003 cycles (1024 bytes) tcrypt: 1 operation in 5867 cycles (4096 bytes) tcrypt: 1 operation in 2358 cycles (1024 bytes) tcrypt: 1 operation in 7295 cycles (4096 bytes) testing speed of multibuffer ecb(aria) (ecb-aria-avx2) decryption tcrypt: 1 operation in 2004 cycles (1024 bytes) tcrypt: 1 operation in 5956 cycles (4096 bytes) tcrypt: 1 operation in 2409 cycles (1024 bytes) tcrypt: 1 operation in 7564 cycles (4096 bytes) Signed-off-by: Taehee Yoo Signed-off-by: Herbert Xu --- arch/x86/crypto/aria-avx.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/crypto/aria-avx.h') diff --git a/arch/x86/crypto/aria-avx.h b/arch/x86/crypto/aria-avx.h index 9f2d847c460a..6e1b2d8a31ed 100644 --- a/arch/x86/crypto/aria-avx.h +++ b/arch/x86/crypto/aria-avx.h @@ -10,6 +10,9 @@ #define ARIA_AESNI_AVX2_PARALLEL_BLOCKS 32 #define ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_AVX2_PARALLEL_BLOCKS) +#define ARIA_GFNI_AVX512_PARALLEL_BLOCKS 64 +#define ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_GFNI_AVX512_PARALLEL_BLOCKS) + asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, @@ -49,6 +52,11 @@ struct aria_avx_ops { void (*aria_decrypt_32way)(const void *ctx, u8 *dst, const u8 *src); void (*aria_ctr_crypt_32way)(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); + void (*aria_encrypt_64way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_decrypt_64way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_ctr_crypt_64way)(const void *ctx, u8 *dst, const u8 *src, + u8 *keystream, u8 *iv); + }; #endif -- cgit v1.2.3