mirror of
https://github.com/torvalds/linux.git
synced 2026-06-01 02:53:36 +02:00
crypto: x86/aes - drop the avx10_256 AES-XTS and AES-CTR code
Intel made a late change to the AVX10 specification that removes support for a 256-bit maximum vector length and enumeration of the maximum vector length. AVX10 will imply a maximum vector length of 512 bits. I.e. there won't be any such thing as AVX10/256 or AVX10/512; there will just be AVX10, and it will essentially just consolidate AVX512 features.

As a result of this new development, my strategy of providing both *_avx10_256 and *_avx10_512 functions didn't turn out to be that useful. The only remaining motivation for the 256-bit AVX512 / AVX10 functions is to avoid downclocking on older Intel CPUs. But in the case of AES-XTS and AES-CTR, I already wrote *_avx2 code too (primarily to support CPUs without AVX512), which performs almost as well as *_avx10_256. So we should just use that.

Therefore, remove the *_avx10_256 AES-XTS and AES-CTR functions and algorithms, and rename the *_avx10_512 AES-XTS and AES-CTR functions and algorithms to *_avx512. Make Ice Lake and Tiger Lake use *_avx2 instead of *_avx10_256 which they previously used.

I've left AES-GCM unchanged for now. There is no VAES+AVX2 optimized AES-GCM in the kernel yet, so the path forward for that is not as clear. However, I did write a VAES+AVX2 optimized AES-GCM for BoringSSL. So one option is to port that to the kernel and then do the same cleanup.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
5ebc052d3b
commit
7d14fbc569
|
|
@ -48,8 +48,7 @@
|
|||
// using the following sets of CPU features:
|
||||
// - AES-NI && AVX
|
||||
// - VAES && AVX2
|
||||
// - VAES && (AVX10/256 || (AVX512BW && AVX512VL)) && BMI2
|
||||
// - VAES && (AVX10/512 || (AVX512BW && AVX512VL)) && BMI2
|
||||
// - VAES && AVX512BW && AVX512VL && BMI2
|
||||
//
|
||||
// See the function definitions at the bottom of the file for more information.
|
||||
|
||||
|
|
@ -76,7 +75,6 @@
|
|||
.text
|
||||
|
||||
// Move a vector between memory and a register.
|
||||
// The register operand must be in the first 16 vector registers.
|
||||
.macro _vmovdqu src, dst
|
||||
.if VL < 64
|
||||
vmovdqu \src, \dst
|
||||
|
|
@ -86,7 +84,6 @@
|
|||
.endm
|
||||
|
||||
// Move a vector between registers.
|
||||
// The registers must be in the first 16 vector registers.
|
||||
.macro _vmovdqa src, dst
|
||||
.if VL < 64
|
||||
vmovdqa \src, \dst
|
||||
|
|
@ -96,7 +93,7 @@
|
|||
.endm
|
||||
|
||||
// Broadcast a 128-bit value from memory to all 128-bit lanes of a vector
|
||||
// register. The register operand must be in the first 16 vector registers.
|
||||
// register.
|
||||
.macro _vbroadcast128 src, dst
|
||||
.if VL == 16
|
||||
vmovdqu \src, \dst
|
||||
|
|
@ -108,7 +105,6 @@
|
|||
.endm
|
||||
|
||||
// XOR two vectors together.
|
||||
// Any register operands must be in the first 16 vector registers.
|
||||
.macro _vpxor src1, src2, dst
|
||||
.if VL < 64
|
||||
vpxor \src1, \src2, \dst
|
||||
|
|
@ -199,8 +195,8 @@
|
|||
// XOR each with the zero-th round key. Also update LE_CTR if !\final.
|
||||
.macro _prepare_2_ctr_vecs is_xctr, i0, i1, final=0
|
||||
.if \is_xctr
|
||||
.if USE_AVX10
|
||||
_vmovdqa LE_CTR, AESDATA\i0
|
||||
.if USE_AVX512
|
||||
vmovdqa64 LE_CTR, AESDATA\i0
|
||||
vpternlogd $0x96, XCTR_IV, RNDKEY0, AESDATA\i0
|
||||
.else
|
||||
vpxor XCTR_IV, LE_CTR, AESDATA\i0
|
||||
|
|
@ -208,7 +204,7 @@
|
|||
.endif
|
||||
vpaddq LE_CTR_INC1, LE_CTR, AESDATA\i1
|
||||
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
vpternlogd $0x96, XCTR_IV, RNDKEY0, AESDATA\i1
|
||||
.else
|
||||
vpxor XCTR_IV, AESDATA\i1, AESDATA\i1
|
||||
|
|
@ -481,18 +477,12 @@
|
|||
.Lxor_tail_partial_vec_0\@:
|
||||
// XOR the remaining 1 <= LEN < VL bytes. It's easy if masked
|
||||
// loads/stores are available; otherwise it's a bit harder...
|
||||
.if USE_AVX10
|
||||
.if VL <= 32
|
||||
mov $-1, %eax
|
||||
bzhi LEN, %eax, %eax
|
||||
kmovd %eax, %k1
|
||||
.else
|
||||
.if USE_AVX512
|
||||
mov $-1, %rax
|
||||
bzhi LEN64, %rax, %rax
|
||||
kmovq %rax, %k1
|
||||
.endif
|
||||
vmovdqu8 (SRC), AESDATA1{%k1}{z}
|
||||
_vpxor AESDATA1, AESDATA0, AESDATA0
|
||||
vpxord AESDATA1, AESDATA0, AESDATA0
|
||||
vmovdqu8 AESDATA0, (DST){%k1}
|
||||
.else
|
||||
.if VL == 32
|
||||
|
|
@ -554,7 +544,7 @@
|
|||
// eliminates carries. |ctr| is the per-message block counter starting at 1.
|
||||
|
||||
.set VL, 16
|
||||
.set USE_AVX10, 0
|
||||
.set USE_AVX512, 0
|
||||
SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx)
|
||||
_aes_ctr_crypt 0
|
||||
SYM_FUNC_END(aes_ctr64_crypt_aesni_avx)
|
||||
|
|
@ -564,7 +554,7 @@ SYM_FUNC_END(aes_xctr_crypt_aesni_avx)
|
|||
|
||||
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
|
||||
.set VL, 32
|
||||
.set USE_AVX10, 0
|
||||
.set USE_AVX512, 0
|
||||
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
|
||||
_aes_ctr_crypt 0
|
||||
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2)
|
||||
|
|
@ -572,21 +562,12 @@ SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2)
|
|||
_aes_ctr_crypt 1
|
||||
SYM_FUNC_END(aes_xctr_crypt_vaes_avx2)
|
||||
|
||||
.set VL, 32
|
||||
.set USE_AVX10, 1
|
||||
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_256)
|
||||
_aes_ctr_crypt 0
|
||||
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_256)
|
||||
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_256)
|
||||
_aes_ctr_crypt 1
|
||||
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_256)
|
||||
|
||||
.set VL, 64
|
||||
.set USE_AVX10, 1
|
||||
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_512)
|
||||
.set USE_AVX512, 1
|
||||
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx512)
|
||||
_aes_ctr_crypt 0
|
||||
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_512)
|
||||
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_512)
|
||||
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512)
|
||||
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512)
|
||||
_aes_ctr_crypt 1
|
||||
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_512)
|
||||
SYM_FUNC_END(aes_xctr_crypt_vaes_avx512)
|
||||
#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ
|
||||
|
|
|
|||
|
|
@ -52,32 +52,25 @@
|
|||
* different code, it uses a macro to generate several implementations that
|
||||
* share similar source code but are targeted at different CPUs, listed below:
|
||||
*
|
||||
* AES-NI + AVX
|
||||
* AES-NI && AVX
|
||||
* - 128-bit vectors (1 AES block per vector)
|
||||
* - VEX-coded instructions
|
||||
* - xmm0-xmm15
|
||||
* - This is for older CPUs that lack VAES but do have AVX.
|
||||
*
|
||||
* VAES + VPCLMULQDQ + AVX2
|
||||
* VAES && VPCLMULQDQ && AVX2
|
||||
* - 256-bit vectors (2 AES blocks per vector)
|
||||
* - VEX-coded instructions
|
||||
* - ymm0-ymm15
|
||||
* - This is for CPUs that have VAES but lack AVX512 or AVX10,
|
||||
* e.g. Intel's Alder Lake and AMD's Zen 3.
|
||||
* - This is for CPUs that have VAES but either lack AVX512 (e.g. Intel's
|
||||
* Alder Lake and AMD's Zen 3) or downclock too eagerly when using zmm
|
||||
* registers (e.g. Intel's Ice Lake).
|
||||
*
|
||||
* VAES + VPCLMULQDQ + AVX10/256 + BMI2
|
||||
* - 256-bit vectors (2 AES blocks per vector)
|
||||
* VAES && VPCLMULQDQ && AVX512BW && AVX512VL && BMI2
|
||||
* - 512-bit vectors (4 AES blocks per vector)
|
||||
* - EVEX-coded instructions
|
||||
* - ymm0-ymm31
|
||||
* - This is for CPUs that have AVX512 but where using zmm registers causes
|
||||
* downclocking, and for CPUs that have AVX10/256 but not AVX10/512.
|
||||
* - By "AVX10/256" we really mean (AVX512BW + AVX512VL) || AVX10/256.
|
||||
* To avoid confusion with 512-bit, we just write AVX10/256.
|
||||
*
|
||||
* VAES + VPCLMULQDQ + AVX10/512 + BMI2
|
||||
* - Same as the previous one, but upgrades to 512-bit vectors
|
||||
* (4 AES blocks per vector) in zmm0-zmm31.
|
||||
* - This is for CPUs that have good AVX512 or AVX10/512 support.
|
||||
* - zmm0-zmm31
|
||||
* - This is for CPUs that have good AVX512 support.
|
||||
*
|
||||
* This file doesn't have an implementation for AES-NI alone (without AVX), as
|
||||
* the lack of VEX would make all the assembly code different.
|
||||
|
|
@ -109,7 +102,7 @@
|
|||
|
||||
// This table contains constants for vpshufb and vpblendvb, used to
|
||||
// handle variable byte shifts and blending during ciphertext stealing
|
||||
// on CPUs that don't support AVX10-style masking.
|
||||
// on CPUs that don't support AVX512-style masking.
|
||||
.Lcts_permute_table:
|
||||
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
|
||||
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
|
||||
|
|
@ -138,7 +131,7 @@
|
|||
.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
|
||||
_define_Vi \i
|
||||
.endr
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
.irp i, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
|
||||
_define_Vi \i
|
||||
.endr
|
||||
|
|
@ -193,7 +186,7 @@
|
|||
// keys to the *end* of this register range. I.e., AES-128 uses
|
||||
// KEY5-KEY14, AES-192 uses KEY3-KEY14, and AES-256 uses KEY1-KEY14.
|
||||
// (All also use KEY0 for the XOR-only "round" at the beginning.)
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
.set KEY1_XMM, %xmm16
|
||||
.set KEY1, V16
|
||||
.set KEY2_XMM, %xmm17
|
||||
|
|
@ -227,7 +220,6 @@
|
|||
.endm
|
||||
|
||||
// Move a vector between memory and a register.
|
||||
// The register operand must be in the first 16 vector registers.
|
||||
.macro _vmovdqu src, dst
|
||||
.if VL < 64
|
||||
vmovdqu \src, \dst
|
||||
|
|
@ -238,9 +230,9 @@
|
|||
|
||||
// Broadcast a 128-bit value into a vector.
|
||||
.macro _vbroadcast128 src, dst
|
||||
.if VL == 16 && !USE_AVX10
|
||||
.if VL == 16
|
||||
vmovdqu \src, \dst
|
||||
.elseif VL == 32 && !USE_AVX10
|
||||
.elseif VL == 32
|
||||
vbroadcasti128 \src, \dst
|
||||
.else
|
||||
vbroadcasti32x4 \src, \dst
|
||||
|
|
@ -248,7 +240,6 @@
|
|||
.endm
|
||||
|
||||
// XOR two vectors together.
|
||||
// Any register operands must be in the first 16 vector registers.
|
||||
.macro _vpxor src1, src2, dst
|
||||
.if VL < 64
|
||||
vpxor \src1, \src2, \dst
|
||||
|
|
@ -259,7 +250,7 @@
|
|||
|
||||
// XOR three vectors together.
|
||||
.macro _xor3 src1, src2, src3_and_dst
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
// vpternlogd with immediate 0x96 is a three-argument XOR.
|
||||
vpternlogd $0x96, \src1, \src2, \src3_and_dst
|
||||
.else
|
||||
|
|
@ -274,7 +265,7 @@
|
|||
vpshufd $0x13, \src, \tmp
|
||||
vpaddq \src, \src, \dst
|
||||
vpsrad $31, \tmp, \tmp
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
vpternlogd $0x78, GF_POLY_XMM, \tmp, \dst
|
||||
.else
|
||||
vpand GF_POLY_XMM, \tmp, \tmp
|
||||
|
|
@ -337,7 +328,7 @@
|
|||
vpsllq $1*VL/16, TWEAK0, TWEAK1
|
||||
vpsllq $2*VL/16, TWEAK0, TWEAK2
|
||||
vpsllq $3*VL/16, TWEAK0, TWEAK3
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
vpternlogd $0x96, V0, V1, TWEAK1
|
||||
vpternlogd $0x96, V2, V3, TWEAK2
|
||||
vpternlogd $0x96, V4, V5, TWEAK3
|
||||
|
|
@ -474,26 +465,26 @@
|
|||
lea OFFS-16(KEY, KEYLEN64, 4), KEY
|
||||
|
||||
// If all 32 SIMD registers are available, cache all the round keys.
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
cmp $24, KEYLEN
|
||||
jl .Laes128\@
|
||||
je .Laes192\@
|
||||
_vbroadcast128 -6*16(KEY), KEY1
|
||||
_vbroadcast128 -5*16(KEY), KEY2
|
||||
vbroadcasti32x4 -6*16(KEY), KEY1
|
||||
vbroadcasti32x4 -5*16(KEY), KEY2
|
||||
.Laes192\@:
|
||||
_vbroadcast128 -4*16(KEY), KEY3
|
||||
_vbroadcast128 -3*16(KEY), KEY4
|
||||
vbroadcasti32x4 -4*16(KEY), KEY3
|
||||
vbroadcasti32x4 -3*16(KEY), KEY4
|
||||
.Laes128\@:
|
||||
_vbroadcast128 -2*16(KEY), KEY5
|
||||
_vbroadcast128 -1*16(KEY), KEY6
|
||||
_vbroadcast128 0*16(KEY), KEY7
|
||||
_vbroadcast128 1*16(KEY), KEY8
|
||||
_vbroadcast128 2*16(KEY), KEY9
|
||||
_vbroadcast128 3*16(KEY), KEY10
|
||||
_vbroadcast128 4*16(KEY), KEY11
|
||||
_vbroadcast128 5*16(KEY), KEY12
|
||||
_vbroadcast128 6*16(KEY), KEY13
|
||||
_vbroadcast128 7*16(KEY), KEY14
|
||||
vbroadcasti32x4 -2*16(KEY), KEY5
|
||||
vbroadcasti32x4 -1*16(KEY), KEY6
|
||||
vbroadcasti32x4 0*16(KEY), KEY7
|
||||
vbroadcasti32x4 1*16(KEY), KEY8
|
||||
vbroadcasti32x4 2*16(KEY), KEY9
|
||||
vbroadcasti32x4 3*16(KEY), KEY10
|
||||
vbroadcasti32x4 4*16(KEY), KEY11
|
||||
vbroadcasti32x4 5*16(KEY), KEY12
|
||||
vbroadcasti32x4 6*16(KEY), KEY13
|
||||
vbroadcasti32x4 7*16(KEY), KEY14
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
|
@ -521,7 +512,7 @@
|
|||
// using the same key for all block(s). The round key is loaded from the
|
||||
// appropriate register or memory location for round \i. May clobber \tmp.
|
||||
.macro _vaes_1x enc, i, xmm_suffix, data, tmp
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
_vaes \enc, KEY\i\xmm_suffix, \data
|
||||
.else
|
||||
.ifnb \xmm_suffix
|
||||
|
|
@ -538,7 +529,7 @@
|
|||
// appropriate register or memory location for round \i. In addition, does two
|
||||
// steps of the computation of the next set of tweaks. May clobber V4 and V5.
|
||||
.macro _vaes_4x enc, i
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
_tweak_step (2*(\i-5))
|
||||
_vaes \enc, KEY\i, V0
|
||||
_vaes \enc, KEY\i, V1
|
||||
|
|
@ -574,7 +565,7 @@
|
|||
.irp i, 5,6,7,8,9,10,11,12,13
|
||||
_vaes_1x \enc, \i, \xmm_suffix, \data, tmp=\tmp
|
||||
.endr
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
vpxord KEY14\xmm_suffix, \tweak, \tmp
|
||||
.else
|
||||
.ifnb \xmm_suffix
|
||||
|
|
@ -617,11 +608,11 @@
|
|||
// This is the main loop, en/decrypting 4*VL bytes per iteration.
|
||||
|
||||
// XOR each source block with its tweak and the zero-th round key.
|
||||
.if USE_AVX10
|
||||
_vmovdqu 0*VL(SRC), V0
|
||||
_vmovdqu 1*VL(SRC), V1
|
||||
_vmovdqu 2*VL(SRC), V2
|
||||
_vmovdqu 3*VL(SRC), V3
|
||||
.if USE_AVX512
|
||||
vmovdqu8 0*VL(SRC), V0
|
||||
vmovdqu8 1*VL(SRC), V1
|
||||
vmovdqu8 2*VL(SRC), V2
|
||||
vmovdqu8 3*VL(SRC), V3
|
||||
vpternlogd $0x96, TWEAK0, KEY0, V0
|
||||
vpternlogd $0x96, TWEAK1, KEY0, V1
|
||||
vpternlogd $0x96, TWEAK2, KEY0, V2
|
||||
|
|
@ -654,7 +645,7 @@
|
|||
// Reduce latency by doing the XOR before the vaesenclast, utilizing the
|
||||
// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a)
|
||||
// (and likewise for vaesdeclast).
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
_tweak_step 18
|
||||
_tweak_step 19
|
||||
vpxord TWEAK0, KEY14, V4
|
||||
|
|
@ -762,7 +753,7 @@
|
|||
_aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0, tmp=%xmm1
|
||||
.endif
|
||||
|
||||
.if USE_AVX10
|
||||
.if USE_AVX512
|
||||
// Create a mask that has the first LEN bits set.
|
||||
mov $-1, %r9d
|
||||
bzhi LEN, %r9d, %r9d
|
||||
|
|
@ -811,7 +802,7 @@
|
|||
// u8 iv[AES_BLOCK_SIZE]);
|
||||
//
|
||||
// Encrypt |iv| using the AES key |tweak_key| to get the first tweak. Assumes
|
||||
// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX10.
|
||||
// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX512.
|
||||
SYM_TYPED_FUNC_START(aes_xts_encrypt_iv)
|
||||
.set TWEAK_KEY, %rdi
|
||||
.set IV, %rsi
|
||||
|
|
@ -853,7 +844,7 @@ SYM_FUNC_END(aes_xts_encrypt_iv)
|
|||
// multiple of 16, then this function updates |tweak| to contain the next tweak.
|
||||
|
||||
.set VL, 16
|
||||
.set USE_AVX10, 0
|
||||
.set USE_AVX512, 0
|
||||
SYM_TYPED_FUNC_START(aes_xts_encrypt_aesni_avx)
|
||||
_aes_xts_crypt 1
|
||||
SYM_FUNC_END(aes_xts_encrypt_aesni_avx)
|
||||
|
|
@ -863,7 +854,7 @@ SYM_FUNC_END(aes_xts_decrypt_aesni_avx)
|
|||
|
||||
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
|
||||
.set VL, 32
|
||||
.set USE_AVX10, 0
|
||||
.set USE_AVX512, 0
|
||||
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2)
|
||||
_aes_xts_crypt 1
|
||||
SYM_FUNC_END(aes_xts_encrypt_vaes_avx2)
|
||||
|
|
@ -871,21 +862,12 @@ SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2)
|
|||
_aes_xts_crypt 0
|
||||
SYM_FUNC_END(aes_xts_decrypt_vaes_avx2)
|
||||
|
||||
.set VL, 32
|
||||
.set USE_AVX10, 1
|
||||
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256)
|
||||
_aes_xts_crypt 1
|
||||
SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256)
|
||||
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256)
|
||||
_aes_xts_crypt 0
|
||||
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256)
|
||||
|
||||
.set VL, 64
|
||||
.set USE_AVX10, 1
|
||||
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512)
|
||||
.set USE_AVX512, 1
|
||||
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx512)
|
||||
_aes_xts_crypt 1
|
||||
SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512)
|
||||
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512)
|
||||
SYM_FUNC_END(aes_xts_encrypt_vaes_avx512)
|
||||
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx512)
|
||||
_aes_xts_crypt 0
|
||||
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512)
|
||||
SYM_FUNC_END(aes_xts_decrypt_vaes_avx512)
|
||||
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
|
||||
|
|
|
|||
|
|
@ -844,8 +844,7 @@ simd_skcipher_algs_##suffix[ARRAY_SIZE(skcipher_algs_##suffix)]
|
|||
DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500);
|
||||
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
|
||||
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600);
|
||||
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_256, "vaes-avx10_256", 700);
|
||||
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_512, "vaes-avx10_512", 800);
|
||||
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800);
|
||||
#endif
|
||||
|
||||
/* The common part of the x86_64 AES-GCM key struct */
|
||||
|
|
@ -1592,11 +1591,6 @@ static int __init register_avx_algs(void)
|
|||
XFEATURE_MASK_AVX512, NULL))
|
||||
return 0;
|
||||
|
||||
err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_256,
|
||||
ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
|
||||
simd_skcipher_algs_vaes_avx10_256);
|
||||
if (err)
|
||||
return err;
|
||||
err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
|
||||
ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
|
||||
aes_gcm_simdalgs_vaes_avx10_256);
|
||||
|
|
@ -1606,15 +1600,15 @@ static int __init register_avx_algs(void)
|
|||
if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++)
|
||||
skcipher_algs_vaes_avx10_512[i].base.cra_priority = 1;
|
||||
for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx512); i++)
|
||||
skcipher_algs_vaes_avx512[i].base.cra_priority = 1;
|
||||
for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
|
||||
aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
|
||||
}
|
||||
|
||||
err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_512,
|
||||
ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
|
||||
simd_skcipher_algs_vaes_avx10_512);
|
||||
err = simd_register_skciphers_compat(skcipher_algs_vaes_avx512,
|
||||
ARRAY_SIZE(skcipher_algs_vaes_avx512),
|
||||
simd_skcipher_algs_vaes_avx512);
|
||||
if (err)
|
||||
return err;
|
||||
err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
|
||||
|
|
@ -1641,18 +1635,14 @@ static void unregister_avx_algs(void)
|
|||
simd_unregister_skciphers(skcipher_algs_vaes_avx2,
|
||||
ARRAY_SIZE(skcipher_algs_vaes_avx2),
|
||||
simd_skcipher_algs_vaes_avx2);
|
||||
if (simd_skcipher_algs_vaes_avx10_256[0])
|
||||
simd_unregister_skciphers(skcipher_algs_vaes_avx10_256,
|
||||
ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
|
||||
simd_skcipher_algs_vaes_avx10_256);
|
||||
if (aes_gcm_simdalgs_vaes_avx10_256[0])
|
||||
simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
|
||||
ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
|
||||
aes_gcm_simdalgs_vaes_avx10_256);
|
||||
if (simd_skcipher_algs_vaes_avx10_512[0])
|
||||
simd_unregister_skciphers(skcipher_algs_vaes_avx10_512,
|
||||
ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
|
||||
simd_skcipher_algs_vaes_avx10_512);
|
||||
if (simd_skcipher_algs_vaes_avx512[0])
|
||||
simd_unregister_skciphers(skcipher_algs_vaes_avx512,
|
||||
ARRAY_SIZE(skcipher_algs_vaes_avx512),
|
||||
simd_skcipher_algs_vaes_avx512);
|
||||
if (aes_gcm_simdalgs_vaes_avx10_512[0])
|
||||
simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
|
||||
ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user