lib/crypto: x86/sha512: Migrate optimized SHA-512 code to library

Instead of exposing the x86-optimized SHA-512 code via x86-specific
crypto_shash algorithms, instead just implement the sha512_blocks()
library function.  This is much simpler, it makes the SHA-512 (and
SHA-384) library functions be x86-optimized, and it fixes the
longstanding issue where the x86-optimized SHA-512 code was disabled by
default.  SHA-512 still remains available through crypto_shash, but
individual architectures no longer need to handle it.

To match sha512_blocks(), change the type of the nblocks parameter of
the assembly functions from int to size_t.  The assembly functions
actually already treated it as size_t.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250630160320.2888-15-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
Eric Biggers 2025-06-30 09:03:18 -07:00
parent 02b35bab7e
commit 484c18119f
9 changed files with 72 additions and 352 deletions

View File

@ -390,19 +390,6 @@ config CRYPTO_SHA1_SSSE3
- AVX2 (Advanced Vector Extensions 2)
- SHA-NI (SHA Extensions New Instructions)
config CRYPTO_SHA512_SSSE3
tristate "Hash functions: SHA-384 and SHA-512 (SSSE3/AVX/AVX2)"
depends on 64BIT
select CRYPTO_SHA512
select CRYPTO_HASH
help
SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
Architecture: x86_64 using:
- SSSE3 (Supplemental SSE3)
- AVX (Advanced Vector Extensions)
- AVX2 (Advanced Vector Extensions 2)
config CRYPTO_SM3_AVX_X86_64
tristate "Hash functions: SM3 (AVX)"
depends on 64BIT

View File

@ -54,9 +54,6 @@ endif
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ni_asm.o sha1_ssse3_glue.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o

View File

@ -1,322 +0,0 @@
/*
* Cryptographic API.
*
* Glue code for the SHA512 Secure Hash Algorithm assembler
* implementation using supplemental SSE3 / AVX / AVX2 instructions.
*
* This file is based on sha512_generic.c
*
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <crypto/internal/hash.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <crypto/sha2.h>
#include <crypto/sha512_base.h>
asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
const u8 *data, int blocks);
static int sha512_update_x86(struct shash_desc *desc, const u8 *data,
unsigned int len, sha512_block_fn *sha512_xform)
{
int remain;
/*
* Make sure struct sha512_state begins directly with the SHA512
* 512-bit internal state, as this is what the asm functions expect.
*/
BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
kernel_fpu_begin();
remain = sha512_base_do_update_blocks(desc, data, len, sha512_xform);
kernel_fpu_end();
return remain;
}
static int sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha512_block_fn *sha512_xform)
{
kernel_fpu_begin();
sha512_base_do_finup(desc, data, len, sha512_xform);
kernel_fpu_end();
return sha512_base_finish(desc, out);
}
static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha512_update_x86(desc, data, len, sha512_transform_ssse3);
}
static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha512_finup(desc, data, len, out, sha512_transform_ssse3);
}
static struct shash_alg sha512_ssse3_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_ssse3_update,
.finup = sha512_ssse3_finup,
.descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_ssse3_update,
.finup = sha512_ssse3_finup,
.descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int register_sha512_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
return crypto_register_shashes(sha512_ssse3_algs,
ARRAY_SIZE(sha512_ssse3_algs));
return 0;
}
static void unregister_sha512_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_shashes(sha512_ssse3_algs,
ARRAY_SIZE(sha512_ssse3_algs));
}
asmlinkage void sha512_transform_avx(struct sha512_state *state,
const u8 *data, int blocks);
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
return true;
}
static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha512_update_x86(desc, data, len, sha512_transform_avx);
}
static int sha512_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha512_finup(desc, data, len, out, sha512_transform_avx);
}
static struct shash_alg sha512_avx_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_avx_update,
.finup = sha512_avx_finup,
.descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-avx",
.cra_priority = 160,
.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_avx_update,
.finup = sha512_avx_finup,
.descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-avx",
.cra_priority = 160,
.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int register_sha512_avx(void)
{
if (avx_usable())
return crypto_register_shashes(sha512_avx_algs,
ARRAY_SIZE(sha512_avx_algs));
return 0;
}
static void unregister_sha512_avx(void)
{
if (avx_usable())
crypto_unregister_shashes(sha512_avx_algs,
ARRAY_SIZE(sha512_avx_algs));
}
asmlinkage void sha512_transform_rorx(struct sha512_state *state,
const u8 *data, int blocks);
static int sha512_avx2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha512_update_x86(desc, data, len, sha512_transform_rorx);
}
static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha512_finup(desc, data, len, out, sha512_transform_rorx);
}
static struct shash_alg sha512_avx2_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_avx2_update,
.finup = sha512_avx2_finup,
.descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-avx2",
.cra_priority = 170,
.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_avx2_update,
.finup = sha512_avx2_finup,
.descsize = SHA512_STATE_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-avx2",
.cra_priority = 170,
.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINUP_MAX,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static bool avx2_usable(void)
{
if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_BMI2))
return true;
return false;
}
static int register_sha512_avx2(void)
{
if (avx2_usable())
return crypto_register_shashes(sha512_avx2_algs,
ARRAY_SIZE(sha512_avx2_algs));
return 0;
}
static const struct x86_cpu_id module_cpu_ids[] = {
X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
static void unregister_sha512_avx2(void)
{
if (avx2_usable())
crypto_unregister_shashes(sha512_avx2_algs,
ARRAY_SIZE(sha512_avx2_algs));
}
static int __init sha512_ssse3_mod_init(void)
{
if (!x86_match_cpu(module_cpu_ids))
return -ENODEV;
if (register_sha512_ssse3())
goto fail;
if (register_sha512_avx()) {
unregister_sha512_ssse3();
goto fail;
}
if (register_sha512_avx2()) {
unregister_sha512_avx();
unregister_sha512_ssse3();
goto fail;
}
return 0;
fail:
return -ENODEV;
}
static void __exit sha512_ssse3_mod_fini(void)
{
unregister_sha512_avx2();
unregister_sha512_avx();
unregister_sha512_ssse3();
}
module_init(sha512_ssse3_mod_init);
module_exit(sha512_ssse3_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS_CRYPTO("sha512");
MODULE_ALIAS_CRYPTO("sha512-ssse3");
MODULE_ALIAS_CRYPTO("sha512-avx");
MODULE_ALIAS_CRYPTO("sha512-avx2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha384-ssse3");
MODULE_ALIAS_CRYPTO("sha384-avx");
MODULE_ALIAS_CRYPTO("sha384-avx2");

View File

@ -183,6 +183,7 @@ config CRYPTO_LIB_SHA512_ARCH
default y if RISCV && 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
default y if S390
default y if SPARC64
default y if X86_64
config CRYPTO_LIB_SM3
tristate

View File

@ -95,6 +95,9 @@ endif
libsha512-$(CONFIG_RISCV) += riscv/sha512-riscv64-zvknhb-zvkb.o
libsha512-$(CONFIG_SPARC) += sparc/sha512_asm.o
libsha512-$(CONFIG_X86) += x86/sha512-ssse3-asm.o \
x86/sha512-avx-asm.o \
x86/sha512-avx2-asm.o
endif # CONFIG_CRYPTO_LIB_SHA512_ARCH
obj-$(CONFIG_MPILIB) += mpi/

View File

@ -48,7 +48,6 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
.text
@ -267,14 +266,16 @@ frame_size = frame_WK + WK_SIZE
.endm
########################################################################
# void sha512_transform_avx(sha512_state *state, const u8 *data, int blocks)
# void sha512_transform_avx(struct sha512_block_state *state,
# const u8 *data, size_t nblocks);
# Purpose: Updates the SHA512 digest stored at "state" with the message
# stored in "data".
# The size of the message pointed to by "data" must be an integer multiple
# of SHA512 message blocks.
# "blocks" is the message length in SHA512 blocks
# "nblocks" is the message length in SHA512 blocks
########################################################################
SYM_TYPED_FUNC_START(sha512_transform_avx)
SYM_FUNC_START(sha512_transform_avx)
test msglen, msglen
je .Lnowork

View File

@ -50,7 +50,6 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
.text
@ -559,14 +558,16 @@ frame_size = frame_CTX + CTX_SIZE
.endm
########################################################################
# void sha512_transform_rorx(sha512_state *state, const u8 *data, int blocks)
# void sha512_transform_rorx(struct sha512_block_state *state,
# const u8 *data, size_t nblocks);
# Purpose: Updates the SHA512 digest stored at "state" with the message
# stored in "data".
# The size of the message pointed to by "data" must be an integer multiple
# of SHA512 message blocks.
# "blocks" is the message length in SHA512 blocks
# "nblocks" is the message length in SHA512 blocks
########################################################################
SYM_TYPED_FUNC_START(sha512_transform_rorx)
SYM_FUNC_START(sha512_transform_rorx)
# Save GPRs
push %rbx
push %r12

View File

@ -48,7 +48,6 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
.text
@ -266,16 +265,15 @@ frame_size = frame_WK + WK_SIZE
.endm
########################################################################
## void sha512_transform_ssse3(struct sha512_state *state, const u8 *data,
## int blocks);
# (struct sha512_state is assumed to begin with u64 state[8])
# void sha512_transform_ssse3(struct sha512_block_state *state,
# const u8 *data, size_t nblocks);
# Purpose: Updates the SHA512 digest stored at "state" with the message
# stored in "data".
# The size of the message pointed to by "data" must be an integer multiple
# of SHA512 message blocks.
# "blocks" is the message length in SHA512 blocks.
# "nblocks" is the message length in SHA512 blocks
########################################################################
SYM_TYPED_FUNC_START(sha512_transform_ssse3)
SYM_FUNC_START(sha512_transform_ssse3)
test msglen, msglen
je .Lnowork

54
lib/crypto/x86/sha512.h Normal file
View File

@ -0,0 +1,54 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* x86-optimized SHA-512 block function
*
* Copyright 2025 Google LLC
*/
#include <asm/fpu/api.h>
#include <crypto/internal/simd.h>
#include <linux/static_call.h>
DEFINE_STATIC_CALL(sha512_blocks_x86, sha512_blocks_generic);
#define DEFINE_X86_SHA512_FN(c_fn, asm_fn) \
asmlinkage void asm_fn(struct sha512_block_state *state, \
const u8 *data, size_t nblocks); \
static void c_fn(struct sha512_block_state *state, const u8 *data, \
size_t nblocks) \
{ \
if (likely(crypto_simd_usable())) { \
kernel_fpu_begin(); \
asm_fn(state, data, nblocks); \
kernel_fpu_end(); \
} else { \
sha512_blocks_generic(state, data, nblocks); \
} \
}
DEFINE_X86_SHA512_FN(sha512_blocks_ssse3, sha512_transform_ssse3);
DEFINE_X86_SHA512_FN(sha512_blocks_avx, sha512_transform_avx);
DEFINE_X86_SHA512_FN(sha512_blocks_avx2, sha512_transform_rorx);
static void sha512_blocks(struct sha512_block_state *state,
const u8 *data, size_t nblocks)
{
static_call(sha512_blocks_x86)(state, data, nblocks);
}
#define sha512_mod_init_arch sha512_mod_init_arch
static inline void sha512_mod_init_arch(void)
{
if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL) &&
boot_cpu_has(X86_FEATURE_AVX)) {
if (boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_BMI2))
static_call_update(sha512_blocks_x86,
sha512_blocks_avx2);
else
static_call_update(sha512_blocks_x86,
sha512_blocks_avx);
} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
static_call_update(sha512_blocks_x86, sha512_blocks_ssse3);
}
}