lib/crypto: arm64/aes: Migrate optimized CBC-based MACs into library

Rather than exposing the arm64-optimized CMAC, XCBC-MAC, and CBC-MAC code
via arm64-specific crypto_shash algorithms, just implement the
aes_cbcmac_blocks_arch() library function.  This is much simpler, it
makes the corresponding library functions arm64-optimized, and it
fixes the longstanding issue where this optimized code was disabled by
default.  The corresponding algorithms still remain available through
crypto_shash, but individual architectures no longer need to handle them.

Note that to be compatible with the library using 'size_t' lengths, the
types of the return value and the 'blocks' parameter of the assembly
functions had to be changed to 'size_t', and the assembly code had to be
updated accordingly to use the corresponding 64-bit registers.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260218213501.136844-6-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
Eric Biggers 2026-02-18 13:34:51 -08:00
parent 4b90840320
commit 58286738b1
5 changed files with 61 additions and 231 deletions

View File

@ -144,7 +144,7 @@ config CRYPTO_AES_ARM64_CE_CCM
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE_BLK
select CRYPTO_AEAD
select CRYPTO_LIB_AES
select CRYPTO_LIB_AES_CBC_MACS
help
AEAD cipher: AES cipher algorithms (FIPS-197) with
CCM (Counter with Cipher Block Chaining-Message Authentication Code)

View File

@ -7,7 +7,6 @@
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <crypto/sha2.h>
@ -37,7 +36,6 @@
#define aes_xctr_encrypt ce_aes_xctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
#define aes_mac_update ce_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
@ -54,7 +52,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 Crypto Extensions");
#define aes_xctr_encrypt neon_aes_xctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 NEON");
#endif
#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
@ -66,9 +63,6 @@ MODULE_ALIAS_CRYPTO("xctr(aes)");
#endif
MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
MODULE_ALIAS_CRYPTO("cmac(aes)");
MODULE_ALIAS_CRYPTO("xcbc(aes)");
MODULE_ALIAS_CRYPTO("cbcmac(aes)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
@ -84,15 +78,6 @@ struct crypto_aes_essiv_cbc_ctx {
struct crypto_aes_ctx __aligned(8) key2;
};
/*
 * Per-transform (per-key) context shared by the cmac(aes), xcbc(aes) and
 * cbcmac(aes) algorithms.
 *
 * @key:    expanded AES key, filled in by aes_expandkey() in cbcmac_setkey()
 * @consts: trailing storage for algorithm-specific constants; cmac(aes) and
 *          xcbc(aes) reserve 2 * AES_BLOCK_SIZE bytes for it through
 *          cra_ctxsize, cbcmac(aes) reserves none
 */
struct mac_tfm_ctx {
struct crypto_aes_ctx key;
u8 __aligned(8) consts[];
};
/*
 * Per-request state: the running MAC value ("dg"), one AES block in size.
 * It is zeroed by mac_init(), updated in place through mac_do_update(), and
 * copied out as the digest by the finup handlers.
 */
struct mac_desc_ctx {
u8 dg[AES_BLOCK_SIZE];
};
static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@ -723,211 +708,14 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = essiv_cbc_decrypt,
} };
/*
 * Set the key for cbcmac(aes): expand the raw AES key into the transform
 * context.  Also used as the first step of the cmac/xcbc setkey handlers.
 *
 * Returns the result of aes_expandkey().
 */
static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
			 unsigned int key_len)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(tfm);
	int ret;

	ret = aes_expandkey(&tctx->key, in_key, key_len);
	return ret;
}
/*
 * Multiply a 128-bit big-endian value by x: shift the whole value left by
 * one bit and, if a bit was shifted out of the top, fold it back into the
 * low byte with the constant 0x87.
 *
 * Both input words are read before anything is written, so @y may alias @x.
 */
static void cmac_gf128_mul_by_x(be128 *y, const be128 *x)
{
	const u64 hi = be64_to_cpu(x->a);
	const u64 lo = be64_to_cpu(x->b);
	/* reduction term, applied only when the top bit of the input is set */
	const u64 fold = (hi >> 63) ? 0x87 : 0;

	y->a = cpu_to_be64((hi << 1) | (lo >> 63));
	y->b = cpu_to_be64((lo << 1) ^ fold);
}
/*
 * Set the key for cmac(aes).
 *
 * Expands the AES key via cbcmac_setkey(), then derives the two constants
 * kept in ctx->consts: the all-zero block is encrypted under the new key,
 * and that value is multiplied by x once to give consts[0] and once more to
 * give consts[1].
 *
 * Returns 0 on success, or the error returned by cbcmac_setkey().
 */
static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
unsigned int key_len)
{
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
be128 *consts = (be128 *)ctx->consts;
/* evaluates to 10/12/14 for 16/24/32-byte keys */
int rounds = 6 + key_len / 4;
int err;
err = cbcmac_setkey(tfm, in_key, key_len);
if (err)
return err;
/* encrypt the zero vector */
scoped_ksimd()
aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){},
ctx->key.key_enc, rounds, 1);
/* first constant: the encrypted zero block times x, computed in place */
cmac_gf128_mul_by_x(consts, consts);
/* second constant: the first constant times x again */
cmac_gf128_mul_by_x(consts + 1, consts);
return 0;
}
/*
 * Set the key for xcbc(aes).
 *
 * The user key is expanded first and used to encrypt three fixed blocks
 * (all bytes 0x01, 0x02 and 0x03).  The first result becomes the actual
 * MAC key; the other two are stored in ctx->consts for use by the finup
 * handler.
 *
 * Returns 0 on success, or the error from either key expansion.
 */
static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
unsigned int key_len)
{
static u8 const ks[3][AES_BLOCK_SIZE] = {
{ [0 ... AES_BLOCK_SIZE - 1] = 0x1 },
{ [0 ... AES_BLOCK_SIZE - 1] = 0x2 },
{ [0 ... AES_BLOCK_SIZE - 1] = 0x3 },
};
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
/* evaluates to 10/12/14 for 16/24/32-byte keys */
int rounds = 6 + key_len / 4;
u8 key[AES_BLOCK_SIZE];
int err;
err = cbcmac_setkey(tfm, in_key, key_len);
if (err)
return err;
scoped_ksimd() {
/* derived MAC key = E(user key, 0x01 block) */
aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
/* two blocks: ks[1] and ks[2] are encrypted into consts[0..31] */
aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
}
/* replace the expanded user key with the derived one-block key */
return cbcmac_setkey(tfm, key, sizeof(key));
}
/*
 * Begin a new MAC computation: reset the running MAC value to all zeroes.
 * Always returns 0.
 */
static int mac_init(struct shash_desc *desc)
{
	struct mac_desc_ctx *state = shash_desc_ctx(desc);

	memset(state->dg, 0, AES_BLOCK_SIZE);

	return 0;
}
/*
 * Run the CBC-MAC core over @blocks full blocks of @in, updating the running
 * MAC value @dg in place.
 *
 * aes_mac_update is mapped by the defines in this file to either
 * ce_aes_mac_update or neon_aes_mac_update.  The loop treats its return
 * value as the number of blocks not yet processed: it advances @in past the
 * blocks that were consumed and calls again, inside a new scoped_ksimd()
 * section, until nothing remains.
 *
 * @enc_before is forwarded unchanged on every iteration, and its negation is
 * passed as the enc_after argument.
 */
static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
u8 dg[], int enc_before)
{
/* evaluates to 10/12/14 for key_length 16/24/32 */
int rounds = 6 + ctx->key_length / 4;
int rem;
do {
scoped_ksimd()
rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
dg, enc_before, !enc_before);
/* skip the (blocks - rem) blocks that were consumed by this call */
in += (blocks - rem) * AES_BLOCK_SIZE;
blocks = rem;
} while (blocks);
}
/*
 * shash ->update handler: absorb every full AES block of @p into the running
 * MAC value and return the number of trailing bytes (len % AES_BLOCK_SIZE)
 * that were not consumed.
 */
static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *state = shash_desc_ctx(desc);
	int nblocks = len / AES_BLOCK_SIZE;
	unsigned int leftover = len % AES_BLOCK_SIZE;

	mac_do_update(&tctx->key, p, nblocks, state->dg, 0);

	return leftover;
}
/*
 * shash ->finup handler for cbcmac(aes).
 *
 * If @len trailing bytes remain, they are XORed into the running MAC value
 * and the value is passed through mac_do_update() once more with no input
 * blocks and enc_before set.  The MAC value is then copied to @out.
 * Always returns 0.
 */
static int cbcmac_finup(struct shash_desc *desc, const u8 *src,
			unsigned int len, u8 *out)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *state = shash_desc_ctx(desc);

	if (len != 0) {
		/* fold the partial block in; the rest of dg is left as is */
		crypto_xor(state->dg, src, len);
		mac_do_update(&tctx->key, NULL, 0, state->dg, 1);
	}

	memcpy(out, state->dg, AES_BLOCK_SIZE);

	return 0;
}
/*
 * shash ->finup handler shared by cmac(aes) and xcbc(aes).
 *
 * The final @len bytes are XORed into the running MAC value.  A full final
 * block (len == AES_BLOCK_SIZE) is finished with the first constant in
 * tctx->consts; any other length gets a 0x80 marker XORed in at offset @len
 * and is finished with the second constant.  The chosen constant is fed to
 * mac_do_update() as a single input block, and the resulting MAC value is
 * copied to @out.  Always returns 0.
 */
static int cmac_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
		      u8 *out)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *state = shash_desc_ctx(desc);
	const u8 *final_const = tctx->consts;

	crypto_xor(state->dg, src, len);

	if (len != AES_BLOCK_SIZE) {
		/* incomplete final block: add the pad marker, use constant #2 */
		state->dg[len] ^= 0x80;
		final_const = tctx->consts + AES_BLOCK_SIZE;
	}

	mac_do_update(&tctx->key, final_const, 1, state->dg, 0);
	memcpy(out, state->dg, AES_BLOCK_SIZE);

	return 0;
}
/*
 * The three AES-based MAC algorithms provided by this driver.  All of them
 * share mac_init() and mac_update() and produce an AES_BLOCK_SIZE digest;
 * they differ in their setkey/finup handlers and in how much trailing space
 * they reserve after struct mac_tfm_ctx for constants.
 *
 * NOTE(review): CRYPTO_AHASH_ALG_FINAL_NONZERO presumably guarantees that
 * finup is called with at least one byte; cmac_finup() has no separate
 * handling for len == 0 -- confirm against the flag's definition.
 */
static struct shash_alg mac_algs[] = { {
/* CMAC: constants derived in cmac_setkey() */
.base.cra_name = "cmac(aes)",
.base.cra_driver_name = "cmac-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINAL_NONZERO,
.base.cra_blocksize = AES_BLOCK_SIZE,
/* room for the two 16-byte constants in mac_tfm_ctx.consts */
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
.finup = cmac_finup,
.setkey = cmac_setkey,
.descsize = sizeof(struct mac_desc_ctx),
}, {
/* XCBC: same handlers as CMAC except for the key setup */
.base.cra_name = "xcbc(aes)",
.base.cra_driver_name = "xcbc-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY |
CRYPTO_AHASH_ALG_FINAL_NONZERO,
.base.cra_blocksize = AES_BLOCK_SIZE,
/* room for the two 16-byte constants in mac_tfm_ctx.consts */
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
.finup = cmac_finup,
.setkey = xcbc_setkey,
.descsize = sizeof(struct mac_desc_ctx),
}, {
/* plain CBC-MAC: no extra constants, so no trailing context space */
.base.cra_name = "cbcmac(aes)",
.base.cra_driver_name = "cbcmac-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
.base.cra_module = THIS_MODULE,
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
.finup = cbcmac_finup,
.setkey = cbcmac_setkey,
.descsize = sizeof(struct mac_desc_ctx),
} };
/*
 * Module exit: unregister the algorithms registered by aes_init().
 *
 * NOTE(review): this listing appears to be a diff rendered without its +/-
 * markers.  Since the commit message says the arm64-specific MAC shash
 * algorithms are dropped, the crypto_unregister_shashes() line is presumably
 * a removed line -- confirm against the actual tree.
 */
static void aes_exit(void)
{
crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs));
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
/*
 * Module init: register the skcipher algorithms, then the MAC shash
 * algorithms, unregistering the skciphers again if the second step fails.
 * Returns 0 on success or the registration error.
 *
 * NOTE(review): as listed, the final `return crypto_register_skciphers(...)`
 * can never be reached because it follows `return err;`.  This listing
 * appears to be a diff rendered without its +/- markers, so that last line
 * is presumably the entire post-change body and everything above it the
 * removed pre-change body -- confirm against the actual tree.
 */
static int __init aes_init(void)
{
int err;
err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
if (err)
return err;
err = crypto_register_shashes(mac_algs, ARRAY_SIZE(mac_algs));
if (err)
goto unregister_ciphers;
return 0;
/* error unwind: undo the skcipher registration */
unregister_ciphers:
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
return err;
return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
#ifdef USE_V8_CRYPTO_EXTENSIONS

View File

@ -200,9 +200,6 @@ asmlinkage void neon_aes_essiv_cbc_decrypt(u8 out[], u8 const in[],
u32 const rk1[], int rounds,
int blocks, u8 iv[],
u32 const rk2[]);
asmlinkage int neon_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before,
int enc_after);
asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
@ -233,9 +230,9 @@ asmlinkage void ce_aes_essiv_cbc_encrypt(u8 out[], u8 const in[],
asmlinkage void ce_aes_essiv_cbc_decrypt(u8 out[], u8 const in[],
u32 const rk1[], int rounds,
int blocks, u8 iv[], u32 const rk2[]);
asmlinkage int ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before,
int enc_after);
asmlinkage size_t ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
size_t blocks, u8 dg[], int enc_before,
int enc_after);
#elif defined(CONFIG_PPC)
void ppc_expand_key_128(u32 *key_enc, const u8 *key);
void ppc_expand_key_192(u32 *key_enc, const u8 *key);

View File

@ -815,9 +815,11 @@ AES_FUNC_START(aes_xts_decrypt)
b .Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)
#if IS_ENABLED(CONFIG_CRYPTO_LIB_AES_CBC_MACS)
/*
* aes_mac_update(u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 dg[], int enc_before, int enc_after)
* size_t aes_mac_update(u8 const in[], u32 const rk[], int rounds,
* size_t blocks, u8 dg[], int enc_before,
* int enc_after);
*/
AES_FUNC_START(aes_mac_update)
ld1 {v0.16b}, [x4] /* get dg */
@ -827,7 +829,7 @@ AES_FUNC_START(aes_mac_update)
encrypt_block v0, w2, x1, x7, w8
.Lmacloop4x:
subs w3, w3, #4
subs x3, x3, #4
bmi .Lmac1x
ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
@ -837,7 +839,7 @@ AES_FUNC_START(aes_mac_update)
eor v0.16b, v0.16b, v3.16b
encrypt_block v0, w2, x1, x7, w8
eor v0.16b, v0.16b, v4.16b
cmp w3, wzr
cmp x3, xzr
csinv w5, w6, wzr, eq
cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8
@ -845,13 +847,13 @@ AES_FUNC_START(aes_mac_update)
cond_yield .Lmacout, x7, x8
b .Lmacloop4x
.Lmac1x:
add w3, w3, #4
add x3, x3, #4
.Lmacloop:
cbz w3, .Lmacout
cbz x3, .Lmacout
ld1 {v1.16b}, [x0], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
subs w3, w3, #1
subs x3, x3, #1
csinv w5, w6, wzr, eq
cbz w5, .Lmacout
@ -861,6 +863,7 @@ AES_FUNC_START(aes_mac_update)
.Lmacout:
st1 {v0.16b}, [x4] /* return dg */
mov w0, w3
mov x0, x3
ret
AES_FUNC_END(aes_mac_update)
#endif /* CONFIG_CRYPTO_LIB_AES_CBC_MACS */

View File

@ -11,6 +11,7 @@
#include <linux/unaligned.h>
#include <linux/cpufeature.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes);
struct aes_block {
@ -28,6 +29,9 @@ asmlinkage void __aes_ce_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
asmlinkage u32 __aes_ce_sub(u32 l);
asmlinkage void __aes_ce_invert(struct aes_block *out,
const struct aes_block *in);
/*
 * NEON variant of the assembly CBC-MAC routine, declared here for use by
 * aes_cbcmac_blocks_arch().  Its caller treats the return value as the
 * number of blocks that still remain to be processed, and @dg as the running
 * MAC value that is updated in place.
 */
asmlinkage size_t neon_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
size_t blocks, u8 dg[], int enc_before,
int enc_after);
/*
* Expand an AES key using the crypto extensions if supported and usable or
@ -139,7 +143,6 @@ EXPORT_SYMBOL_NS_GPL(neon_aes_xts_encrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(neon_aes_xts_decrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(neon_aes_essiv_cbc_encrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(neon_aes_essiv_cbc_decrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(neon_aes_mac_update, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(ce_aes_ecb_encrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(ce_aes_ecb_decrypt, "CRYPTO_INTERNAL");
@ -153,6 +156,8 @@ EXPORT_SYMBOL_NS_GPL(ce_aes_xts_encrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(ce_aes_xts_decrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(ce_aes_essiv_cbc_encrypt, "CRYPTO_INTERNAL");
EXPORT_SYMBOL_NS_GPL(ce_aes_essiv_cbc_decrypt, "CRYPTO_INTERNAL");
#endif
#if IS_MODULE(CONFIG_CRYPTO_AES_ARM64_CE_CCM)
EXPORT_SYMBOL_NS_GPL(ce_aes_mac_update, "CRYPTO_INTERNAL");
#endif
@ -184,11 +189,48 @@ static void aes_decrypt_arch(const struct aes_key *key,
}
}
#if IS_ENABLED(CONFIG_CRYPTO_LIB_AES_CBC_MACS)
#define aes_cbcmac_blocks_arch aes_cbcmac_blocks_arch
/*
 * arm64 implementation of the CBC-MAC block function for the AES library.
 *
 * @h:          running MAC value, updated in place
 * @key:        AES encryption key; its round keys and round count are handed
 *              to the assembly routine
 * @data:       input data, @nblocks blocks of AES_BLOCK_SIZE bytes each
 * @nblocks:    number of blocks to process
 * @enc_before: forwarded to the assembly routine on the first call only
 * @enc_after:  forwarded to the assembly routine on every call
 *
 * Returns true if the request was handled here.  Returns false, without
 * touching @h, if kernel-mode NEON is not configured, NEON was not detected,
 * or SIMD may not be used in the current context (presumably the caller then
 * falls back to a generic implementation -- confirm against the caller).
 */
static bool aes_cbcmac_blocks_arch(u8 h[AES_BLOCK_SIZE],
const struct aes_enckey *key, const u8 *data,
size_t nblocks, bool enc_before,
bool enc_after)
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
/*
 * The assembly routine may return before all blocks are done; its
 * return value is the number of blocks left.  Each retry runs in a
 * new scoped_ksimd() section.
 */
do {
size_t rem;
scoped_ksimd() {
/* prefer the Crypto Extensions code when the CPU has AES */
if (static_branch_likely(&have_aes))
rem = ce_aes_mac_update(
data, key->k.rndkeys,
key->nrounds, nblocks, h,
enc_before, enc_after);
else
rem = neon_aes_mac_update(
data, key->k.rndkeys,
key->nrounds, nblocks, h,
enc_before, enc_after);
}
/* skip the (nblocks - rem) blocks consumed by this call */
data += (nblocks - rem) * AES_BLOCK_SIZE;
nblocks = rem;
/*
 * Only request the leading encryption once; presumably the
 * first call has already performed it -- confirm against
 * the assembly.
 */
enc_before = false;
} while (nblocks);
return true;
}
return false;
}
#endif /* CONFIG_CRYPTO_LIB_AES_CBC_MACS */
#ifdef CONFIG_KERNEL_MODE_NEON
#define aes_mod_init_arch aes_mod_init_arch
/*
 * One-time arch setup: enable the static keys that select the accelerated
 * code paths according to the CPU features that are present.
 *
 * NOTE(review): this listing appears to be a diff rendered without its +/-
 * markers.  The leading unconditional AES check is presumably the removed
 * pre-change body, and the ASIMD-guarded block the post-change body in which
 * have_aes is only enabled when ASIMD is also present -- confirm against the
 * actual tree.
 */
static void aes_mod_init_arch(void)
{
if (cpu_have_named_feature(AES))
static_branch_enable(&have_aes);
if (cpu_have_named_feature(ASIMD)) {
static_branch_enable(&have_neon);
if (cpu_have_named_feature(AES))
static_branch_enable(&have_aes);
}
}
#endif /* CONFIG_KERNEL_MODE_NEON */