From ee908cc15dda86b8e7f57a1eb62eace469e4477d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 6 Mar 2014 16:23:33 +0800 Subject: [PATCH 01/30] arm64/crypto: SHA-1 using ARMv8 Crypto Extensions This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs that have support for the SHA-1 part of the ARM v8 Crypto Extensions. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu (cherry picked from commit 2c98833a42cd194ba0f537cd21917e15e5593715) Signed-off-by: Mark Brown Conflicts: arch/arm64/Makefile --- arch/arm64/Kconfig | 3 + arch/arm64/Makefile | 1 + arch/arm64/crypto/Kconfig | 16 +++ arch/arm64/crypto/Makefile | 12 +++ arch/arm64/crypto/sha1-ce-core.S | 153 +++++++++++++++++++++++++++ arch/arm64/crypto/sha1-ce-glue.c | 174 +++++++++++++++++++++++++++++++ 6 files changed, 359 insertions(+) create mode 100644 arch/arm64/crypto/Kconfig create mode 100644 arch/arm64/crypto/Makefile create mode 100644 arch/arm64/crypto/sha1-ce-core.S create mode 100644 arch/arm64/crypto/sha1-ce-glue.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..f7ef3f6ee979 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -240,5 +240,8 @@ source "arch/arm64/Kconfig.debug" source "security/Kconfig" source "crypto/Kconfig" +if CRYPTO +source "arch/arm64/crypto/Kconfig" +endif source "lib/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212fd..793247fcb83d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000 export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig new file mode 100644 index 000000000000..7956881b5986 --- /dev/null +++ b/arch/arm64/crypto/Kconfig @@ -0,0 +1,16 @@ + +menuconfig ARM64_CRYPTO + bool "ARM64 Accelerated Cryptographic Algorithms" + depends on ARM64 + help + Say Y here to choose from a selection of cryptographic algorithms + implemented using ARM64 specific CPU features or instructions. + +if ARM64_CRYPTO + +config CRYPTO_SHA1_ARM64_CE + tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile new file mode 100644 index 000000000000..0ed3caaec81b --- /dev/null +++ b/arch/arm64/crypto/Makefile @@ -0,0 +1,12 @@ +# +# linux/arch/arm64/crypto/Makefile +# +# Copyright (C) 2014 Linaro Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o +sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..09d57d98609c --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -0,0 +1,153 @@ +/* + * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .text + .arch armv8-a+crypto + + k0 .req v0 + k1 .req v1 + k2 .req v2 + k3 .req v3 + + t0 .req v4 + t1 .req v5 + + dga .req q6 + dgav .req v6 + dgb .req s7 + dgbv .req v7 + + dg0q .req q12 + dg0s .req s12 + dg0v .req v12 + dg1s .req s13 + dg1v .req v13 + dg2s .req s14 + + .macro add_only, op, ev, rc, s0, dg1 + .ifc \ev, ev + add t1.4s, v\s0\().4s, \rc\().4s + sha1h dg2s, dg0s + .ifnb \dg1 + sha1\op dg0q, \dg1, t0.4s + .else + sha1\op dg0q, dg1s, t0.4s + .endif + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha1h dg1s, dg0s + sha1\op dg0q, dg2s, t1.4s + .endif + .endm + + .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 + sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s + add_only \op, \ev, \rc, \s1, \dg1 + sha1su1 v\s0\().4s, v\s3\().4s + .endm + + /* + * The SHA1 round constants + */ + .align 4 +.Lsha1_rcon: + .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 + + /* + * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha1_ce_transform) + /* load round constants */ + adr x6, .Lsha1_rcon + ld1r {k0.4s}, [x6], #4 + ld1r {k1.4s}, [x6], #4 + ld1r {k2.4s}, [x6], #4 + ld1r {k3.4s}, [x6] + + /* load state */ + ldr dga, [x2] + ldr dgb, [x2, #16] + + /* load partial state (if supplied) */ + cbz x3, 0f + ld1 {v8.4s-v11.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v8.4s-v11.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v8.16b, v8.16b ) +CPU_LE( rev32 v9.16b, v9.16b ) +CPU_LE( rev32 v10.16b, v10.16b ) +CPU_LE( rev32 v11.16b, v11.16b ) + +2: add t0.4s, v8.4s, k0.4s + mov dg0v.16b, dgav.16b + + add_update c, ev, k0, 8, 9, 10, 11, dgb + add_update c, od, k0, 9, 10, 11, 8 + add_update c, ev, k0, 10, 11, 8, 9 + add_update c, od, k0, 11, 8, 9, 10 + add_update c, ev, k1, 8, 9, 10, 11 + + add_update p, od, k1, 9, 10, 11, 8 + add_update p, ev, k1, 10, 11, 8, 9 + add_update p, od, k1, 11, 8, 9, 10 + add_update p, ev, k1, 8, 9, 10, 11 + add_update p, od, k2, 9, 10, 11, 8 + + add_update m, ev, k2, 10, 11, 8, 9 + add_update m, od, k2, 11, 8, 9, 10 + add_update m, ev, k2, 8, 9, 10, 11 + add_update m, od, k2, 9, 10, 11, 8 + add_update m, ev, k3, 10, 11, 8, 9 + + add_update p, od, k3, 11, 8, 9, 10 + add_only p, ev, k3, 9 + add_only p, od, k3, 10 + add_only p, ev, k3, 11 + add_only p, od + + /* update state */ + add dgbv.2s, dgbv.2s, dg1v.2s + add dgav.4s, dgav.4s, dg0v.4s + + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v9.2d, #0 + mov x8, #0x80000000 + movi v10.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d8, x8 + mov x4, #0 + mov v11.d[0], xzr + mov v11.d[1], x7 + b 2b + + /* store new state */ +3: str dga, [x2] + str dgb, [x2, #16] + ret +ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..6fe83f37a750 --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -0,0 +1,174 @@ +/* + * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha1_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + return 0; +} + +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA1_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA1_BLOCK_SIZE - partial; + + memcpy(sctx->buffer + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA1_BLOCK_SIZE; + len %= SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, + partial ? sctx->buffer : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA1_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buffer + partial, data, len); + return 0; +} + +static int sha1_final(struct shash_desc *desc, u8 *out) +{ + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + struct sha1_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + __be32 *dst = (__be32 *)out; + int i; + + u32 padlen = SHA1_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); + + sha1_update(desc, padding, padlen); + sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int blocks; + int i; + + if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { + sha1_update(desc, data, len); + return sha1_final(desc, out); + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha1_ce_transform() + */ + blocks = len / SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg alg = { + .init = sha1_init, + .update = sha1_update, + .final = sha1_final, + .finup = sha1_finup, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct sha1_state), + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha1_ce_mod_init(void) +{ + return crypto_register_shash(&alg); +} + +static void __exit sha1_ce_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_cpu_feature_match(SHA1, sha1_ce_mod_init); +module_exit(sha1_ce_mod_fini); From e882c7d0126ca2849c3dd4873107d04dc9245ebc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Mar 2014 15:35:40 +0100 Subject: [PATCH 02/30] arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto Extensions This patch adds support for the SHA-224 and SHA-256 Secure Hash Algorithms for CPUs that have support for the SHA-2 part of the ARM v8 Crypto Extensions. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu (cherry picked from commit 6ba6c74dfc6bcf43312ef572592f7d4ebb3aedfa) Signed-off-by: Mark Brown --- arch/arm64/crypto/Kconfig | 5 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sha2-ce-core.S | 156 +++++++++++++++++++ arch/arm64/crypto/sha2-ce-glue.c | 255 +++++++++++++++++++++++++++++++ 4 files changed, 419 insertions(+) create mode 100644 arch/arm64/crypto/sha2-ce-core.S create mode 100644 arch/arm64/crypto/sha2-ce-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 7956881b5986..eb1e99770c21 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -13,4 +13,9 @@ config CRYPTO_SHA1_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH +config CRYPTO_SHA2_ARM64_CE + tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 0ed3caaec81b..0b3885a60d43 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -10,3 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o + +obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o +sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S new file mode 100644 index 000000000000..7f29fc031ea8 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -0,0 +1,156 @@ +/* + * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .text + .arch armv8-a+crypto + + dga .req q20 + dgav .req v20 + dgb .req q21 + dgbv .req v21 + + t0 .req v22 + t1 .req v23 + + dg0q .req q24 + dg0v .req v24 + dg1q .req q25 + dg1v .req v25 + dg2q .req q26 + dg2v .req v26 + + .macro add_only, ev, rc, s0 + mov dg2v.16b, dg0v.16b + .ifeq \ev + add t1.4s, v\s0\().4s, \rc\().4s + sha256h dg0q, dg1q, t0.4s + sha256h2 dg1q, dg2q, t0.4s + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha256h dg0q, dg1q, t1.4s + sha256h2 dg1q, dg2q, t1.4s + .endif + .endm + + .macro add_update, ev, rc, s0, s1, s2, s3 + sha256su0 v\s0\().4s, v\s1\().4s + add_only \ev, \rc, \s1 + sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s + .endm + + /* + * The SHA-256 round constants + */ + .align 4 +.Lsha2_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + + /* + * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha2_ce_transform) + /* load round constants */ + adr x8, .Lsha2_rcon + ld1 { v0.4s- v3.4s}, [x8], #64 + ld1 { v4.4s- v7.4s}, [x8], #64 + ld1 { v8.4s-v11.4s}, [x8], #64 + ld1 {v12.4s-v15.4s}, [x8] + + /* load state */ + ldp dga, dgb, [x2] + + /* load partial input (if supplied) */ + cbz x3, 0f + ld1 {v16.4s-v19.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v16.4s-v19.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v16.16b, v16.16b ) +CPU_LE( rev32 v17.16b, v17.16b ) +CPU_LE( rev32 v18.16b, v18.16b ) +CPU_LE( rev32 v19.16b, v19.16b ) + +2: add t0.4s, v16.4s, v0.4s + mov dg0v.16b, dgav.16b + mov dg1v.16b, dgbv.16b + + add_update 0, v1, 16, 17, 18, 19 + add_update 1, v2, 17, 18, 19, 16 + add_update 0, v3, 18, 19, 16, 17 + add_update 1, v4, 19, 16, 17, 18 + + add_update 0, v5, 16, 17, 18, 19 + add_update 1, v6, 17, 18, 19, 16 + add_update 0, v7, 18, 19, 16, 17 + add_update 1, v8, 19, 16, 17, 18 + + add_update 0, v9, 16, 17, 18, 19 + add_update 1, v10, 17, 18, 19, 16 + add_update 0, v11, 18, 19, 16, 17 + add_update 1, v12, 19, 16, 17, 18 + + add_only 0, v13, 17 + add_only 1, v14, 18 + add_only 0, v15, 19 + add_only 1 + + /* update state */ + add dgav.4s, dgav.4s, dg0v.4s + add dgbv.4s, dgbv.4s, dg1v.4s + + /* handled all input blocks? */ + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v17.2d, #0 + mov x8, #0x80000000 + movi v18.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d16, x8 + mov x4, #0 + mov v19.d[0], xzr + mov v19.d[1], x7 + b 2b + + /* store new state */ +3: stp dga, dgb, [x2] + ret +ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c new file mode 100644 index 000000000000..c294e67d3925 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -0,0 +1,255 @@ +/* + * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha224_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + } + }; + return 0; +} + +static int sha256_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + } + }; + return 0; +} + +static int sha2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA256_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx->buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, + partial ? sctx->buf : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buf + partial, data, len); + return 0; +} + +static void sha2_final(struct shash_desc *desc) +{ + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + struct sha256_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + u32 padlen = SHA256_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); + + sha2_update(desc, padding, padlen); + sha2_update(desc, (const u8 *)&bits, sizeof(bits)); +} + +static int sha224_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static void sha2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + int blocks; + + if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { + sha2_update(desc, data, len); + sha2_final(desc); + return; + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha2_ce_transform() + */ + blocks = len / SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + data += blocks * SHA256_BLOCK_SIZE; +} + +static int sha224_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha2_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha2_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg algs[] = { { + .init = sha224_init, + .update = sha2_update, + .final = sha224_final, + .finup = sha224_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .init = sha256_init, + .update = sha2_update, + .final = sha256_final, + .finup = sha256_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha2_ce_mod_init(void) +{ + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha2_ce_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_cpu_feature_match(SHA2, sha2_ce_mod_init); +module_exit(sha2_ce_mod_fini); From 5ead56a336d7bf9aaa73ec3432ea4d0acb3624af Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 26 Mar 2014 20:53:05 +0100 Subject: [PATCH 03/30] arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions This is a port to ARMv8 (Crypto Extensions) of the Intel implementation of the GHASH Secure Hash (used in the Galois/Counter chaining mode). It relies on the optional PMULL/PMULL2 instruction (polynomial multiply long, what Intel call carry-less multiply). Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu (cherry picked from commit fdd2389457b209a9723c3be818fcf301f35db906) Signed-off-by: Mark Brown --- arch/arm64/crypto/Kconfig | 6 ++ arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/ghash-ce-core.S | 95 ++++++++++++++++++ arch/arm64/crypto/ghash-ce-glue.c | 155 ++++++++++++++++++++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 arch/arm64/crypto/ghash-ce-core.S create mode 100644 arch/arm64/crypto/ghash-ce-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index eb1e99770c21..0c50859ee7b9 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -18,4 +18,10 @@ config CRYPTO_SHA2_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH + +config CRYPTO_GHASH_ARM64_CE + tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 0b3885a60d43..e8c81a068868 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -13,3 +13,6 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o + +obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o +ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S new file mode 100644 index 000000000000..b9e6eaf41c9b --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -0,0 +1,95 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. + * + * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include + + DATA .req v0 + SHASH .req v1 + IN1 .req v2 + T1 .req v2 + T2 .req v3 + T3 .req v4 + VZR .req v5 + + .text + .arch armv8-a+crypto + + /* + * void pmull_ghash_update(int blocks, u64 dg[], const char *src, + * struct ghash_key const *k, const char *head) + */ +ENTRY(pmull_ghash_update) + ld1 {DATA.16b}, [x1] + ld1 {SHASH.16b}, [x3] + eor VZR.16b, VZR.16b, VZR.16b + + /* do the head block first, if supplied */ + cbz x4, 0f + ld1 {IN1.2d}, [x4] + b 1f + +0: ld1 {IN1.2d}, [x2], #16 + sub w0, w0, #1 +1: ext IN1.16b, IN1.16b, IN1.16b, #8 +CPU_LE( rev64 IN1.16b, IN1.16b ) + eor DATA.16b, DATA.16b, IN1.16b + + /* multiply DATA by SHASH in GF(2^128) */ + ext T2.16b, DATA.16b, DATA.16b, #8 + ext T3.16b, SHASH.16b, SHASH.16b, #8 + eor T2.16b, T2.16b, DATA.16b + eor T3.16b, T3.16b, SHASH.16b + + pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1 + pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0 + pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0) + eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0) + eor T2.16b, T2.16b, DATA.16b + + ext T3.16b, VZR.16b, T2.16b, #8 + ext T2.16b, T2.16b, VZR.16b, #8 + eor DATA.16b, DATA.16b, T3.16b + eor T1.16b, T1.16b, T2.16b // is result of + // carry-less multiplication + + /* first phase of the reduction */ + shl T3.2d, DATA.2d, #1 + eor T3.16b, T3.16b, DATA.16b + shl T3.2d, T3.2d, #5 + eor T3.16b, T3.16b, DATA.16b + shl T3.2d, T3.2d, #57 + ext T2.16b, VZR.16b, T3.16b, #8 + ext T3.16b, T3.16b, VZR.16b, #8 + eor DATA.16b, DATA.16b, T2.16b + eor T1.16b, T1.16b, T3.16b + + /* second phase of the reduction */ + ushr T2.2d, DATA.2d, #5 + eor T2.16b, T2.16b, DATA.16b + ushr T2.2d, T2.2d, #1 + eor T2.16b, T2.16b, DATA.16b + ushr T2.2d, T2.2d, #1 + eor T1.16b, T1.16b, T2.16b + eor DATA.16b, DATA.16b, T1.16b + + cbnz w0, 0b + + st1 {DATA.16b}, [x1] + ret +ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c new file mode 100644 index 000000000000..b92baf3f68c7 --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -0,0 +1,155 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_key { + u64 a; + u64 b; +}; + +struct ghash_desc_ctx { + u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; + u8 buf[GHASH_BLOCK_SIZE]; + u32 count; +}; + +asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, const char *head); + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + ctx->count += len; + + if ((partial + len) >= GHASH_BLOCK_SIZE) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + int blocks; + + if (partial) { + int p = GHASH_BLOCK_SIZE - partial; + + memcpy(ctx->buf + partial, src, p); + src += p; + len -= p; + } + + blocks = len / GHASH_BLOCK_SIZE; + len %= GHASH_BLOCK_SIZE; + + kernel_neon_begin_partial(6); + pmull_ghash_update(blocks, ctx->digest, src, key, + partial ? ctx->buf : NULL); + kernel_neon_end(); + src += blocks * GHASH_BLOCK_SIZE; + } + if (len) + memcpy(ctx->buf + partial, src, len); + return 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + if (partial) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + + memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); + + kernel_neon_begin_partial(6); + pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); + kernel_neon_end(); + } + put_unaligned_be64(ctx->digest[1], dst); + put_unaligned_be64(ctx->digest[0], dst + 8); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *inkey, unsigned int keylen) +{ + struct ghash_key *key = crypto_shash_ctx(tfm); + u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* perform multiplication by 'x' in GF(2^128) */ + b = get_unaligned_be64(inkey); + a = get_unaligned_be64(inkey + 8); + + key->a = (a << 1) | (b >> 63); + key->b = (b << 1) | (a >> 63); + + if (b >> 63) + key->b ^= 0xc200000000000000UL; + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_key), + .cra_module = THIS_MODULE, + }, +}; + +static int __init ghash_ce_mod_init(void) +{ + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_ce_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_cpu_feature_match(PMULL, ghash_ce_mod_init); +module_exit(ghash_ce_mod_exit); From bd575592673e4d90b23c35cd0903057909afa6fb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 5 Feb 2014 18:13:38 +0100 Subject: [PATCH 04/30] arm64/crypto: AES using ARMv8 Crypto Extensions This patch adds support for the AES symmetric encryption algorithm for CPUs that have support for the AES part of the ARM v8 Crypto Extensions. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu (cherry picked from commit 317f2f750d708d684bddd8cb14827ec2efee4b1c) Signed-off-by: Mark Brown --- arch/arm64/crypto/Kconfig | 7 +- arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/aes-ce-cipher.c | 155 ++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/crypto/aes-ce-cipher.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 0c50859ee7b9..9ba32c0da871 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -18,10 +18,15 @@ config CRYPTO_SHA2_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH - config CRYPTO_GHASH_ARM64_CE tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH +config CRYPTO_AES_ARM64_CE + tristate "AES core cipher using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index e8c81a068868..908abd9242b1 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -16,3 +16,6 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o +CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c new file mode 100644 index 000000000000..2075e1acae6b --- /dev/null +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -0,0 +1,155 @@ +/* + * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +struct aes_block { + u8 b[AES_BLOCK_SIZE]; +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aese v0.16b, v2.16b ;" + " aesmc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aese v0.16b, v3.16b ;" + " aesmc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aese v0.16b, v1.16b ;" + " aesmc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aese v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_enc), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aesd v0.16b, v2.16b ;" + " aesimc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aesd v0.16b, v3.16b ;" + " aesimc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aesd v0.16b, v1.16b ;" + " aesimc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aesd v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_dec), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + .cra_cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = crypto_aes_set_key, + .cia_encrypt = aes_cipher_encrypt, + .cia_decrypt = aes_cipher_decrypt + } +}; + +static int __init aes_mod_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_cpu_feature_match(AES, aes_mod_init); +module_exit(aes_mod_exit); From df53737750fcbc39a0b0fb7c03f0c86a00ddde68 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Feb 2014 11:26:29 +0100 Subject: [PATCH 05/30] arm64/crypto: AES in CCM mode using ARMv8 Crypto Extensions This patch adds support for the AES-CCM encryption algorithm for CPUs that have support for the AES part of the ARM v8 Crypto Extensions. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu (cherry picked from commit a3fd82105b9d149033984bf018f473140f5b94bc) Signed-off-by: Mark Brown --- arch/arm64/crypto/Kconfig | 7 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/aes-ce-ccm-core.S | 222 +++++++++++++++++++++ arch/arm64/crypto/aes-ce-ccm-glue.c | 297 ++++++++++++++++++++++++++++ 4 files changed, 529 insertions(+) create mode 100644 arch/arm64/crypto/aes-ce-ccm-core.S create mode 100644 arch/arm64/crypto/aes-ce-ccm-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 9ba32c0da871..8fffd5af65ef 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -29,4 +29,11 @@ config CRYPTO_AES_ARM64_CE select CRYPTO_ALGAPI select CRYPTO_AES +config CRYPTO_AES_ARM64_CE_CCM + tristate "AES in CCM mode using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + select CRYPTO_AEAD + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 908abd9242b1..311287d68078 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -19,3 +19,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o +aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S new file mode 100644 index 000000000000..432e4841cd81 --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -0,0 +1,222 @@ +/* + * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + + .text + .arch armv8-a+crypto + + /* + * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + * u32 *macp, u8 const rk[], u32 rounds); + */ +ENTRY(ce_aes_ccm_auth_data) + ldr w8, [x3] /* leftover from prev round? */ + ld1 {v0.2d}, [x0] /* load mac */ + cbz w8, 1f + sub w8, w8, #16 + eor v1.16b, v1.16b, v1.16b +0: ldrb w7, [x1], #1 /* get 1 byte of input */ + subs w2, w2, #1 + add w8, w8, #1 + ins v1.b[0], w7 + ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ + beq 8f /* out of input? */ + cbnz w8, 0b + eor v0.16b, v0.16b, v1.16b +1: ld1 {v3.2d}, [x4] /* load first round key */ + prfm pldl1strm, [x1] + cmp w5, #12 /* which key size? */ + add x6, x4, #16 + sub w7, w5, #2 /* modified # of rounds */ + bmi 2f + bne 5f + mov v5.16b, v3.16b + b 4f +2: mov v4.16b, v3.16b + ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ +3: aese v0.16b, v4.16b + aesmc v0.16b, v0.16b +4: ld1 {v3.2d}, [x6], #16 /* load next round key */ + aese v0.16b, v5.16b + aesmc v0.16b, v0.16b +5: ld1 {v4.2d}, [x6], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aesmc v0.16b, v0.16b + ld1 {v5.2d}, [x6], #16 /* load next round key */ + bpl 3b + aese v0.16b, v4.16b + subs w2, w2, #16 /* last data? */ + eor v0.16b, v0.16b, v5.16b /* final round */ + bmi 6f + ld1 {v1.16b}, [x1], #16 /* load next input block */ + eor v0.16b, v0.16b, v1.16b /* xor with mac */ + bne 1b +6: st1 {v0.2d}, [x0] /* store mac */ + beq 10f + adds w2, w2, #16 + beq 10f + mov w8, w2 +7: ldrb w7, [x1], #1 + umov w6, v0.b[0] + eor w6, w6, w7 + strb w6, [x0], #1 + subs w2, w2, #1 + beq 10f + ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ + b 7b +8: mov w7, w8 + add w8, w8, #16 +9: ext v1.16b, v1.16b, v1.16b, #1 + adds w7, w7, #1 + bne 9b + eor v0.16b, v0.16b, v1.16b + st1 {v0.2d}, [x0] +10: str w8, [x3] + ret +ENDPROC(ce_aes_ccm_auth_data) + + /* + * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], + * u32 rounds); + */ +ENTRY(ce_aes_ccm_final) + ld1 {v3.2d}, [x2], #16 /* load first round key */ + ld1 {v0.2d}, [x0] /* load mac */ + cmp w3, #12 /* which key size? */ + sub w3, w3, #2 /* modified # of rounds */ + ld1 {v1.2d}, [x1] /* load 1st ctriv */ + bmi 0f + bne 3f + mov v5.16b, v3.16b + b 2f +0: mov v4.16b, v3.16b +1: ld1 {v5.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +2: ld1 {v3.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v4.2d}, [x2], #16 /* load next round key */ + subs w3, w3, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + bpl 1b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + /* final round key cancels out */ + eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ + st1 {v0.2d}, [x0] /* store result */ + ret +ENDPROC(ce_aes_ccm_final) + + .macro aes_ccm_do_crypt,enc + ldr x8, [x6, #8] /* load lower ctr */ + ld1 {v0.2d}, [x5] /* load mac */ + rev x8, x8 /* keep swabbed ctr in reg */ +0: /* outer loop */ + ld1 {v1.1d}, [x6] /* load upper ctr */ + prfm pldl1strm, [x1] + add x8, x8, #1 + rev x9, x8 + cmp w4, #12 /* which key size? */ + sub w7, w4, #2 /* get modified # of rounds */ + ins v1.d[1], x9 /* no carry in lower ctr */ + ld1 {v3.2d}, [x3] /* load first round key */ + add x10, x3, #16 + bmi 1f + bne 4f + mov v5.16b, v3.16b + b 3f +1: mov v4.16b, v3.16b + ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ +2: /* inner loop: 3 rounds, 2x interleaved */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v3.2d}, [x10], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +4: ld1 {v4.2d}, [x10], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + ld1 {v5.2d}, [x10], #16 /* load next round key */ + bpl 2b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + subs w2, w2, #16 + bmi 6f /* partial block? */ + ld1 {v2.16b}, [x1], #16 /* load next input block */ + .if \enc == 1 + eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ + eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ + .else + eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */ + eor v1.16b, v2.16b, v5.16b /* final round enc */ + .endif + eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ + st1 {v1.16b}, [x0], #16 /* write output block */ + bne 0b + rev x8, x8 + st1 {v0.2d}, [x5] /* store mac */ + str x8, [x6, #8] /* store lsb end of ctr (BE) */ +5: ret + +6: eor v0.16b, v0.16b, v5.16b /* final round mac */ + eor v1.16b, v1.16b, v5.16b /* final round enc */ + st1 {v0.2d}, [x5] /* store mac */ + add w2, w2, #16 /* process partial tail block */ +7: ldrb w9, [x1], #1 /* get 1 byte of input */ + umov w6, v1.b[0] /* get top crypted ctr byte */ + umov w7, v0.b[0] /* get top mac byte */ + .if \enc == 1 + eor w7, w7, w9 + eor w9, w9, w6 + .else + eor w9, w9, w6 + eor w7, w7, w9 + .endif + strb w9, [x0], #1 /* store out byte */ + strb w7, [x5], #1 /* store mac byte */ + subs w2, w2, #1 + beq 5b + ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ + ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ + b 7b + .endm + + /* + * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + */ +ENTRY(ce_aes_ccm_encrypt) + aes_ccm_do_crypt 1 +ENDPROC(ce_aes_ccm_encrypt) + +ENTRY(ce_aes_ccm_decrypt) + aes_ccm_do_crypt 0 +ENDPROC(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c new file mode 100644 index 000000000000..9e6cdde9b43d --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -0,0 +1,297 @@ +/* + * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + u32 *macp, u32 const rk[], u32 rounds); + +asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[], + u32 rounds); + +static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + if ((authsize & 1) || authsize < 4) + return -EINVAL; + return 0; +} + +static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8]; + u32 l = req->iv[0] + 1; + + /* verify that CCM dimension 'L' is set correctly in the IV */ + if (l < 2 || l > 8) + return -EINVAL; + + /* verify that msglen can in fact be represented in L bytes */ + if (l < 4 && msglen >> (8 * l)) + return -EOVERFLOW; + + /* + * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi + * uses a u32 type to represent msglen so the top 4 bytes are always 0. + */ + n[0] = 0; + n[1] = cpu_to_be32(msglen); + + memcpy(maciv, req->iv, AES_BLOCK_SIZE - l); + + /* + * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C) + * - bits 0..2 : max # of bytes required to represent msglen, minus 1 + * (already set by caller) + * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc) + * - bit 6 : indicates presence of authenticate-only data + */ + maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2; + if (req->assoclen) + maciv[0] |= 0x40; + + memset(&req->iv[AES_BLOCK_SIZE - l], 0, l); + return 0; +} + +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct __packed { __be16 l; __be32 h; u16 len; } ltag; + struct scatter_walk walk; + u32 len = req->assoclen; + u32 macp = 0; + + /* prepend the AAD with a length tag */ + if (len < 0xff00) { + ltag.l = cpu_to_be16(len); + ltag.len = 2; + } else { + ltag.l = cpu_to_be16(0xfffe); + put_unaligned_be32(len, <ag.h); + ltag.len = 6; + } + + ce_aes_ccm_auth_data(mac, (u8 *)<ag, ltag.len, &macp, ctx->key_enc, + num_rounds(ctx)); + scatterwalk_start(&walk, req->assoc); + + do { + u32 n = scatterwalk_clamp(&walk, len); + u8 *p; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, len); + } + p = scatterwalk_map(&walk); + ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc, + num_rounds(ctx)); + len -= n; + + scatterwalk_unmap(p); + scatterwalk_advance(&walk, n); + scatterwalk_done(&walk, 0, len); + } while (len); +} + +static int ccm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(mac, req->dst, req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int ccm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + unsigned int authsize = crypto_aead_authsize(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen - authsize; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize, + authsize, 0); + + if (memcmp(mac, buf, authsize)) + return -EBADMSG; + return 0; +} + +static struct crypto_alg ccm_aes_alg = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_aead_type, + .cra_module = THIS_MODULE, + .cra_aead = { + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = ccm_setkey, + .setauthsize = ccm_setauthsize, + .encrypt = ccm_encrypt, + .decrypt = ccm_decrypt, + } +}; + +static int __init aes_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_AES)) + return -ENODEV; + return crypto_register_alg(&ccm_aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&ccm_aes_alg); +} + +module_init(aes_mod_init); +module_exit(aes_mod_exit); + +MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("ccm(aes)"); From 824a3dc7fd42b5d70209c4813b752e534e0b9bf7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Sep 2013 09:28:03 +0200 Subject: [PATCH 06/30] arm64: pull in from asm-generic Signed-off-by: Ard Biesheuvel (cherry picked from commit 2ca10f892f2f6eecdfce7fdea694bbafe6340ced) Signed-off-by: Mark Brown --- arch/arm64/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 79a642d199f2..fb6e0c4d46a3 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -36,6 +36,7 @@ generic-y += segment.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h +generic-y += simd.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h From 75a566ceb15cab04a1bfb47e8bbd66550fe499e0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Mar 2014 10:19:17 +0100 Subject: [PATCH 07/30] arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions This adds ARMv8 implementations of AES in ECB, CBC, CTR and XTS modes, both for ARMv8 with Crypto Extensions and for plain ARMv8 NEON. The Crypto Extensions version can only run on ARMv8 implementations that have support for these optional extensions. The plain NEON version is a table based yet time invariant implementation. All S-box substitutions are performed in parallel, leveraging the wide range of ARMv8's tbl/tbx instructions, and the huge NEON register file, which can comfortably hold the entire S-box and still have room to spare for doing the actual computations. The key expansion routines were borrowed from aes_generic. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu (cherry picked from commit 49788fe2a128217f78a21ee4edbe6e92e988f222) Signed-off-by: Mark Brown --- arch/arm64/crypto/Kconfig | 14 + arch/arm64/crypto/Makefile | 14 + arch/arm64/crypto/aes-ce.S | 133 +++++++++ arch/arm64/crypto/aes-glue.c | 446 ++++++++++++++++++++++++++++ arch/arm64/crypto/aes-modes.S | 532 ++++++++++++++++++++++++++++++++++ arch/arm64/crypto/aes-neon.S | 382 ++++++++++++++++++++++++ 6 files changed, 1521 insertions(+) create mode 100644 arch/arm64/crypto/aes-ce.S create mode 100644 arch/arm64/crypto/aes-glue.c create mode 100644 arch/arm64/crypto/aes-modes.S create mode 100644 arch/arm64/crypto/aes-neon.S diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 8fffd5af65ef..5562652c5316 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -36,4 +36,18 @@ config CRYPTO_AES_ARM64_CE_CCM select CRYPTO_AES select CRYPTO_AEAD +config CRYPTO_AES_ARM64_CE_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + +config CRYPTO_AES_ARM64_NEON_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 311287d68078..2070a56ecc46 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -22,3 +22,17 @@ CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o +aes-ce-blk-y := aes-glue-ce.o aes-ce.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o +aes-neon-blk-y := aes-glue-neon.o aes-neon.o + +AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE +AFLAGS_aes-neon.o := -DINTERLEAVE=4 + +CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS + +$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE + $(call if_changed_dep,cc_o_c) diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S new file mode 100644 index 000000000000..685a18f731eb --- /dev/null +++ b/arch/arm64/crypto/aes-ce.S @@ -0,0 +1,133 @@ +/* + * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with + * Crypto Extensions + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#define AES_ENTRY(func) ENTRY(ce_ ## func) +#define AES_ENDPROC(func) ENDPROC(ce_ ## func) + + .arch armv8-a+crypto + + /* preload all round keys */ + .macro load_round_keys, rounds, rk + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + ld1 {v17.16b-v18.16b}, [\rk], #32 +1111: ld1 {v19.16b-v20.16b}, [\rk], #32 +2222: ld1 {v21.16b-v24.16b}, [\rk], #64 + ld1 {v25.16b-v28.16b}, [\rk], #64 + ld1 {v29.16b-v31.16b}, [\rk] + .endm + + /* prepare for encryption with key in rk[] */ + .macro enc_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for encryption (again) but with new key in rk[] */ + .macro enc_switch_key, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for decryption with key in rk[] */ + .macro dec_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + aes\mc \i0\().16b, \i0\().16b + .ifnb \i1 + aes\mc \i1\().16b, \i1\().16b + .ifnb \i3 + aes\mc \i2\().16b, \i2\().16b + aes\mc \i3\().16b, \i3\().16b + .endif + .endif + .endm + + /* up to 4 interleaved encryption rounds with the same round key */ + .macro round_Nx, enc, k, i0, i1, i2, i3 + .ifc \enc, e + do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3 + .else + do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3 + .endif + .endm + + /* up to 4 interleaved final rounds */ + .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + eor \i0\().16b, \i0\().16b, \k2\().16b + .ifnb \i1 + eor \i1\().16b, \i1\().16b, \k2\().16b + .ifnb \i3 + eor \i2\().16b, \i2\().16b, \k2\().16b + eor \i3\().16b, \i3\().16b, \k2\().16b + .endif + .endif + .endm + + /* up to 4 interleaved blocks */ + .macro do_block_Nx, enc, rounds, i0, i1, i2, i3 + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + round_Nx \enc, v17, \i0, \i1, \i2, \i3 + round_Nx \enc, v18, \i0, \i1, \i2, \i3 +1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3 + round_Nx \enc, v20, \i0, \i1, \i2, \i3 +2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 + round_Nx \enc, \key, \i0, \i1, \i2, \i3 + .endr + fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3 + .endm + + .macro encrypt_block, in, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \in + .endm + + .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1 + .endm + + .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 + .endm + + .macro decrypt_block, in, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \in + .endm + + .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1 + .endm + + .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 + .endm + +#include "aes-modes.S" diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c new file mode 100644 index 000000000000..60f2f4c12256 --- /dev/null +++ b/arch/arm64/crypto/aes-glue.c @@ -0,0 +1,446 @@ +/* + * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_V8_CRYPTO_EXTENSIONS +#define MODE "ce" +#define PRIO 300 +#define aes_ecb_encrypt ce_aes_ecb_encrypt +#define aes_ecb_decrypt ce_aes_ecb_decrypt +#define aes_cbc_encrypt ce_aes_cbc_encrypt +#define aes_cbc_decrypt ce_aes_cbc_decrypt +#define aes_ctr_encrypt ce_aes_ctr_encrypt +#define aes_xts_encrypt ce_aes_xts_encrypt +#define aes_xts_decrypt ce_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); +#else +#define MODE "neon" +#define PRIO 200 +#define aes_ecb_encrypt neon_aes_ecb_encrypt +#define aes_ecb_decrypt neon_aes_ecb_decrypt +#define aes_cbc_encrypt neon_aes_cbc_encrypt +#define aes_cbc_decrypt neon_aes_cbc_decrypt +#define aes_ctr_encrypt neon_aes_ctr_encrypt +#define aes_xts_encrypt neon_aes_xts_encrypt +#define aes_xts_decrypt neon_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); +MODULE_ALIAS("ecb(aes)"); +MODULE_ALIAS("cbc(aes)"); +MODULE_ALIAS("ctr(aes)"); +MODULE_ALIAS("xts(aes)"); +#endif + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +/* defined in aes-modes.S */ +asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); +asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); + +asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); +asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); + +asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[], int first); + +asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); +asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); + +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; +}; + +static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2); + if (!ret) + ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2], + key_len / 2); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + + first = 1; + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + first = 0; + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) + break; + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 __aligned(8) tail[AES_BLOCK_SIZE]; + + /* + * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need + * to tell aes_ctr_encrypt() to only read half a block. + */ + blocks = (nbytes <= 8) ? -1 : 1; + + aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, + blocks, walk.iv, first); + memcpy(tdst, tail, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_enc, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_dec, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static struct crypto_alg aes_algs[] = { { + .cra_name = "__ecb-aes-" MODE, + .cra_driver_name = "__driver-ecb-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, +}, { + .cra_name = "__cbc-aes-" MODE, + .cra_driver_name = "__driver-cbc-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-" MODE, + .cra_driver_name = "__driver-ctr-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-" MODE, + .cra_driver_name = "__driver-xts-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aes_init(void) +{ + return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static void __exit aes_exit(void) +{ + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +#ifdef USE_V8_CRYPTO_EXTENSIONS +module_cpu_feature_match(AES, aes_init); +#else +module_init(aes_init); +#endif +module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S new file mode 100644 index 000000000000..f6e372c528eb --- /dev/null +++ b/arch/arm64/crypto/aes-modes.S @@ -0,0 +1,532 @@ +/* + * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* included by aes-ce.S and aes-neon.S */ + + .text + .align 4 + +/* + * There are several ways to instantiate this code: + * - no interleave, all inline + * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2) + * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE) + * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4) + * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE) + * + * Macros imported by this code: + * - enc_prepare - setup NEON registers for encryption + * - dec_prepare - setup NEON registers for decryption + * - enc_switch_key - change to new key after having prepared for encryption + * - encrypt_block - encrypt a single block + * - decrypt block - decrypt a single block + * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4) + * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4) + */ + +#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE) +#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp +#define FRAME_POP ldp x29, x30, [sp],#16 + +#if INTERLEAVE == 2 + +aes_encrypt_block2x: + encrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block2x) + +aes_decrypt_block2x: + decrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block2x) + +#elif INTERLEAVE == 4 + +aes_encrypt_block4x: + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block4x) + +aes_decrypt_block4x: + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block4x) + +#else +#error INTERLEAVE should equal 2 or 4 +#endif + + .macro do_encrypt_block2x + bl aes_encrypt_block2x + .endm + + .macro do_decrypt_block2x + bl aes_decrypt_block2x + .endm + + .macro do_encrypt_block4x + bl aes_encrypt_block4x + .endm + + .macro do_decrypt_block4x + bl aes_decrypt_block4x + .endm + +#else +#define FRAME_PUSH +#define FRAME_POP + + .macro do_encrypt_block2x + encrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block2x + decrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_encrypt_block4x + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block4x + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + +#endif + + /* + * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + */ + +AES_ENTRY(aes_ecb_encrypt) + FRAME_PUSH + cbz w5, .LecbencloopNx + + enc_prepare w3, x2, x5 + +.LecbencloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + do_encrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + do_encrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbencloopNx +.Lecbenc1x: + adds w4, w4, #INTERLEAVE + beq .Lecbencout +#endif +.Lecbencloop: + ld1 {v0.16b}, [x1], #16 /* get next pt block */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbencloop +.Lecbencout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_encrypt) + + +AES_ENTRY(aes_ecb_decrypt) + FRAME_PUSH + cbz w5, .LecbdecloopNx + + dec_prepare w3, x2, x5 + +.LecbdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + do_decrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + do_decrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbdecloopNx +.Lecbdec1x: + adds w4, w4, #INTERLEAVE + beq .Lecbdecout +#endif +.Lecbdecloop: + ld1 {v0.16b}, [x1], #16 /* get next ct block */ + decrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbdecloop +.Lecbdecout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_decrypt) + + + /* + * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + */ + +AES_ENTRY(aes_cbc_encrypt) + cbz w6, .Lcbcencloop + + ld1 {v0.16b}, [x5] /* get iv */ + enc_prepare w3, x2, x5 + +.Lcbcencloop: + ld1 {v1.16b}, [x1], #16 /* get next pt block */ + eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcencloop + ret +AES_ENDPROC(aes_cbc_encrypt) + + +AES_ENTRY(aes_cbc_decrypt) + FRAME_PUSH + cbz w6, .LcbcdecloopNx + + ld1 {v7.16b}, [x5] /* get iv */ + dec_prepare w3, x2, x5 + +.LcbcdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lcbcdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + mov v2.16b, v0.16b + mov v3.16b, v1.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v2.16b + mov v7.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + mov v4.16b, v0.16b + mov v5.16b, v1.16b + mov v6.16b, v2.16b + do_decrypt_block4x + sub x1, x1, #16 + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v4.16b + ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */ + eor v2.16b, v2.16b, v5.16b + eor v3.16b, v3.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LcbcdecloopNx +.Lcbcdec1x: + adds w4, w4, #INTERLEAVE + beq .Lcbcdecout +#endif +.Lcbcdecloop: + ld1 {v1.16b}, [x1], #16 /* get next ct block */ + mov v0.16b, v1.16b /* ...and copy to v0 */ + decrypt_block v0, w3, x2, x5, w6 + eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ + mov v7.16b, v1.16b /* ct is next iv */ + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcdecloop +.Lcbcdecout: + FRAME_POP + ret +AES_ENDPROC(aes_cbc_decrypt) + + + /* + * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 ctr[], int first) + */ + +AES_ENTRY(aes_ctr_encrypt) + FRAME_PUSH + cbnz w6, .Lctrfirst /* 1st time around? */ + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrinc + add x5, x5, #1 /* increment BE ctr */ + b .LctrincNx +#else + b .Lctrinc +#endif +.Lctrfirst: + enc_prepare w3, x2, x6 + ld1 {v4.16b}, [x5] + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrloop +.LctrloopNx: + subs w4, w4, #INTERLEAVE + bmi .Lctr1x +#if INTERLEAVE == 2 + mov v0.8b, v4.8b + mov v1.8b, v4.8b + rev x7, x5 + add x5, x5, #1 + ins v0.d[1], x7 + rev x7, x5 + add x5, x5, #1 + ins v1.d[1], x7 + ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ + do_encrypt_block2x + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ + dup v7.4s, w5 + mov v0.16b, v4.16b + add v7.4s, v7.4s, v8.4s + mov v1.16b, v4.16b + rev32 v8.16b, v7.16b + mov v2.16b, v4.16b + mov v3.16b, v4.16b + mov v1.s[3], v8.s[0] + mov v2.s[3], v8.s[1] + mov v3.s[3], v8.s[2] + ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ + do_encrypt_block4x + eor v0.16b, v5.16b, v0.16b + ld1 {v5.16b}, [x1], #16 /* get 1 input block */ + eor v1.16b, v6.16b, v1.16b + eor v2.16b, v7.16b, v2.16b + eor v3.16b, v5.16b, v3.16b + st1 {v0.16b-v3.16b}, [x0], #64 + add x5, x5, #INTERLEAVE +#endif + cbz w4, .LctroutNx +.LctrincNx: + rev x7, x5 + ins v4.d[1], x7 + b .LctrloopNx +.LctroutNx: + sub x5, x5, #1 + rev x7, x5 + ins v4.d[1], x7 + b .Lctrout +.Lctr1x: + adds w4, w4, #INTERLEAVE + beq .Lctrout +#endif +.Lctrloop: + mov v0.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + subs w4, w4, #1 + bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ + ld1 {v3.16b}, [x1], #16 + eor v3.16b, v0.16b, v3.16b + st1 {v3.16b}, [x0], #16 + beq .Lctrout +.Lctrinc: + adds x5, x5, #1 /* increment BE ctr */ + rev x7, x5 + ins v4.d[1], x7 + bcc .Lctrloop /* no overflow? */ + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrloop +.Lctrhalfblock: + ld1 {v3.8b}, [x1] + eor v3.8b, v0.8b, v3.8b + st1 {v3.8b}, [x0] +.Lctrout: + FRAME_POP + ret +AES_ENDPROC(aes_ctr_encrypt) + .ltorg + + + /* + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + */ + + .macro next_tweak, out, in, const, tmp + sshr \tmp\().2d, \in\().2d, #63 + and \tmp\().16b, \tmp\().16b, \const\().16b + add \out\().2d, \in\().2d, \in\().2d + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \out\().16b, \out\().16b, \tmp\().16b + .endm + +.Lxts_mul_x: + .word 1, 0, 0x87, 0 + +AES_ENTRY(aes_xts_encrypt) + FRAME_PUSH + cbz w7, .LxtsencloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + enc_switch_key w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsencNx + +.LxtsencloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsencNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_encrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsencoutNx + next_tweak v4, v5, v7, v8 + b .LxtsencNx +.LxtsencoutNx: + mov v4.16b, v5.16b + b .Lxtsencout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_encrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsencout + b .LxtsencloopNx +#endif +.Lxtsenc1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsencout +#endif +.Lxtsencloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsencout + next_tweak v4, v4, v7, v8 + b .Lxtsencloop +.Lxtsencout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_encrypt) + + +AES_ENTRY(aes_xts_decrypt) + FRAME_PUSH + cbz w7, .LxtsdecloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + dec_prepare w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsdecNx + +.LxtsdecloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsdecNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsdecoutNx + next_tweak v4, v5, v7, v8 + b .LxtsdecNx +.LxtsdecoutNx: + mov v4.16b, v5.16b + b .Lxtsdecout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_decrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsdecout + b .LxtsdecloopNx +#endif +.Lxtsdec1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsdecout +#endif +.Lxtsdecloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + decrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsdecout + next_tweak v4, v4, v7, v8 + b .Lxtsdecloop +.Lxtsdecout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_decrypt) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S new file mode 100644 index 000000000000..b93170e1cc93 --- /dev/null +++ b/arch/arm64/crypto/aes-neon.S @@ -0,0 +1,382 @@ +/* + * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#define AES_ENTRY(func) ENTRY(neon_ ## func) +#define AES_ENDPROC(func) ENDPROC(neon_ ## func) + + /* multiply by polynomial 'x' in GF(2^8) */ + .macro mul_by_x, out, in, temp, const + sshr \temp, \in, #7 + add \out, \in, \in + and \temp, \temp, \const + eor \out, \out, \temp + .endm + + /* preload the entire Sbox */ + .macro prepare, sbox, shiftrows, temp + adr \temp, \sbox + movi v12.16b, #0x40 + ldr q13, \shiftrows + movi v14.16b, #0x1b + ld1 {v16.16b-v19.16b}, [\temp], #64 + ld1 {v20.16b-v23.16b}, [\temp], #64 + ld1 {v24.16b-v27.16b}, [\temp], #64 + ld1 {v28.16b-v31.16b}, [\temp] + .endm + + /* do preload for encryption */ + .macro enc_prepare, ignore0, ignore1, temp + prepare .LForward_Sbox, .LForward_ShiftRows, \temp + .endm + + .macro enc_switch_key, ignore0, ignore1, temp + /* do nothing */ + .endm + + /* do preload for decryption */ + .macro dec_prepare, ignore0, ignore1, temp + prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp + .endm + + /* apply SubBytes transformation using the the preloaded Sbox */ + .macro sub_bytes, in + sub v9.16b, \in\().16b, v12.16b + tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b + sub v10.16b, v9.16b, v12.16b + tbx \in\().16b, {v20.16b-v23.16b}, v9.16b + sub v11.16b, v10.16b, v12.16b + tbx \in\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + /* apply MixColumns transformation */ + .macro mix_columns, in + mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b + rev32 v8.8h, \in\().8h + eor \in\().16b, v10.16b, \in\().16b + shl v9.4s, v8.4s, #24 + shl v11.4s, \in\().4s, #24 + sri v9.4s, v8.4s, #8 + sri v11.4s, \in\().4s, #8 + eor v9.16b, v9.16b, v8.16b + eor v10.16b, v10.16b, v9.16b + eor \in\().16b, v10.16b, v11.16b + .endm + + /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ + .macro inv_mix_columns, in + mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b + mul_by_x v11.16b, v11.16b, v10.16b, v14.16b + eor \in\().16b, \in\().16b, v11.16b + rev32 v11.8h, v11.8h + eor \in\().16b, \in\().16b, v11.16b + mix_columns \in + .endm + + .macro do_block, enc, in, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ + sub_bytes \in + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns \in + .else + inv_mix_columns \in + .endif + b 1111b +2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block, in, rounds, rk, rkp, i + do_block 1, \in, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block, in, rounds, rk, rkp, i + do_block 0, \in, \rounds, \rk, \rkp, \i + .endm + + /* + * Interleaved versions: functionally equivalent to the + * ones above, but applied to 2 or 4 AES states in parallel. + */ + + .macro sub_bytes_2x, in0, in1 + sub v8.16b, \in0\().16b, v12.16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, v8.16b, v12.16b + sub v11.16b, v9.16b, v12.16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v10.16b, v12.16b + sub v9.16b, v11.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + .endm + + .macro sub_bytes_4x, in0, in1, in2, in3 + sub v8.16b, \in0\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, \in2\().16b, v12.16b + tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b + sub v11.16b, \in3\().16b, v12.16b + tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b + tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const + sshr \tmp0\().16b, \in0\().16b, #7 + add \out0\().16b, \in0\().16b, \in0\().16b + sshr \tmp1\().16b, \in1\().16b, #7 + and \tmp0\().16b, \tmp0\().16b, \const\().16b + add \out1\().16b, \in1\().16b, \in1\().16b + and \tmp1\().16b, \tmp1\().16b, \const\().16b + eor \out0\().16b, \out0\().16b, \tmp0\().16b + eor \out1\().16b, \out1\().16b, \tmp1\().16b + .endm + + .macro mix_columns_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + rev32 v10.8h, \in0\().8h + rev32 v11.8h, \in1\().8h + eor \in0\().16b, v8.16b, \in0\().16b + eor \in1\().16b, v9.16b, \in1\().16b + shl v12.4s, v10.4s, #24 + shl v13.4s, v11.4s, #24 + eor v8.16b, v8.16b, v10.16b + sri v12.4s, v10.4s, #8 + shl v10.4s, \in0\().4s, #24 + eor v9.16b, v9.16b, v11.16b + sri v13.4s, v11.4s, #8 + shl v11.4s, \in1\().4s, #24 + sri v10.4s, \in0\().4s, #8 + eor \in0\().16b, v8.16b, v12.16b + sri v11.4s, \in1\().4s, #8 + eor \in1\().16b, v9.16b, v13.16b + eor \in0\().16b, v10.16b, \in0\().16b + eor \in1\().16b, v11.16b, \in1\().16b + .endm + + .macro inv_mix_cols_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v8, v9, v8, v9, v10, v11, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + mix_columns_2x \in0, \in1 + .endm + + .macro inv_mix_cols_4x, in0, in1, in2, in3 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14 + mul_by_x_2x v8, v9, v8, v9, v12, v13, v14 + mul_by_x_2x v10, v11, v10, v11, v12, v13, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + rev32 v10.8h, v10.8h + rev32 v11.8h, v11.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + .endm + + .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + sub_bytes_2x \in0, \in1 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_2x \in0, \in1 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + .endm + + .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + sub_bytes_4x \in0, \in1, \in2, \in3 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ + tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_4x \in0, \in1, \in2, \in3 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + +#include "aes-modes.S" + + .text + .align 4 +.LForward_ShiftRows: + .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 + .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb + +.LReverse_ShiftRows: + .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb + .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 + +.LForward_Sbox: + .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e + .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.LReverse_Sbox: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d From 8f01ecc2d1730f81b4b68b33f61cdce13bb42912 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Thu, 12 Jun 2014 22:30:34 +0530 Subject: [PATCH 08/30] mailbox: rename pl320-ipc specific mailbox.h The patch 30058677 "ARM / highbank: add support for pl320 IPC" added a pl320 IPC specific header file as a generic mailbox.h. This file has been renamed appropriately to allow the introduction of the generic mailbox API framework. Acked-by: Mark Langsdorf Cc: Rafael J. Wysocki Signed-off-by: Suman Anna Signed-off-by: Mark Brown --- arch/arm/mach-highbank/highbank.c | 1 + drivers/cpufreq/highbank-cpufreq.c | 2 +- drivers/mailbox/pl320-ipc.c | 2 +- include/linux/{mailbox.h => pl320-ipc.h} | 0 4 files changed, 3 insertions(+), 2 deletions(-) rename include/linux/{mailbox.h => pl320-ipc.h} (100%) diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index e7df2dd43a40..eec13a1fbb25 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index b61b5a3fad64..3118b87a37bc 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define HB_CPUFREQ_CHANGE_NOTE 0x80000001 diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c index d873cbae2fbb..f3755e0aa935 100644 --- a/drivers/mailbox/pl320-ipc.c +++ b/drivers/mailbox/pl320-ipc.c @@ -26,7 +26,7 @@ #include #include -#include +#include #define IPCMxSOURCE(m) ((m) * 0x40) #define IPCMxDSET(m) (((m) * 0x40) + 0x004) diff --git a/include/linux/mailbox.h b/include/linux/pl320-ipc.h similarity index 100% rename from include/linux/mailbox.h rename to include/linux/pl320-ipc.h From 4270559b4203861bc856f863a8fd2a7536dc3975 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:59:28 -0700 Subject: [PATCH 09/30] clocksource: arch_timer: Make register accessors less error-prone Using an enum for the register we wish to access allows newer compilers to determine if we've forgotten a case in our switch statement. This allows us to remove the BUILD_BUG() instances in the arm64 port, avoiding problems where optimizations may not happen. To try and force better code generation we're currently marking the accessor functions as inline, but newer compilers can ignore the inline keyword unless it's marked __always_inline. Luckily on arm and arm64 inline is __always_inline, but let's make everything __always_inline to be explicit. Suggested-by: Thomas Gleixner Cc: Thomas Gleixner Cc: Mark Rutland Cc: Marc Zyngier Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano Acked-by: Mark Rutland (cherry picked from commit e09f3cc0184d6b5c3816f921b7ffb67623e5e834) Signed-off-by: Mark Brown --- arch/arm/include/asm/arch_timer.h | 14 ++++++-------- arch/arm64/include/asm/arch_timer.h | 23 +++++++++-------------- drivers/clocksource/arm_arch_timer.c | 6 +++--- include/clocksource/arm_arch_timer.h | 6 ++++-- 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 7c1bfc0aea0c..fa2b4f07de5a 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -17,7 +17,8 @@ int arch_timer_arch_init(void); * nicely work out which register we want, and chuck away the rest of * the code. At least it does so with a recent GCC (4.6.3). */ -static inline void arch_timer_reg_write(const int access, const int reg, u32 val) +static __always_inline +void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { @@ -28,9 +29,7 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val)); break; } - } - - if (access == ARCH_TIMER_VIRT_ACCESS) { + } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val)); @@ -44,7 +43,8 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val isb(); } -static inline u32 arch_timer_reg_read(const int access, const int reg) +static __always_inline +u32 arch_timer_reg_read(int access, enum arch_timer_reg reg) { u32 val = 0; @@ -57,9 +57,7 @@ static inline u32 arch_timer_reg_read(const int access, const int reg) asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val)); break; } - } - - if (access == ARCH_TIMER_VIRT_ACCESS) { + } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val)); diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index bf6ab242f047..0d74a130386c 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -26,7 +26,13 @@ #include -static inline void arch_timer_reg_write(int access, int reg, u32 val) +/* + * These register accessors are marked inline so the compiler can + * nicely work out which register we want, and chuck away the rest of + * the code. + */ +static __always_inline +void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { @@ -36,8 +42,6 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val) case ARCH_TIMER_REG_TVAL: asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); break; - default: - BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { @@ -47,17 +51,14 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val) case ARCH_TIMER_REG_TVAL: asm volatile("msr cntv_tval_el0, %0" : : "r" (val)); break; - default: - BUILD_BUG(); } - } else { - BUILD_BUG(); } isb(); } -static inline u32 arch_timer_reg_read(int access, int reg) +static __always_inline +u32 arch_timer_reg_read(int access, enum arch_timer_reg reg) { u32 val; @@ -69,8 +70,6 @@ static inline u32 arch_timer_reg_read(int access, int reg) case ARCH_TIMER_REG_TVAL: asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); break; - default: - BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { @@ -80,11 +79,7 @@ static inline u32 arch_timer_reg_read(int access, int reg) case ARCH_TIMER_REG_TVAL: asm volatile("mrs %0, cntv_tval_el0" : "=r" (val)); break; - default: - BUILD_BUG(); } - } else { - BUILD_BUG(); } return val; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a2b254189782..f72dd518070b 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -43,7 +43,7 @@ static bool arch_timer_use_virtual = true; * Architected system timer support. */ -static inline irqreturn_t timer_handler(const int access, +static __always_inline irqreturn_t timer_handler(const int access, struct clock_event_device *evt) { unsigned long ctrl; @@ -72,7 +72,7 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); } -static inline void timer_set_mode(const int access, int mode) +static __always_inline void timer_set_mode(const int access, int mode) { unsigned long ctrl; switch (mode) { @@ -99,7 +99,7 @@ static void arch_timer_set_mode_phys(enum clock_event_mode mode, timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode); } -static inline void set_next_event(const int access, unsigned long evt) +static __always_inline void set_next_event(const int access, unsigned long evt) { unsigned long ctrl; ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index e6c9c4cc9b23..bfa1a0141b6e 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -23,8 +23,10 @@ #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) -#define ARCH_TIMER_REG_CTRL 0 -#define ARCH_TIMER_REG_TVAL 1 +enum arch_timer_reg { + ARCH_TIMER_REG_CTRL, + ARCH_TIMER_REG_TVAL, +}; #define ARCH_TIMER_PHYS_ACCESS 0 #define ARCH_TIMER_VIRT_ACCESS 1 From 5a5f99e94c10d176066d02239361cd009dc06bf1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:59:30 -0700 Subject: [PATCH 10/30] clocksource: arch_timer: Pass clock event to set_mode callback There isn't any reason why we don't pass the event here and we'll need it in the near future for memory mapped arch timers anyway. Cc: Mark Rutland Cc: Marc Zyngier Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano Acked-by: Mark Rutland (cherry picked from commit 1ff99ea65687d921cb71f330491ec4205c00eb9f) Signed-off-by: Mark Brown --- drivers/clocksource/arm_arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index f72dd518070b..a91e72826f12 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -140,7 +140,7 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk) clk->cpumask = cpumask_of(smp_processor_id()); - clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, NULL); + clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk); clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff); From e66e53a5b9c08e4b4d078eed74cedb62a70885e7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:59:29 -0700 Subject: [PATCH 11/30] Documentation: Add memory mapped ARM architected timer binding Add a binding for the arm architected timer hardware's memory mapped interface. The mmio timer hardware is made up of one base frame and a collection of up to 8 timer frames, where each of the 8 timer frames can have either one or two views. A frame typically maps to a privilege level (user/kernel, hypervisor, secure). The first view has full access to the registers within a frame, while the second view can be restricted to particular registers within a frame. Each frame must support a physical timer. It's optional for a frame to support a virtual timer. Cc: devicetree-discuss@lists.ozlabs.org Cc: Marc Zyngier Cc: Mark Rutland Cc: Rob Herring Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano Acked-by: Mark Rutland (cherry picked from commit d53ef114cf40a043e3cc3fa70dbcdfb268a7e4dc) Signed-off-by: Mark Brown --- .../devicetree/bindings/arm/arch_timer.txt | 59 ++++++++++++++++++- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt index 20746e5abe6f..06fc7602593a 100644 --- a/Documentation/devicetree/bindings/arm/arch_timer.txt +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt @@ -1,10 +1,14 @@ * ARM architected timer -ARM cores may have a per-core architected timer, which provides per-cpu timers. +ARM cores may have a per-core architected timer, which provides per-cpu timers, +or a memory mapped architected timer, which provides up to 8 frames with a +physical and optional virtual timer per frame. -The timer is attached to a GIC to deliver its per-processor interrupts. +The per-core architected timer is attached to a GIC to deliver its +per-processor interrupts via PPIs. The memory mapped timer is attached to a GIC +to deliver its interrupts via SPIs. -** Timer node properties: +** CP15 Timer node properties: - compatible : Should at least contain one of "arm,armv7-timer" @@ -26,3 +30,52 @@ Example: <1 10 0xf08>; clock-frequency = <100000000>; }; + +** Memory mapped timer node properties: + +- compatible : Should at least contain "arm,armv7-timer-mem". + +- clock-frequency : The frequency of the main counter, in Hz. Optional. + +- reg : The control frame base address. + +Note that #address-cells, #size-cells, and ranges shall be present to ensure +the CPU can address a frame's registers. + +A timer node has up to 8 frame sub-nodes, each with the following properties: + +- frame-number: 0 to 7. + +- interrupts : Interrupt list for physical and virtual timers in that order. + The virtual timer interrupt is optional. + +- reg : The first and second view base addresses in that order. The second view + base address is optional. + +- status : "disabled" indicates the frame is not available for use. Optional. + +Example: + + timer@f0000000 { + compatible = "arm,armv7-timer-mem"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + reg = <0xf0000000 0x1000>; + clock-frequency = <50000000>; + + frame@f0001000 { + frame-number = <0> + interrupts = <0 13 0x8>, + <0 14 0x8>; + reg = <0xf0001000 0x1000>, + <0xf0002000 0x1000>; + }; + + frame@f0003000 { + frame-number = <1> + interrupts = <0 15 0x8>; + reg = <0xf0003000 0x1000>; + status = "disabled"; + }; + }; From ff52761adb613e1549bd9471850a6f86a420de49 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:59:31 -0700 Subject: [PATCH 12/30] clocksource: arch_timer: Push the read/write wrappers deeper We're going to introduce support to read and write the memory mapped timer registers in the next patch, so push the cp15 read/write functions one level deeper. This simplifies the next patch and makes it clearer what's going on. Cc: Mark Rutland Cc: Marc Zyngier Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano Acked-by: Mark Rutland (cherry picked from commit 60faddf6eb3aba16068032bdcf35e18ace4bfb21) Signed-off-by: Mark Brown --- arch/arm/include/asm/arch_timer.h | 4 +-- arch/arm64/include/asm/arch_timer.h | 4 +-- drivers/clocksource/arm_arch_timer.c | 46 +++++++++++++++++++--------- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index fa2b4f07de5a..3d81e1b97707 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -18,7 +18,7 @@ int arch_timer_arch_init(void); * the code. At least it does so with a recent GCC (4.6.3). */ static __always_inline -void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val) +void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { @@ -44,7 +44,7 @@ void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val) } static __always_inline -u32 arch_timer_reg_read(int access, enum arch_timer_reg reg) +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { u32 val = 0; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 0d74a130386c..f568a19bef1d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -32,7 +32,7 @@ * the code. */ static __always_inline -void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val) +void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { @@ -58,7 +58,7 @@ void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val) } static __always_inline -u32 arch_timer_reg_read(int access, enum arch_timer_reg reg) +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { u32 val; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a91e72826f12..59898b96e457 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -43,14 +43,28 @@ static bool arch_timer_use_virtual = true; * Architected system timer support. */ +static __always_inline +void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, + struct clock_event_device *clk) +{ + arch_timer_reg_write_cp15(access, reg, val); +} + +static __always_inline +u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, + struct clock_event_device *clk) +{ + return arch_timer_reg_read_cp15(access, reg); +} + static __always_inline irqreturn_t timer_handler(const int access, struct clock_event_device *evt) { unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt); if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { ctrl |= ARCH_TIMER_CTRL_IT_MASK; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); evt->event_handler(evt); return IRQ_HANDLED; } @@ -72,15 +86,16 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); } -static __always_inline void timer_set_mode(const int access, int mode) +static __always_inline void timer_set_mode(const int access, int mode, + struct clock_event_device *clk) { unsigned long ctrl; switch (mode) { case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ctrl &= ~ARCH_TIMER_CTRL_ENABLE; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); break; default: break; @@ -90,36 +105,37 @@ static __always_inline void timer_set_mode(const int access, int mode) static void arch_timer_set_mode_virt(enum clock_event_mode mode, struct clock_event_device *clk) { - timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode); + timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode, clk); } static void arch_timer_set_mode_phys(enum clock_event_mode mode, struct clock_event_device *clk) { - timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode); + timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode, clk); } -static __always_inline void set_next_event(const int access, unsigned long evt) +static __always_inline void set_next_event(const int access, unsigned long evt, + struct clock_event_device *clk) { unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ctrl |= ARCH_TIMER_CTRL_ENABLE; ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; - arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt); - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk); + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } static int arch_timer_set_next_event_virt(unsigned long evt, - struct clock_event_device *unused) + struct clock_event_device *clk) { - set_next_event(ARCH_TIMER_VIRT_ACCESS, evt); + set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); return 0; } static int arch_timer_set_next_event_phys(unsigned long evt, - struct clock_event_device *unused) + struct clock_event_device *clk) { - set_next_event(ARCH_TIMER_PHYS_ACCESS, evt); + set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); return 0; } From 2b8b608bfca576bbadfa199825b378514696238e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:59:32 -0700 Subject: [PATCH 13/30] clocksource: arch_timer: Add support for memory mapped timers Add support for the memory mapped timers by filling in the read/write functions and adding some parsing code. Note that we only register one clocksource, preferring the cp15 based clocksource over the mmio one. To keep things simple we register one global clockevent. This covers the case of UP and SMP systems with only mmio hardware and systems where the memory mapped timers are used as the broadcast timer in low power modes. The DT binding allows for per-CPU memory mapped timers in case we want to support that in the future, but the code isn't added here. We also don't do much for hypervisor support, although it should be possible to support it by searching for at least two frames where one frame has the virtual capability and then updating KVM timers to support it. Cc: Mark Rutland Cc: Marc Zyngier Cc: Rob Herring Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano Acked-by: Mark Rutland (cherry picked from commit 220069945b298d3998c6598b081c466dca259929) Signed-off-by: Mark Brown Conflicts: drivers/clocksource/arm_arch_timer.c --- drivers/clocksource/arm_arch_timer.c | 400 +++++++++++++++++++++++---- include/clocksource/arm_arch_timer.h | 2 + 2 files changed, 347 insertions(+), 55 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 59898b96e457..6a5e963ff062 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -16,13 +16,39 @@ #include #include #include +#include #include +#include #include #include #include +#define CNTTIDR 0x08 +#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) + +#define CNTVCT_LO 0x08 +#define CNTVCT_HI 0x0c +#define CNTFRQ 0x10 +#define CNTP_TVAL 0x28 +#define CNTP_CTL 0x2c +#define CNTV_TVAL 0x38 +#define CNTV_CTL 0x3c + +#define ARCH_CP15_TIMER BIT(0) +#define ARCH_MEM_TIMER BIT(1) +static unsigned arch_timers_present __initdata; + +static void __iomem *arch_counter_base; + +struct arch_timer { + void __iomem *base; + struct clock_event_device evt; +}; + +#define to_arch_timer(e) container_of(e, struct arch_timer, evt) + static u32 arch_timer_rate; enum ppi_nr { @@ -38,6 +64,7 @@ static int arch_timer_ppi[MAX_TIMER_PPI]; static struct clock_event_device __percpu *arch_timer_evt; static bool arch_timer_use_virtual = true; +static bool arch_timer_mem_use_virtual; /* * Architected system timer support. @@ -47,14 +74,62 @@ static __always_inline void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, struct clock_event_device *clk) { - arch_timer_reg_write_cp15(access, reg, val); + if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed(val, timer->base + CNTP_CTL); + break; + case ARCH_TIMER_REG_TVAL: + writel_relaxed(val, timer->base + CNTP_TVAL); + break; + } + } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed(val, timer->base + CNTV_CTL); + break; + case ARCH_TIMER_REG_TVAL: + writel_relaxed(val, timer->base + CNTV_TVAL); + break; + } + } else { + arch_timer_reg_write_cp15(access, reg, val); + } } static __always_inline u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, struct clock_event_device *clk) { - return arch_timer_reg_read_cp15(access, reg); + u32 val; + + if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = readl_relaxed(timer->base + CNTP_CTL); + break; + case ARCH_TIMER_REG_TVAL: + val = readl_relaxed(timer->base + CNTP_TVAL); + break; + } + } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = readl_relaxed(timer->base + CNTV_CTL); + break; + case ARCH_TIMER_REG_TVAL: + val = readl_relaxed(timer->base + CNTV_TVAL); + break; + } + } else { + val = arch_timer_reg_read_cp15(access, reg); + } + + return val; } static __always_inline irqreturn_t timer_handler(const int access, @@ -86,6 +161,20 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); } +static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); +} + +static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); +} + static __always_inline void timer_set_mode(const int access, int mode, struct clock_event_device *clk) { @@ -114,6 +203,18 @@ static void arch_timer_set_mode_phys(enum clock_event_mode mode, timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode, clk); } +static void arch_timer_set_mode_virt_mem(enum clock_event_mode mode, + struct clock_event_device *clk) +{ + timer_set_mode(ARCH_TIMER_MEM_VIRT_ACCESS, mode, clk); +} + +static void arch_timer_set_mode_phys_mem(enum clock_event_mode mode, + struct clock_event_device *clk) +{ + timer_set_mode(ARCH_TIMER_MEM_PHYS_ACCESS, mode, clk); +} + static __always_inline void set_next_event(const int access, unsigned long evt, struct clock_event_device *clk) { @@ -139,27 +240,62 @@ static int arch_timer_set_next_event_phys(unsigned long evt, return 0; } -static int __cpuinit arch_timer_setup(struct clock_event_device *clk) +static int arch_timer_set_next_event_virt_mem(unsigned long evt, + struct clock_event_device *clk) { - clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; - clk->name = "arch_sys_timer"; - clk->rating = 450; - if (arch_timer_use_virtual) { - clk->irq = arch_timer_ppi[VIRT_PPI]; - clk->set_mode = arch_timer_set_mode_virt; - clk->set_next_event = arch_timer_set_next_event_virt; - } else { - clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; - clk->set_mode = arch_timer_set_mode_phys; - clk->set_next_event = arch_timer_set_next_event_phys; - } + set_next_event(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk); + return 0; +} - clk->cpumask = cpumask_of(smp_processor_id()); +static int arch_timer_set_next_event_phys_mem(unsigned long evt, + struct clock_event_device *clk) +{ + set_next_event(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk); + return 0; +} + +static void __cpuinit __arch_timer_setup(unsigned type, + struct clock_event_device *clk) +{ + clk->features = CLOCK_EVT_FEAT_ONESHOT; + + if (type == ARCH_CP15_TIMER) { + clk->features |= CLOCK_EVT_FEAT_C3STOP; + clk->name = "arch_sys_timer"; + clk->rating = 450; + clk->cpumask = cpumask_of(smp_processor_id()); + if (arch_timer_use_virtual) { + clk->irq = arch_timer_ppi[VIRT_PPI]; + clk->set_mode = arch_timer_set_mode_virt; + clk->set_next_event = arch_timer_set_next_event_virt; + } else { + clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; + clk->set_mode = arch_timer_set_mode_phys; + clk->set_next_event = arch_timer_set_next_event_phys; + } + } else { + clk->name = "arch_mem_timer"; + clk->rating = 400; + clk->cpumask = cpu_all_mask; + if (arch_timer_mem_use_virtual) { + clk->set_mode = arch_timer_set_mode_virt_mem; + clk->set_next_event = + arch_timer_set_next_event_virt_mem; + } else { + clk->set_mode = arch_timer_set_mode_phys_mem; + clk->set_next_event = + arch_timer_set_next_event_phys_mem; + } + } clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk); - clockevents_config_and_register(clk, arch_timer_rate, - 0xf, 0x7fffffff); + clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff); +} + +static int __cpuinit arch_timer_setup(struct clock_event_device *clk) +{ + __arch_timer_setup(ARCH_CP15_TIMER, clk); if (arch_timer_use_virtual) enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0); @@ -174,27 +310,41 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk) return 0; } -static int arch_timer_available(void) +static void +arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np) { - u32 freq; + /* Who has more than one independent system counter? */ + if (arch_timer_rate) + return; - if (arch_timer_rate == 0) { - freq = arch_timer_get_cntfrq(); - - /* Check the timer frequency. */ - if (freq == 0) { - pr_warn("Architected timer frequency not available\n"); - return -EINVAL; - } - - arch_timer_rate = freq; + /* Try to determine the frequency from the device tree or CNTFRQ */ + if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) { + if (cntbase) + arch_timer_rate = readl_relaxed(cntbase + CNTFRQ); + else + arch_timer_rate = arch_timer_get_cntfrq(); } - pr_info_once("Architected local timer running at %lu.%02luMHz (%s).\n", + /* Check the timer frequency. */ + if (arch_timer_rate == 0) + pr_warn("Architected timer frequency not available\n"); +} + +static void arch_timer_banner(unsigned type) +{ + pr_info("Architected %s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n", + type & ARCH_CP15_TIMER ? "cp15" : "", + type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? " and " : "", + type & ARCH_MEM_TIMER ? "mmio" : "", (unsigned long)arch_timer_rate / 1000000, (unsigned long)(arch_timer_rate / 10000) % 100, - arch_timer_use_virtual ? "virt" : "phys"); - return 0; + type & ARCH_CP15_TIMER ? + arch_timer_use_virtual ? "virt" : "phys" : + "", + type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "", + type & ARCH_MEM_TIMER ? + arch_timer_mem_use_virtual ? "virt" : "phys" : + ""); } u32 arch_timer_get_rate(void) @@ -202,18 +352,26 @@ u32 arch_timer_get_rate(void) return arch_timer_rate; } -/* - * Some external users of arch_timer_read_counter (e.g. sched_clock) may try to - * call it before it has been initialised. Rather than incur a performance - * penalty checking for initialisation, provide a default implementation that - * won't lead to time appearing to jump backwards. - */ -static u64 arch_timer_read_zero(void) +static u64 arch_counter_get_cntvct_mem(void) { - return 0; + u32 vct_lo, vct_hi, tmp_hi; + + do { + vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); + vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO); + tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); + } while (vct_hi != tmp_hi); + + return ((u64) vct_hi << 32) | vct_lo; } -u64 (*arch_timer_read_counter)(void) = arch_timer_read_zero; +/* + * Default to cp15 based access because arm64 uses this function for + * sched_clock() before DT is probed and the cp15 method is guaranteed + * to exist on arm64. arm doesn't use this before DT is probed so even + * if we don't have the cp15 accessors we won't have a problem. + */ +u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct; static cycle_t arch_counter_read(struct clocksource *cs) { @@ -245,6 +403,23 @@ struct timecounter *arch_timer_get_timecounter(void) return &timecounter; } +static void __init arch_counter_register(unsigned type) +{ + u64 start_count; + + /* Register the CP15 based counter if we have one */ + if (type & ARCH_CP15_TIMER) + arch_timer_read_counter = arch_counter_get_cntvct; + else + arch_timer_read_counter = arch_counter_get_cntvct_mem; + + start_count = arch_timer_read_counter(); + clocksource_register_hz(&clocksource_counter, arch_timer_rate); + cyclecounter.mult = clocksource_counter.mult; + cyclecounter.shift = clocksource_counter.shift; + timecounter_init(&timecounter, &cyclecounter, start_count); +} + static void __cpuinit arch_timer_stop(struct clock_event_device *clk) { pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", @@ -289,10 +464,6 @@ static int __init arch_timer_register(void) int err; int ppi; - err = arch_timer_available(); - if (err) - goto out; - arch_timer_evt = alloc_percpu(struct clock_event_device); if (!arch_timer_evt) { err = -ENOMEM; @@ -355,24 +526,77 @@ static int __init arch_timer_register(void) return err; } +static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq) +{ + int ret; + irq_handler_t func; + struct arch_timer *t; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return -ENOMEM; + + t->base = base; + t->evt.irq = irq; + __arch_timer_setup(ARCH_MEM_TIMER, &t->evt); + + if (arch_timer_mem_use_virtual) + func = arch_timer_handler_virt_mem; + else + func = arch_timer_handler_phys_mem; + + ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &t->evt); + if (ret) { + pr_err("arch_timer: Failed to request mem timer irq\n"); + kfree(t); + } + + return ret; +} + +static const struct of_device_id arch_timer_of_match[] __initconst = { + { .compatible = "arm,armv7-timer", }, + { .compatible = "arm,armv8-timer", }, + {}, +}; + +static const struct of_device_id arch_timer_mem_of_match[] __initconst = { + { .compatible = "arm,armv7-timer-mem", }, + {}, +}; + +static void __init arch_timer_common_init(void) +{ + unsigned mask = ARCH_CP15_TIMER | ARCH_MEM_TIMER; + + /* Wait until both nodes are probed if we have two timers */ + if ((arch_timers_present & mask) != mask) { + if (of_find_matching_node(NULL, arch_timer_mem_of_match) && + !(arch_timers_present & ARCH_MEM_TIMER)) + return; + if (of_find_matching_node(NULL, arch_timer_of_match) && + !(arch_timers_present & ARCH_CP15_TIMER)) + return; + } + + arch_timer_banner(arch_timers_present); + arch_counter_register(arch_timers_present); + arch_timer_arch_init(); +} + static void __init arch_timer_init(struct device_node *np) { - u32 freq; int i; - if (arch_timer_get_rate()) { + if (arch_timers_present & ARCH_CP15_TIMER) { pr_warn("arch_timer: multiple nodes in dt, skipping\n"); return; } - /* Try to determine the frequency from the device tree or CNTFRQ */ - if (!of_property_read_u32(np, "clock-frequency", &freq)) - arch_timer_rate = freq; - + arch_timers_present |= ARCH_CP15_TIMER; for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++) arch_timer_ppi[i] = irq_of_parse_and_map(np, i); - - of_node_put(np); + arch_timer_detect_rate(NULL, np); /* * If HYP mode is available, we know that the physical timer @@ -398,7 +622,73 @@ static void __init arch_timer_init(struct device_node *np) arch_timer_read_counter = arch_counter_get_cntpct; arch_timer_register(); - arch_timer_arch_init(); + arch_timer_common_init(); } CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_init); CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_init); + +static void __init arch_timer_mem_init(struct device_node *np) +{ + struct device_node *frame, *best_frame = NULL; + void __iomem *cntctlbase, *base; + unsigned int irq; + u32 cnttidr; + + arch_timers_present |= ARCH_MEM_TIMER; + cntctlbase = of_iomap(np, 0); + if (!cntctlbase) { + pr_err("arch_timer: Can't find CNTCTLBase\n"); + return; + } + + cnttidr = readl_relaxed(cntctlbase + CNTTIDR); + iounmap(cntctlbase); + + /* + * Try to find a virtual capable frame. Otherwise fall back to a + * physical capable frame. + */ + for_each_available_child_of_node(np, frame) { + int n; + + if (of_property_read_u32(frame, "frame-number", &n)) { + pr_err("arch_timer: Missing frame-number\n"); + of_node_put(best_frame); + of_node_put(frame); + return; + } + + if (cnttidr & CNTTIDR_VIRT(n)) { + of_node_put(best_frame); + best_frame = frame; + arch_timer_mem_use_virtual = true; + break; + } + of_node_put(best_frame); + best_frame = of_node_get(frame); + } + + base = arch_counter_base = of_iomap(best_frame, 0); + if (!base) { + pr_err("arch_timer: Can't map frame's registers\n"); + of_node_put(best_frame); + return; + } + + if (arch_timer_mem_use_virtual) + irq = irq_of_parse_and_map(best_frame, 1); + else + irq = irq_of_parse_and_map(best_frame, 0); + of_node_put(best_frame); + if (!irq) { + pr_err("arch_timer: Frame missing %s irq", + arch_timer_mem_use_virtual ? "virt" : "phys"); + return; + } + + arch_timer_detect_rate(base, np); + arch_timer_mem_register(base, irq); + arch_timer_common_init(); +} +CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", + arch_timer_mem_init); diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index bfa1a0141b6e..93b7f96f9c59 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -30,6 +30,8 @@ enum arch_timer_reg { #define ARCH_TIMER_PHYS_ACCESS 0 #define ARCH_TIMER_VIRT_ACCESS 1 +#define ARCH_TIMER_MEM_PHYS_ACCESS 2 +#define ARCH_TIMER_MEM_VIRT_ACCESS 3 #ifdef CONFIG_ARM_ARCH_TIMER From ede42157ac5f489e459dbac898435016faac1696 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Jun 2014 17:43:12 +0100 Subject: [PATCH 14/30] arm64: Add cpu idle information to the fast model DT Signed-off-by: Mark Brown --- arch/arm64/boot/dts/fvp-base-gicv2-psci.dts | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts index a46be6148b3a..ed55571e06dd 100644 --- a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts +++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts @@ -54,7 +54,7 @@ aliases { psci { compatible = "arm,psci"; method = "smc"; - cpu_suspend = <0xc4000001>; + cpu_suspend = <0x84000001>; cpu_off = <0x84000002>; cpu_on = <0xc4000003>; }; @@ -63,12 +63,33 @@ cpus { #address-cells = <2>; #size-cells = <0>; + idle-states { + entry-method = "arm,psci"; + + CPU_SLEEP_0: cpu-sleep-0 { + compatible = "arm,idle-state"; + entry-method-param = <0x0010000>; + entry-latency-us = <40>; + exit-latency-us = <100>; + min-residency-us = <150>; + }; + + CLUSTER_SLEEP_0: cluster-sleep-0 { + compatible = "arm,idle-state"; + entry-method-param = <0x1010000>; + entry-latency-us = <500>; + exit-latency-us = <1000>; + min-residency-us = <2500>; + }; + }; + big0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a57", "arm,armv8"; reg = <0x0 0x0>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; big1: cpu@1 { device_type = "cpu"; @@ -76,6 +97,7 @@ big1: cpu@1 { reg = <0x0 0x1>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; big2: cpu@2 { device_type = "cpu"; @@ -83,6 +105,7 @@ big2: cpu@2 { reg = <0x0 0x2>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; big3: cpu@3 { device_type = "cpu"; @@ -90,6 +113,7 @@ big3: cpu@3 { reg = <0x0 0x3>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little0: cpu@100 { device_type = "cpu"; @@ -97,6 +121,7 @@ little0: cpu@100 { reg = <0x0 0x100>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little1: cpu@101 { device_type = "cpu"; @@ -104,6 +129,7 @@ little1: cpu@101 { reg = <0x0 0x101>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little2: cpu@102 { device_type = "cpu"; @@ -111,6 +137,7 @@ little2: cpu@102 { reg = <0x0 0x102>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little3: cpu@103 { device_type = "cpu"; @@ -118,6 +145,7 @@ little3: cpu@103 { reg = <0x0 0x103>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; cpu-map { From 804bf578437c1ff33930aa73c8e536532608691a Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Mon, 9 Jun 2014 16:27:10 +0100 Subject: [PATCH 15/30] arm64: Juno: Carve out memory reserved for secure access. Trusted Firmware 0.4 reserves the top 16MB of RAM available in the first 4GB address space for the secure world use. Carve out that memory from the device tree to avoid triggering faults when accessing protected memory. Signed-off-by: Liviu Dudau Signed-off-by: Jon Medhurst Signed-off-by: Mark Brown --- arch/arm64/boot/dts/juno.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/juno.dts b/arch/arm64/boot/dts/juno.dts index 9785a14ca604..f260d702041c 100644 --- a/arch/arm64/boot/dts/juno.dts +++ b/arch/arm64/boot/dts/juno.dts @@ -68,7 +68,7 @@ cpu@1 { memory@80000000 { device_type = "memory"; - reg = <0x00000000 0x80000000 0x0 0x80000000>, + reg = <0x00000000 0x80000000 0x0 0x7f000000>, <0x00000008 0x80000000 0x1 0x80000000>; }; From ca3a638daef5b2b489bd105f3d95e7faf077cea3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 15 Oct 2013 15:31:51 +0200 Subject: [PATCH 16/30] clocksource: arch_timer: Do not register arch_sys_counter twice Commit: 65cd4f6 ("arch_timer: Move to generic sched_clock framework") added code to register the arch_sys_counter in arch_timer_register(), but it is already registered in arch_counter_register(). This results in the timer being added to the clocksource list twice, therefore causing an infinite loop in the list. Remove the duplicate registration and register the scheduler clock after the original registration instead. This fixes a hang during boot on Tegra114 (Cortex-A15). [ While I've only tested this on Tegra114, I suspect the same hang during boot happens for all processors that use this clock source. ] Signed-off-by: Thierry Reding Acked-by: John Stultz Cc: Stephen Boyd Cc: Will Deacon Cc: Stephen Warren Cc: linux-arm-kernel@lists.infradead.org Cc: Daniel Lezcano Link: http://lkml.kernel.org/r/1381843911-31962-1-git-send-email-treding@nvidia.com Signed-off-by: Ingo Molnar (cherry picked from commit 4a7d3e8a9939cf8073c247286d81cbe0ae48eba2) Signed-off-by: Mark Brown Conflicts: drivers/clocksource/arm_arch_timer.c --- drivers/clocksource/arm_arch_timer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 6a5e963ff062..89affb5606b5 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -418,6 +418,9 @@ static void __init arch_counter_register(unsigned type) cyclecounter.mult = clocksource_counter.mult; cyclecounter.shift = clocksource_counter.shift; timecounter_init(&timecounter, &cyclecounter, start_count); + + /* 56 bits minimum, so we assume worst case rollover */ + sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); } static void __cpuinit arch_timer_stop(struct clock_event_device *clk) @@ -470,12 +473,6 @@ static int __init arch_timer_register(void) goto out; } - clocksource_register_hz(&clocksource_counter, arch_timer_rate); - cyclecounter.mult = clocksource_counter.mult; - cyclecounter.shift = clocksource_counter.shift; - timecounter_init(&timecounter, &cyclecounter, - arch_counter_get_cntpct()); - if (arch_timer_use_virtual) { ppi = arch_timer_ppi[VIRT_PPI]; err = request_percpu_irq(ppi, arch_timer_handler_virt, From c4310b09f95a1cd8e1f6eb0378828e47588c68ae Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Jun 2014 21:22:02 +0100 Subject: [PATCH 17/30] configs: Enable mailbox API Signed-off-by: Mark Brown --- linaro/configs/linaro-base.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf index 4d2ac0209671..fa38b9e44367 100644 --- a/linaro/configs/linaro-base.conf +++ b/linaro/configs/linaro-base.conf @@ -102,3 +102,4 @@ CONFIG_FUNCTION_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_FUNCTION_PROFILER=y +CONFIG_MAILBOX=y From 096b5c0248a86fff0097a0e62e40ca4d29c30fb2 Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Thu, 12 Jun 2014 22:31:19 +0530 Subject: [PATCH 18/30] mailbox: Introduce framework for mailbox Introduce common framework for client/protocol drivers and controller drivers of Inter-Processor-Communication (IPC). Client driver developers should have a look at include/linux/mailbox_client.h to understand the part of the API exposed to client drivers. Similarly controller driver developers should have a look at include/linux/mailbox_controller.h Signed-off-by: Jassi Brar Signed-off-by: Mark Brown --- drivers/mailbox/Makefile | 4 + drivers/mailbox/mailbox.c | 487 +++++++++++++++++++++++++++++ include/linux/mailbox_client.h | 45 +++ include/linux/mailbox_controller.h | 121 +++++++ 4 files changed, 657 insertions(+) create mode 100644 drivers/mailbox/mailbox.c create mode 100644 include/linux/mailbox_client.h create mode 100644 include/linux/mailbox_controller.h diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 543ad6a79505..fefef7ebcbec 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -1 +1,5 @@ +# Generic MAILBOX API + +obj-$(CONFIG_MAILBOX) += mailbox.o + obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c new file mode 100644 index 000000000000..9d356ba77edf --- /dev/null +++ b/drivers/mailbox/mailbox.c @@ -0,0 +1,487 @@ +/* + * Mailbox: Common code for Mailbox controllers and users + * + * Copyright (C) 2014 Linaro Ltd. + * Author: Jassi Brar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TXDONE_BY_IRQ (1 << 0) /* controller has remote RTR irq */ +#define TXDONE_BY_POLL (1 << 1) /* controller can read status of last TX */ +#define TXDONE_BY_ACK (1 << 2) /* S/W ACK recevied by Client ticks the TX */ + +static LIST_HEAD(mbox_cons); +static DEFINE_MUTEX(con_mutex); + +static int _add_to_rbuf(struct mbox_chan *chan, void *mssg) +{ + int idx; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + /* See if there is any space left */ + if (chan->msg_count == MBOX_TX_QUEUE_LEN) { + spin_unlock_irqrestore(&chan->lock, flags); + return -ENOMEM; + } + + idx = chan->msg_free; + chan->msg_data[idx] = mssg; + chan->msg_count++; + + if (idx == MBOX_TX_QUEUE_LEN - 1) + chan->msg_free = 0; + else + chan->msg_free++; + + spin_unlock_irqrestore(&chan->lock, flags); + + return idx; +} + +static void _msg_submit(struct mbox_chan *chan) +{ + unsigned count, idx; + unsigned long flags; + void *data; + int err; + + spin_lock_irqsave(&chan->lock, flags); + + if (!chan->msg_count || chan->active_req) { + spin_unlock_irqrestore(&chan->lock, flags); + return; + } + + count = chan->msg_count; + idx = chan->msg_free; + if (idx >= count) + idx -= count; + else + idx += MBOX_TX_QUEUE_LEN - count; + + data = chan->msg_data[idx]; + + /* Try to submit a message to the MBOX controller */ + err = chan->mbox->ops->send_data(chan, data); + if (!err) { + chan->active_req = data; + chan->msg_count--; + } + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void tx_tick(struct mbox_chan *chan, int r) +{ + unsigned long flags; + void *mssg; + + spin_lock_irqsave(&chan->lock, flags); + mssg = chan->active_req; + chan->active_req = NULL; + spin_unlock_irqrestore(&chan->lock, flags); + + /* Submit next message */ + _msg_submit(chan); + + /* Notify the client */ + if (chan->cl->tx_block) + complete(&chan->tx_complete); + else if (mssg && chan->cl->tx_done) + chan->cl->tx_done(chan->cl, mssg, r); +} + +static void poll_txdone(unsigned long data) +{ + struct mbox_controller *mbox = (struct mbox_controller *)data; + bool txdone, resched = false; + int i; + + for (i = 0; i < mbox->num_chans; i++) { + struct mbox_chan *chan = &mbox->chans[i]; + + if (chan->active_req && chan->cl) { + resched = true; + txdone = chan->mbox->ops->last_tx_done(chan); + if (txdone) + tx_tick(chan, 0); + } + } + + if (resched) + mod_timer(&mbox->poll, + jiffies + msecs_to_jiffies(mbox->period)); +} + +/** + * mbox_chan_received_data - A way for controller driver to push data + * received from remote to the upper layer. + * @chan: Pointer to the mailbox channel on which RX happened. + * @data: Client specific message typecasted as void * + * + * After startup and before shutdown any data received on the chan + * is passed on to the API via atomic mbox_chan_received_data(). + * The controller should ACK the RX only after this call returns. + */ +void mbox_chan_received_data(struct mbox_chan *chan, void *mssg) +{ + /* No buffering the received data */ + if (chan->cl->rx_callback) + chan->cl->rx_callback(chan->cl, mssg); +} +EXPORT_SYMBOL_GPL(mbox_chan_received_data); + +/** + * mbox_chan_txdone - A way for controller driver to notify the + * framework that the last TX has completed. + * @chan: Pointer to the mailbox chan on which TX happened. + * @r: Status of last TX - OK or ERROR + * + * The controller that has IRQ for TX ACK calls this atomic API + * to tick the TX state machine. It works only if txdone_irq + * is set by the controller. + */ +void mbox_chan_txdone(struct mbox_chan *chan, int r) +{ + if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) { + pr_err("Controller can't run the TX ticker\n"); + return; + } + + tx_tick(chan, r); +} +EXPORT_SYMBOL_GPL(mbox_chan_txdone); + +/** + * mbox_client_txdone - The way for a client to run the TX state machine. + * @chan: Mailbox channel assigned to this client. + * @r: Success status of last transmission. + * + * The client/protocol had received some 'ACK' packet and it notifies + * the API that the last packet was sent successfully. This only works + * if the controller can't sense TX-Done. + */ +void mbox_client_txdone(struct mbox_chan *chan, int r) +{ + if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) { + pr_err("Client can't run the TX ticker\n"); + return; + } + + tx_tick(chan, r); +} +EXPORT_SYMBOL_GPL(mbox_client_txdone); + +/** + * mbox_client_peek_data - A way for client driver to pull data + * received from remote by the controller. + * @chan: Mailbox channel assigned to this client. + * + * A poke to controller driver for any received data. + * The data is actually passed onto client via the + * mbox_chan_received_data() + * The call can be made from atomic context, so the controller's + * implementation of peek_data() must not sleep. + * + * Return: True, if controller has, and is going to push after this, + * some data. + * False, if controller doesn't have any data to be read. + */ +bool mbox_client_peek_data(struct mbox_chan *chan) +{ + if (chan->mbox->ops->peek_data) + return chan->mbox->ops->peek_data(chan); + + return false; +} +EXPORT_SYMBOL_GPL(mbox_client_peek_data); + +/** + * mbox_send_message - For client to submit a message to be + * sent to the remote. + * @chan: Mailbox channel assigned to this client. + * @mssg: Client specific message typecasted. + * + * For client to submit data to the controller destined for a remote + * processor. If the client had set 'tx_block', the call will return + * either when the remote receives the data or when 'tx_tout' millisecs + * run out. + * In non-blocking mode, the requests are buffered by the API and a + * non-negative token is returned for each queued request. If the request + * is not queued, a negative token is returned. Upon failure or successful + * TX, the API calls 'tx_done' from atomic context, from which the client + * could submit yet another request. + * In blocking mode, 'tx_done' is not called, effectively making the + * queue length 1. + * The pointer to message should be preserved until it is sent + * over the chan, i.e, tx_done() is made. + * This function could be called from atomic context as it simply + * queues the data and returns a token against the request. + * + * Return: Non-negative integer for successful submission (non-blocking mode) + * or transmission over chan (blocking mode). + * Negative value denotes failure. + */ +int mbox_send_message(struct mbox_chan *chan, void *mssg) +{ + int t; + + if (!chan || !chan->cl) + return -EINVAL; + + t = _add_to_rbuf(chan, mssg); + if (t < 0) { + pr_err("Try increasing MBOX_TX_QUEUE_LEN\n"); + return t; + } + + _msg_submit(chan); + + init_completion(&chan->tx_complete); + + if (chan->txdone_method == TXDONE_BY_POLL) + poll_txdone((unsigned long)chan->mbox); + + if (chan->cl->tx_block && chan->active_req) { + unsigned long wait; + int ret; + + if (!chan->cl->tx_tout) /* wait for ever */ + wait = msecs_to_jiffies(3600000); + else + wait = msecs_to_jiffies(chan->cl->tx_tout); + + ret = wait_for_completion_timeout(&chan->tx_complete, wait); + if (ret == 0) { + t = -EIO; + tx_tick(chan, -EIO); + } + } + + return t; +} +EXPORT_SYMBOL_GPL(mbox_send_message); + +/** + * mbox_request_channel - Request a mailbox channel. + * @cl: Identity of the client requesting the channel. + * + * The Client specifies its requirements and capabilities while asking for + * a mailbox channel. It can't be called from atomic context. + * The channel is exclusively allocated and can't be used by another + * client before the owner calls mbox_free_channel. + * After assignment, any packet received on this channel will be + * handed over to the client via the 'rx_callback'. + * The framework holds reference to the client, so the mbox_client + * structure shouldn't be modified until the mbox_free_channel returns. + * + * Return: Pointer to the channel assigned to the client if successful. + * ERR_PTR for request failure. + */ +struct mbox_chan *mbox_request_channel(const struct mbox_client *cl) +{ + struct device *dev = cl->dev; + struct mbox_controller *mbox; + struct of_phandle_args spec; + struct mbox_chan *chan; + unsigned long flags; + int count, i, ret; + + if (!dev || !dev->of_node) { + pr_err("%s: No owner device node\n", __func__); + return ERR_PTR(-ENODEV); + } + + count = of_property_count_strings(dev->of_node, "mbox-names"); + if (count < 0) { + pr_err("%s: mbox-names property of node '%s' missing\n", + __func__, dev->of_node->full_name); + return ERR_PTR(-ENODEV); + } + + mutex_lock(&con_mutex); + + ret = -ENODEV; + for (i = 0; i < count; i++) { + const char *s; + + if (of_property_read_string_index(dev->of_node, + "mbox-names", i, &s)) + continue; + + if (strcmp(cl->chan_name, s)) + continue; + + if (of_parse_phandle_with_args(dev->of_node, + "mbox", "#mbox-cells", i, &spec)) + continue; + + chan = NULL; + list_for_each_entry(mbox, &mbox_cons, node) + if (mbox->dev->of_node == spec.np) { + chan = mbox->of_xlate(mbox, &spec); + break; + } + + of_node_put(spec.np); + + if (!chan) + continue; + + ret = -EBUSY; + if (!chan->cl && try_module_get(mbox->dev->driver->owner)) + break; + } + + if (i == count) { + mutex_unlock(&con_mutex); + return ERR_PTR(ret); + } + + spin_lock_irqsave(&chan->lock, flags); + chan->msg_free = 0; + chan->msg_count = 0; + chan->active_req = NULL; + chan->cl = cl; + init_completion(&chan->tx_complete); + + if (chan->txdone_method == TXDONE_BY_POLL + && cl->knows_txdone) + chan->txdone_method |= TXDONE_BY_ACK; + spin_unlock_irqrestore(&chan->lock, flags); + + ret = chan->mbox->ops->startup(chan); + if (ret) { + pr_err("Unable to startup the chan (%d)\n", ret); + mbox_free_channel(chan); + chan = ERR_PTR(ret); + } + + mutex_unlock(&con_mutex); + return chan; +} +EXPORT_SYMBOL_GPL(mbox_request_channel); + +/** + * mbox_free_channel - The client relinquishes control of a mailbox + * channel by this call. + * @chan: The mailbox channel to be freed. + */ +void mbox_free_channel(struct mbox_chan *chan) +{ + unsigned long flags; + + if (!chan || !chan->cl) + return; + + chan->mbox->ops->shutdown(chan); + + /* The queued TX requests are simply aborted, no callbacks are made */ + spin_lock_irqsave(&chan->lock, flags); + chan->cl = NULL; + chan->active_req = NULL; + if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) + chan->txdone_method = TXDONE_BY_POLL; + + module_put(chan->mbox->dev->driver->owner); + spin_unlock_irqrestore(&chan->lock, flags); +} +EXPORT_SYMBOL_GPL(mbox_free_channel); + +static struct mbox_chan * +of_mbox_index_xlate(struct mbox_controller *mbox, + const struct of_phandle_args *sp) +{ + int ind = sp->args[0]; + + if (ind >= mbox->num_chans) + return NULL; + + return &mbox->chans[ind]; +} + +/** + * mbox_controller_register - Register the mailbox controller + * @mbox: Pointer to the mailbox controller. + * + * The controller driver registers its communication chans + */ +int mbox_controller_register(struct mbox_controller *mbox) +{ + int i, txdone; + + /* Sanity check */ + if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans) + return -EINVAL; + + if (mbox->txdone_irq) + txdone = TXDONE_BY_IRQ; + else if (mbox->txdone_poll) + txdone = TXDONE_BY_POLL; + else /* It has to be ACK then */ + txdone = TXDONE_BY_ACK; + + if (txdone == TXDONE_BY_POLL) { + mbox->poll.function = &poll_txdone; + mbox->poll.data = (unsigned long)mbox; + init_timer(&mbox->poll); + } + + for (i = 0; i < mbox->num_chans; i++) { + struct mbox_chan *chan = &mbox->chans[i]; + chan->cl = NULL; + chan->mbox = mbox; + chan->txdone_method = txdone; + spin_lock_init(&chan->lock); + } + + if (!mbox->of_xlate) + mbox->of_xlate = of_mbox_index_xlate; + + mutex_lock(&con_mutex); + list_add_tail(&mbox->node, &mbox_cons); + mutex_unlock(&con_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(mbox_controller_register); + +/** + * mbox_controller_unregister - UnRegister the mailbox controller + * @mbox: Pointer to the mailbox controller. + */ +void mbox_controller_unregister(struct mbox_controller *mbox) +{ + int i; + + if (!mbox) + return; + + mutex_lock(&con_mutex); + + list_del(&mbox->node); + + for (i = 0; i < mbox->num_chans; i++) + mbox_free_channel(&mbox->chans[i]); + + del_timer_sync(&mbox->poll); + + mutex_unlock(&con_mutex); +} +EXPORT_SYMBOL_GPL(mbox_controller_unregister); diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h new file mode 100644 index 000000000000..53eb0784261b --- /dev/null +++ b/include/linux/mailbox_client.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * Author: Jassi Brar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MAILBOX_CLIENT_H +#define __MAILBOX_CLIENT_H + +#include + +struct mbox_chan; + +/** + * struct mbox_client - User of a mailbox + * @dev: The client device + * @chan_name: The "controller:channel" this client wants + * @rx_callback: Atomic callback to provide client the data received + * @tx_done: Atomic callback to tell client of data transmission + * @tx_block: If the mbox_send_message should block until data is + * transmitted. + * @tx_tout: Max block period in ms before TX is assumed failure + * @knows_txdone: if the client could run the TX state machine. Usually + * if the client receives some ACK packet for transmission. + * Unused if the controller already has TX_Done/RTR IRQ. + */ +struct mbox_client { + struct device *dev; + const char *chan_name; + void (*rx_callback)(struct mbox_client *cl, void *mssg); + void (*tx_done)(struct mbox_client *cl, void *mssg, int r); + bool tx_block; + unsigned long tx_tout; + bool knows_txdone; +}; + +struct mbox_chan *mbox_request_channel(const struct mbox_client *cl); +int mbox_send_message(struct mbox_chan *chan, void *mssg); +void mbox_client_txdone(struct mbox_chan *chan, int r); +void mbox_free_channel(struct mbox_chan *chan); + +#endif /* __MAILBOX_CLIENT_H */ diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h new file mode 100644 index 000000000000..5d1915b9af60 --- /dev/null +++ b/include/linux/mailbox_controller.h @@ -0,0 +1,121 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MAILBOX_CONTROLLER_H +#define __MAILBOX_CONTROLLER_H + +#include + +struct mbox_chan; + +/** + * struct mbox_chan_ops - s/w representation of a communication chan + * @send_data: The API asks the MBOX controller driver, in atomic + * context try to transmit a message on the bus. Returns 0 if + * data is accepted for transmission, -EBUSY while rejecting + * if the remote hasn't yet read the last data sent. Actual + * transmission of data is reported by the controller via + * mbox_chan_txdone (if it has some TX ACK irq). It must not + * block. + * @startup: Called when a client requests the chan. The controller + * could ask clients for additional parameters of communication + * to be provided via client's chan_data. This call may + * block. After this call the Controller must forward any + * data received on the chan by calling mbox_chan_received_data. + * @shutdown: Called when a client relinquishes control of a chan. + * This call may block too. The controller must not forwared + * any received data anymore. + * @last_tx_done: If the controller sets 'txdone_poll', the API calls + * this to poll status of last TX. The controller must + * give priority to IRQ method over polling and never + * set both txdone_poll and txdone_irq. Only in polling + * mode 'send_data' is expected to return -EBUSY. + * Used only if txdone_poll:=true && txdone_irq:=false + * @peek_data: Atomic check for any received data. Return true if controller + * has some data to push to the client. False otherwise. + */ +struct mbox_chan_ops { + int (*send_data)(struct mbox_chan *chan, void *data); + int (*startup)(struct mbox_chan *chan); + void (*shutdown)(struct mbox_chan *chan); + bool (*last_tx_done)(struct mbox_chan *chan); + bool (*peek_data)(struct mbox_chan *chan); +}; + +/** + * struct mbox_controller - Controller of a class of communication chans + * @dev: Device backing this controller + * @controller_name: Literal name of the controller. + * @ops: Operators that work on each communication chan + * @chans: Null terminated array of chans. + * @txdone_irq: Indicates if the controller can report to API when + * the last transmitted data was read by the remote. + * Eg, if it has some TX ACK irq. + * @txdone_poll: If the controller can read but not report the TX + * done. Ex, some register shows the TX status but + * no interrupt rises. Ignored if 'txdone_irq' is set. + * @txpoll_period: If 'txdone_poll' is in effect, the API polls for + * last TX's status after these many millisecs + */ +struct mbox_controller { + struct device *dev; + struct mbox_chan_ops *ops; + struct mbox_chan *chans; + int num_chans; + bool txdone_irq; + bool txdone_poll; + unsigned txpoll_period; + struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, + const struct of_phandle_args *sp); + /* + * If the controller supports only TXDONE_BY_POLL, + * this timer polls all the links for txdone. + */ + struct timer_list poll; + unsigned period; + /* Hook to add to the global controller list */ + struct list_head node; +}; + +/* + * The length of circular buffer for queuing messages from a client. + * 'msg_count' tracks the number of buffered messages while 'msg_free' + * is the index where the next message would be buffered. + * We shouldn't need it too big because every transferr is interrupt + * triggered and if we have lots of data to transfer, the interrupt + * latencies are going to be the bottleneck, not the buffer length. + * Besides, mbox_send_message could be called from atomic context and + * the client could also queue another message from the notifier 'tx_done' + * of the last transfer done. + * REVIST: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN" + * print, it needs to be taken from config option or somesuch. + */ +#define MBOX_TX_QUEUE_LEN 20 + +struct mbox_chan { + struct mbox_controller *mbox; /* Parent Controller */ + unsigned txdone_method; + + /* client */ + struct mbox_client *cl; + struct completion tx_complete; + + void *active_req; + unsigned msg_count, msg_free; + void *msg_data[MBOX_TX_QUEUE_LEN]; + /* Access to the channel */ + spinlock_t lock; + + /* Private data for controller */ + void *con_priv; +}; + +int mbox_controller_register(struct mbox_controller *mbox); +void mbox_chan_received_data(struct mbox_chan *chan, void *data); +void mbox_chan_txdone(struct mbox_chan *chan, int r); +void mbox_controller_unregister(struct mbox_controller *mbox); + +#endif /* __MAILBOX_CONTROLLER_H */ From cc2e5bd705fe3beabb50180f5b1e3db005e3e569 Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Thu, 12 Jun 2014 22:31:54 +0530 Subject: [PATCH 19/30] Mailbox: Generic: Specify mailbox api bindings Due to the platform specific nature of remote's firmware, we can't do much to facilitate shareable client drivers. So the Mailbox api is more like a bunch of useful functions put together. The only feature that can be abstracted out is mailbox channel assignment to client drivers, which this binding specifies. Signed-off-by: Jassi Brar Signed-off-by: Mark Brown --- .../devicetree/bindings/mailbox/mailbox.txt | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Documentation/devicetree/bindings/mailbox/mailbox.txt diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt new file mode 100644 index 000000000000..3f009555f392 --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt @@ -0,0 +1,33 @@ +* Generic Mailbox Controller and client driver bindings + +Generic binding to provide a way for Mailbox controller drivers to +assign appropriate mailbox channel to client drivers. + +* Mailbox Controller + +Required property: +- #mbox-cells: Must be at least 1. Number of cells in a mailbox + specifier. + +Example: + mailbox: mailbox { + ... + #mbox-cells = <1>; + }; + + +* Mailbox Client + +Required property: +- mbox: List of phandle and mailbox channel specifier. + +- mbox-names: List of identifier strings for each mailbox channel + required by the client. + +Example: + pwr_cntrl: power { + ... + mbox-names = "pwr-ctrl", "rpc"; + mbox = <&mailbox 0 + &mailbox 1>; + }; From 7e8a356e6c2d32cf8b0111be5a83089201c75c83 Mon Sep 17 00:00:00 2001 From: LeyFoon Tan Date: Thu, 12 Jun 2014 22:32:22 +0530 Subject: [PATCH 20/30] mailbox: Fix deleteing poll timer Try to delete the timer only if it was init/used. Signed-off-by: LeyFoon Tan Signed-off-by: Jassi Brar Signed-off-by: Mark Brown --- drivers/mailbox/mailbox.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 9d356ba77edf..e1fc6aaeb87c 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -480,7 +480,8 @@ void mbox_controller_unregister(struct mbox_controller *mbox) for (i = 0; i < mbox->num_chans; i++) mbox_free_channel(&mbox->chans[i]); - del_timer_sync(&mbox->poll); + if (mbox->txdone_poll) + del_timer_sync(&mbox->poll); mutex_unlock(&con_mutex); } From 36a11573b56ea5f16c150387828dd929f012fdb1 Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Thu, 12 Jun 2014 22:32:54 +0530 Subject: [PATCH 21/30] MAINTAINERS: Add maintainer entry for Mailbox API Add myself as the maintainer of mailbox api Signed-off-by: Jassi Brar Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad7e322ad17b..d5a14e676330 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5152,6 +5152,14 @@ S: Maintained F: drivers/net/macvlan.c F: include/linux/if_macvlan.h +MAILBOX API +M: Jassi Brar +L: linux-kernel@vger.kernel.org +S: Maintained +F: drivers/mailbox/ +F: include/linux/mailbox_client.h +F: include/linux/mailbox_controller.h + MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 M: Michael Kerrisk W: http://www.kernel.org/doc/man-pages From 312629a7629e9be8858ae7452a2e1901863944f5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Jun 2014 21:36:34 +0100 Subject: [PATCH 22/30] mailbox: Prototype mbox_client_peek_data() This is an interface intended for client drivers and exported but it is not prototyped in the headers so can't be used. Add a prototype. Signed-off-by: Mark Brown --- include/linux/mailbox_client.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index 53eb0784261b..cbfcf8478ca9 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -40,6 +40,7 @@ struct mbox_client { struct mbox_chan *mbox_request_channel(const struct mbox_client *cl); int mbox_send_message(struct mbox_chan *chan, void *mssg); void mbox_client_txdone(struct mbox_chan *chan, int r); +bool mbox_client_peek_data(struct mbox_chan *chan); void mbox_free_channel(struct mbox_chan *chan); #endif /* __MAILBOX_CLIENT_H */ From e5d1d7e3ec89d4e48d630c0f5f9bb2883706da53 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Jun 2014 21:40:45 +0100 Subject: [PATCH 23/30] mailbox: Remove const from client argument of mbox_request_channel() The struct mbox_client supplied to mbox_request_channel() is const but it is stored in the channel in a non-constant member causing compiler warnings. While the mailbox API should treat the struct mailbox_client as const itself the struct is passed back to the channel in callbacks without a const so we need to either remove the const, change the callbacks to take const or cast the const away when doing callbacks. Take the simplest option and just remove the const. Signed-off-by: Mark Brown --- drivers/mailbox/mailbox.c | 2 +- include/linux/mailbox_client.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index e1fc6aaeb87c..607dd91e87a1 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -294,7 +294,7 @@ EXPORT_SYMBOL_GPL(mbox_send_message); * Return: Pointer to the channel assigned to the client if successful. * ERR_PTR for request failure. */ -struct mbox_chan *mbox_request_channel(const struct mbox_client *cl) +struct mbox_chan *mbox_request_channel(struct mbox_client *cl) { struct device *dev = cl->dev; struct mbox_controller *mbox; diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index cbfcf8478ca9..955f3d7641e8 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -37,7 +37,7 @@ struct mbox_client { bool knows_txdone; }; -struct mbox_chan *mbox_request_channel(const struct mbox_client *cl); +struct mbox_chan *mbox_request_channel(struct mbox_client *cl); int mbox_send_message(struct mbox_chan *chan, void *mssg); void mbox_client_txdone(struct mbox_chan *chan, int r); bool mbox_client_peek_data(struct mbox_chan *chan); From 1881384399daa2df6cef8117bc133f8f37fcbe83 Mon Sep 17 00:00:00 2001 From: Sherman Yin Date: Wed, 14 May 2014 18:30:47 -0700 Subject: [PATCH 24/30] pinctrl: Fix pin_config_*_set_bulk APIs In commit baeae2041e14d5b7a272b9941b88fb56d716f643, Linaro-specific APIs pin_config_set_bulk and pin_config_group_set_bulk were introduced. However, pinconf_check_ops was not updated, and the wrong function was used in the PIN_MAP_TYPE_CONFIGS_PIN case in pinconf_apply_setting. Change-Id: Idc4a802f3d0086b927b808f65270e548b5b927be Signed-off-by: Sherman Yin --- drivers/pinctrl/pinconf.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index ad30263a7410..596a2522a6b1 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -35,7 +35,9 @@ int pinconf_check_ops(struct pinctrl_dev *pctldev) return -EINVAL; } /* We have to be able to config the pins in SOME way */ - if (!ops->pin_config_set && !ops->pin_config_group_set) { + if (!ops->pin_config_set && !ops->pin_config_group_set + && !ops->pin_config_set_bulk + && !ops->pin_config_group_set_bulk) { dev_err(pctldev->dev, "pinconf has to be able to set a pins config\n"); return -EINVAL; @@ -171,14 +173,14 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting) dev_err(pctldev->dev, "missing pin_config_set op\n"); return -EINVAL; } - if (ops->pin_config_group_set_bulk) { - ret = ops->pin_config_group_set_bulk(pctldev, + if (ops->pin_config_set_bulk) { + ret = ops->pin_config_set_bulk(pctldev, setting->data.configs.group_or_pin, setting->data.configs.configs, setting->data.configs.num_configs); if (ret < 0) { dev_err(pctldev->dev, - "pin_config_set op failed for pin %d\n", + "pin_config_set_bulk op failed for pin %d\n", setting->data.configs.group_or_pin); return ret; } @@ -211,7 +213,7 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting) setting->data.configs.num_configs); if (ret < 0) { dev_err(pctldev->dev, - "pin_config_group_set op failed for group %d\n", + "pin_config_group_set_bulk op failed for group %d\n", setting->data.configs.group_or_pin); return ret; } From 3d65586ebc11d3919d6fa0de0cd6153b9e399005 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Mar 2014 07:34:44 +0000 Subject: [PATCH 25/30] binfmt_elf: add ELF_HWCAP2 to compat auxv entries Add ELF_HWCAP2 to the set of auxv entries that is passed to a 32-bit ELF program running in 32-bit compat mode under a 64-bit kernel. Signed-off-by: Ard Biesheuvel Acked-by: Andrew Morton Signed-off-by: Catalin Marinas (cherry picked from commit 66d6e3b385778c2eef3b172a037235516207393e) Signed-off-by: Mark Brown --- fs/compat_binfmt_elf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index a81147e2e4ef..4d24d17bcfc1 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime, #define ELF_HWCAP COMPAT_ELF_HWCAP #endif +#ifdef COMPAT_ELF_HWCAP2 +#undef ELF_HWCAP2 +#define ELF_HWCAP2 COMPAT_ELF_HWCAP2 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO From 126ef42a10e491015dcb40477b042db50c3d2acf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 8 Feb 2014 13:34:09 +0100 Subject: [PATCH 26/30] cpu: add generic support for CPU feature based module autoloading This patch adds support for advertising optional CPU features over udev using the modalias, and for declaring compatibility with/dependency upon such a feature in a module. The mapping between feature numbers and actual features should be provided by the architecture in a file called which exports the following functions/macros: - cpu_feature(FEAT), a preprocessor macro that maps token FEAT to a numeric index; - bool cpu_have_feature(n), returning whether this CPU has support for feature #n; - MAX_CPU_FEATURES, an upper bound for 'n' in the previous function. The feature can then be enabled by setting CONFIG_GENERIC_CPU_AUTOPROBE for the architecture. For instance, a module that registers its module init function using module_cpu_feature_match(FEAT_X, module_init_function) will be probed automatically when the CPU's support for the 'FEAT_X' feature is advertised over udev, and will only allow the module to be loaded by hand if the 'FEAT_X' feature is supported. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 67bad2fdb754dbef14596c0b5d28b3a12c8dfe84) Signed-off-by: Mark Brown Conflicts: drivers/base/cpu.c --- drivers/base/Kconfig | 8 +++++ drivers/base/cpu.c | 52 ++++++++++++++++++++++++--- include/linux/cpufeature.h | 60 +++++++++++++++++++++++++++++++ include/linux/mod_devicetable.h | 9 +++++ scripts/mod/devicetable-offsets.c | 3 ++ scripts/mod/file2alias.c | 10 ++++++ 6 files changed, 137 insertions(+), 5 deletions(-) create mode 100644 include/linux/cpufeature.h diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 07abd9d76f7f..9634800e800f 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -187,6 +187,14 @@ config GENERIC_CPU_DEVICES bool default n +config HAVE_CPU_AUTOPROBE + def_bool ARCH_HAS_CPU_AUTOPROBE + +config GENERIC_CPU_AUTOPROBE + bool + depends on !ARCH_HAS_CPU_AUTOPROBE + select HAVE_CPU_AUTOPROBE + config SOC_BUS bool diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 3d48fc887ef4..607efe6b7dc8 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -13,6 +13,9 @@ #include #include #include +#include +#include +#include #include "base.h" @@ -260,6 +263,45 @@ static void cpu_device_release(struct device *dev) */ } +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE +static ssize_t print_cpu_modalias(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t n; + u32 i; + + n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", + CPU_FEATURE_TYPEVAL); + + for (i = 0; i < MAX_CPU_FEATURES; i++) + if (cpu_have_feature(i)) { + if (PAGE_SIZE < n + sizeof(",XXXX\n")) { + WARN(1, "CPU features overflow page\n"); + break; + } + n += sprintf(&buf[n], ",%04X", i); + } + buf[n++] = '\n'; + return n; +} +#else +#define print_cpu_modalias arch_print_cpu_modalias +#endif + +static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (buf) { + print_cpu_modalias(NULL, NULL, buf); + add_uevent_var(env, "MODALIAS=%s", buf); + kfree(buf); + } + return 0; +} +#endif + /* * register_cpu - Setup a sysfs device for a CPU. * @cpu - cpu->hotpluggable field set to 1 will generate a control file in @@ -277,8 +319,8 @@ int __cpuinit register_cpu(struct cpu *cpu, int num) cpu->dev.id = num; cpu->dev.bus = &cpu_subsys; cpu->dev.release = cpu_device_release; -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE - cpu->dev.bus->uevent = arch_cpu_uevent; +#ifdef CONFIG_HAVE_CPU_AUTOPROBE + cpu->dev.bus->uevent = cpu_uevent; #endif error = device_register(&cpu->dev); if (!error && cpu->hotpluggable) @@ -307,8 +349,8 @@ struct device *get_cpu_device(unsigned cpu) } EXPORT_SYMBOL_GPL(get_cpu_device); -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE -static DEVICE_ATTR(modalias, 0444, arch_print_cpu_modalias, NULL); +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); #endif static struct attribute *cpu_root_attrs[] = { @@ -321,7 +363,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE +#ifdef CONFIG_HAVE_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif NULL diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h new file mode 100644 index 000000000000..c4d4eb8ac9fe --- /dev/null +++ b/include/linux/cpufeature.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_CPUFEATURE_H +#define __LINUX_CPUFEATURE_H + +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE + +#include +#include + +/* + * Macros imported from : + * - cpu_feature(x) ordinal value of feature called 'x' + * - cpu_have_feature(u32 n) whether feature #n is available + * - MAX_CPU_FEATURES upper bound for feature ordinal values + * Optional: + * - CPU_FEATURE_TYPEFMT format string fragment for printing the cpu type + * - CPU_FEATURE_TYPEVAL set of values matching the format string above + */ + +#ifndef CPU_FEATURE_TYPEFMT +#define CPU_FEATURE_TYPEFMT "%s" +#endif + +#ifndef CPU_FEATURE_TYPEVAL +#define CPU_FEATURE_TYPEVAL ELF_PLATFORM +#endif + +/* + * Use module_cpu_feature_match(feature, module_init_function) to + * declare that + * a) the module shall be probed upon discovery of CPU feature 'feature' + * (typically at boot time using udev) + * b) the module must not be loaded if CPU feature 'feature' is not present + * (not even by manual insmod). + * + * For a list of legal values for 'feature', please consult the file + * 'asm/cpufeature.h' of your favorite architecture. + */ +#define module_cpu_feature_match(x, __init) \ +static struct cpu_feature const cpu_feature_match_ ## x[] = \ + { { .feature = cpu_feature(x) }, { } }; \ +MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ + \ +static int cpu_feature_match_ ## x ## _init(void) \ +{ \ + if (!cpu_have_feature(cpu_feature(x))) \ + return -ENODEV; \ + return __init(); \ +} \ +module_init(cpu_feature_match_ ## x ## _init) + +#endif +#endif diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b508016fb76d..e2c55f297d0b 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -561,6 +561,15 @@ struct x86_cpu_id { #define X86_MODEL_ANY 0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +/* + * Generic table type for matching CPU features. + * @feature: the bit number of the feature (0 - 65535) + */ + +struct cpu_feature { + __u16 feature; +}; + #define IPACK_ANY_FORMAT 0xff #define IPACK_ANY_ID (~0) struct ipack_device_id { diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e66d4d258e1a..5e74595f2e85 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -174,6 +174,9 @@ int main(void) DEVID_FIELD(x86_cpu_id, model); DEVID_FIELD(x86_cpu_id, vendor); + DEVID(cpu_feature); + DEVID_FIELD(cpu_feature, feature); + DEVID(mei_cl_device_id); DEVID_FIELD(mei_cl_device_id, name); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 45f9a3377dcd..8a9612f17cb0 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1133,6 +1133,16 @@ static int do_x86cpu_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry); +/* LOOKS like cpu:type:*:feature:*FEAT* */ +static int do_cpu_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, cpu_feature, feature); + + sprintf(alias, "cpu:type:*:feature:*%04X*", feature); + return 1; +} +ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry); + /* Looks like: mei:S */ static int do_mei_entry(const char *filename, void *symval, char *alias) From 581a6bd433c6ec65fdb5b4667258a8b275ae5c96 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Mar 2014 01:10:04 +0000 Subject: [PATCH 27/30] arm64: enable generic CPU feature modalias matching for this architecture This enables support for the generic CPU feature modalias implementation that wires up optional CPU features to udev based module autoprobing. A file is provided that maps CPU feature numbers to elf_hwcap bits, which is the standard way on arm64 to advertise optional CPU features both internally and to user space. Signed-off-by: Ard Biesheuvel [catalin.marinas@arm.com: removed unnecessary "!!"] Signed-off-by: Catalin Marinas (cherry picked from commit 3be1a5c4f75989cf457f13f38ff0913dff6d4996) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/cpufeature.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 arch/arm64/include/asm/cpufeature.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f7ef3f6ee979..df62331c3e4d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS + select GENERIC_CPU_AUTOPROBE select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h new file mode 100644 index 000000000000..cd4ac0516488 --- /dev/null +++ b/arch/arm64/include/asm/cpufeature.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include + +/* + * In the arm64 world (as in the ARM world), elf_hwcap is used both internally + * in the kernel and for user space to keep track of which optional features + * are supported by the current system. So let's map feature 'x' to HWCAP_x. + * Note that HWCAP_x constants are bit fields so we need to take the log. + */ + +#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) +#define cpu_feature(x) ilog2(HWCAP_ ## x) + +static inline bool cpu_have_feature(unsigned int num) +{ + return elf_hwcap & (1UL << num); +} + +#endif From e978ae4553efd9417df3f5c873d3c915369a3f1e Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Sun, 28 Jul 2013 20:06:15 +0530 Subject: [PATCH 28/30] ASoC: compress: use soc_xxx handlers for metadata the compress metadata handlers were wrongly named sst_xxx Signed-off-by: Vinod Koul Signed-off-by: Mark Brown (cherry picked from commit 02bd90e86dc63728feebaf2b238684208ccb19eb) Signed-off-by: Mark Brown --- sound/soc/soc-compress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 06a8000aa07b..d22015074670 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -334,7 +334,7 @@ static int soc_compr_copy(struct snd_compr_stream *cstream, return ret; } -static int sst_compr_set_metadata(struct snd_compr_stream *cstream, +static int soc_compr_set_metadata(struct snd_compr_stream *cstream, struct snd_compr_metadata *metadata) { struct snd_soc_pcm_runtime *rtd = cstream->private_data; @@ -347,7 +347,7 @@ static int sst_compr_set_metadata(struct snd_compr_stream *cstream, return ret; } -static int sst_compr_get_metadata(struct snd_compr_stream *cstream, +static int soc_compr_get_metadata(struct snd_compr_stream *cstream, struct snd_compr_metadata *metadata) { struct snd_soc_pcm_runtime *rtd = cstream->private_data; @@ -364,8 +364,8 @@ static struct snd_compr_ops soc_compr_ops = { .open = soc_compr_open, .free = soc_compr_free, .set_params = soc_compr_set_params, - .set_metadata = sst_compr_set_metadata, - .get_metadata = sst_compr_get_metadata, + .set_metadata = soc_compr_set_metadata, + .get_metadata = soc_compr_get_metadata, .get_params = soc_compr_get_params, .trigger = soc_compr_trigger, .pointer = soc_compr_pointer, From d83ee12388ee3a3fdac2d4dac1dae9fcc73cc64a Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 17 Jan 2014 17:03:55 +0000 Subject: [PATCH 29/30] ASoC: DPCM: make some DPCM API calls non static for compressed usage The ASoC compressed code needs to call the internal DPCM APIs in order to dynamically route compressed data to different DAIs. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown (cherry picked from commit 23607025303af6e84bc2cd4cabe89c21f6a22a3f) Signed-off-by: Mark Brown --- include/sound/soc-dpcm.h | 22 ++++++++++++++++++++++ sound/soc/soc-pcm.c | 29 ++++++++++++----------------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 04598f1efd77..a2e15cad76a0 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -11,6 +11,7 @@ #ifndef __LINUX_SND_SOC_DPCM_H #define __LINUX_SND_SOC_DPCM_H +#include #include #include @@ -135,4 +136,25 @@ int soc_dpcm_be_digital_mute(struct snd_soc_pcm_runtime *fe, int mute); int soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd); int soc_dpcm_runtime_update(struct snd_soc_dapm_widget *); +int dpcm_path_get(struct snd_soc_pcm_runtime *fe, + int stream, struct snd_soc_dapm_widget_list **list_); +int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, + int stream, struct snd_soc_dapm_widget_list **list, int new); +int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream); +void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream); +void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int tream); +int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd); +int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, + int event); + +static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list) +{ + kfree(*list); +} + + #endif diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index ccb6be4d658d..6c336542a641 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -34,7 +34,7 @@ #define DPCM_MAX_BE_USERS 8 /* DPCM stream event, send event to FE and all active BEs. */ -static int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, +int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, int event) { struct snd_soc_dpcm *dpcm; @@ -757,7 +757,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, } /* disconnect a BE and FE */ -static void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) +void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm, *d; @@ -853,7 +853,7 @@ static int widget_in_list(struct snd_soc_dapm_widget_list *list, return 0; } -static int dpcm_path_get(struct snd_soc_pcm_runtime *fe, +int dpcm_path_get(struct snd_soc_pcm_runtime *fe, int stream, struct snd_soc_dapm_widget_list **list_) { struct snd_soc_dai *cpu_dai = fe->cpu_dai; @@ -875,11 +875,6 @@ static int dpcm_path_get(struct snd_soc_pcm_runtime *fe, return paths; } -static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list) -{ - kfree(*list); -} - static int dpcm_prune_paths(struct snd_soc_pcm_runtime *fe, int stream, struct snd_soc_dapm_widget_list **list_) { @@ -949,7 +944,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, continue; /* don't connect if FE is not running */ - if (!fe->dpcm[stream].runtime) + if (!fe->dpcm[stream].runtime && !fe->fe_compr) continue; /* newly connected FE and BE */ @@ -974,7 +969,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, * Find the corresponding BE DAIs that source or sink audio to this * FE substream. */ -static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, +int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, int stream, struct snd_soc_dapm_widget_list **list, int new) { if (new) @@ -983,7 +978,7 @@ static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, return dpcm_prune_paths(fe, stream, list); } -static void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) +void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -1021,7 +1016,7 @@ static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe, } } -static int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; int err, count = 0; @@ -1163,7 +1158,7 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) return ret; } -static int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -1224,7 +1219,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) return 0; } -static int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -1289,7 +1284,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) return 0; } -static int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; int ret; @@ -1419,7 +1414,7 @@ static int dpcm_do_trigger(struct snd_soc_dpcm *dpcm, return ret; } -static int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, +int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd) { struct snd_soc_dpcm *dpcm; @@ -1587,7 +1582,7 @@ static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) return ret; } -static int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; int ret = 0; From a3f2705d8fefdd578eeae9cfb530ba16331d2bad Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Fri, 17 Jan 2014 17:03:56 +0000 Subject: [PATCH 30/30] ASoC: compress: Add suport for DPCM into compressed audio Currently compressed audio streams are statically routed from the /dev to the DAI link. Some DSPs can route compressed data to multiple BE DAIs like they do for PCM data. Add support to allow dynamically routed compressed streams using the existing DPCM infrastructure. This patch adds special FE versions of the compressed ops that work out the runtime routing. Signed-off-by: Liam Girdwood Acked-by: Vinod Koul Signed-off-by: Mark Brown (cherry picked from commit 2a99ef0fdb35a0f8d6b56ccc5d9d821e9ff100c1) Signed-off-by: Mark Brown --- include/sound/soc.h | 1 + sound/soc/soc-compress.c | 301 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 301 insertions(+), 1 deletion(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 85c15226103b..5bbdc653a826 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1063,6 +1063,7 @@ struct snd_soc_pcm_runtime { /* Dynamic PCM BE runtime data */ struct snd_soc_dpcm_runtime dpcm[2]; + int fe_compr; long pmdown_time; unsigned char pop_wait:1; diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index d22015074670..1bf5c47a427d 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -24,6 +24,7 @@ #include #include #include +#include static int soc_compr_open(struct snd_compr_stream *cstream) { @@ -75,6 +76,98 @@ static int soc_compr_open(struct snd_compr_stream *cstream) return ret; } +static int soc_compr_open_fe(struct snd_compr_stream *cstream) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_soc_platform *platform = fe->platform; + struct snd_soc_dai *cpu_dai = fe->cpu_dai; + struct snd_soc_dai *codec_dai = fe->codec_dai; + struct snd_soc_dpcm *dpcm; + struct snd_soc_dapm_widget_list *list; + int stream; + int ret = 0; + + if (cstream->direction == SND_COMPRESS_PLAYBACK) + stream = SNDRV_PCM_STREAM_PLAYBACK; + else + stream = SNDRV_PCM_STREAM_CAPTURE; + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (platform->driver->compr_ops && platform->driver->compr_ops->open) { + ret = platform->driver->compr_ops->open(cstream); + if (ret < 0) { + pr_err("compress asoc: can't open platform %s\n", platform->name); + goto out; + } + } + + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->startup) { + ret = fe->dai_link->compr_ops->startup(cstream); + if (ret < 0) { + pr_err("compress asoc: %s startup failed\n", fe->dai_link->name); + goto machine_err; + } + } + + fe->dpcm[stream].runtime = fe_substream->runtime; + + if (dpcm_path_get(fe, stream, &list) <= 0) { + dev_dbg(fe->dev, "ASoC: %s no valid %s route\n", + fe->dai_link->name, stream ? "capture" : "playback"); + } + + /* calculate valid and active FE <-> BE dpcms */ + dpcm_process_paths(fe, stream, &list, 1); + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_startup(fe, stream); + if (ret < 0) { + /* clean up all links */ + list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be) + dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; + + dpcm_be_disconnect(fe, stream); + fe->dpcm[stream].runtime = NULL; + goto fe_err; + } + + dpcm_clear_pending_state(fe, stream); + dpcm_path_put(&list); + + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_OPEN; + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + + if (cstream->direction == SND_COMPRESS_PLAYBACK) { + cpu_dai->playback_active++; + codec_dai->playback_active++; + } else { + cpu_dai->capture_active++; + codec_dai->capture_active++; + } + + cpu_dai->active++; + codec_dai->active++; + fe->codec->active++; + + mutex_unlock(&fe->card->mutex); + + return 0; + +fe_err: + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown) + fe->dai_link->compr_ops->shutdown(cstream); +machine_err: + if (platform->driver->compr_ops && platform->driver->compr_ops->free) + platform->driver->compr_ops->free(cstream); +out: + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + mutex_unlock(&fe->card->mutex); + return ret; +} + /* * Power down the audio subsystem pmdown_time msecs after close is called. * This is to ensure there are no pops or clicks in between any music tracks @@ -163,6 +256,65 @@ static int soc_compr_free(struct snd_compr_stream *cstream) return 0; } +static int soc_compr_free_fe(struct snd_compr_stream *cstream) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_soc_platform *platform = fe->platform; + struct snd_soc_dai *cpu_dai = fe->cpu_dai; + struct snd_soc_dai *codec_dai = fe->codec_dai; + struct snd_soc_dpcm *dpcm; + int stream, ret; + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (cstream->direction == SND_COMPRESS_PLAYBACK) { + stream = SNDRV_PCM_STREAM_PLAYBACK; + cpu_dai->playback_active--; + codec_dai->playback_active--; + } else { + stream = SNDRV_PCM_STREAM_CAPTURE; + cpu_dai->capture_active--; + codec_dai->capture_active--; + } + + cpu_dai->active--; + codec_dai->active--; + fe->codec->active--; + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_hw_free(fe, stream); + if (ret < 0) + dev_err(fe->dev, "compressed hw_free failed %d\n", ret); + + ret = dpcm_be_dai_shutdown(fe, stream); + + /* mark FE's links ready to prune */ + list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be) + dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; + + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); + else + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); + + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + + dpcm_be_disconnect(fe, stream); + + fe->dpcm[stream].runtime = NULL; + + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown) + fe->dai_link->compr_ops->shutdown(cstream); + + if (platform->driver->compr_ops && platform->driver->compr_ops->free) + platform->driver->compr_ops->free(cstream); + + mutex_unlock(&fe->card->mutex); + return 0; +} + static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd) { @@ -193,6 +345,59 @@ static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd) return ret; } +static int soc_compr_trigger_fe(struct snd_compr_stream *cstream, int cmd) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_soc_platform *platform = fe->platform; + int ret = 0, stream; + + if (cmd == SND_COMPR_TRIGGER_PARTIAL_DRAIN || + cmd == SND_COMPR_TRIGGER_DRAIN) { + + if (platform->driver->compr_ops && + platform->driver->compr_ops->trigger) + return platform->driver->compr_ops->trigger(cstream, cmd); + } + + if (cstream->direction == SND_COMPRESS_PLAYBACK) + stream = SNDRV_PCM_STREAM_PLAYBACK; + else + stream = SNDRV_PCM_STREAM_CAPTURE; + + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (platform->driver->compr_ops && platform->driver->compr_ops->trigger) { + ret = platform->driver->compr_ops->trigger(cstream, cmd); + if (ret < 0) + goto out; + } + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_trigger(fe, stream, cmd); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_START; + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP; + break; + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED; + break; + } + +out: + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + mutex_unlock(&fe->card->mutex); + return ret; +} + static int soc_compr_set_params(struct snd_compr_stream *cstream, struct snd_compr_params *params) { @@ -240,6 +445,64 @@ static int soc_compr_set_params(struct snd_compr_stream *cstream, return ret; } +static int soc_compr_set_params_fe(struct snd_compr_stream *cstream, + struct snd_compr_params *params) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_soc_platform *platform = fe->platform; + int ret = 0, stream; + + if (cstream->direction == SND_COMPRESS_PLAYBACK) + stream = SNDRV_PCM_STREAM_PLAYBACK; + else + stream = SNDRV_PCM_STREAM_CAPTURE; + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (platform->driver->compr_ops && platform->driver->compr_ops->set_params) { + ret = platform->driver->compr_ops->set_params(cstream, params); + if (ret < 0) + goto out; + } + + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->set_params) { + ret = fe->dai_link->compr_ops->set_params(cstream); + if (ret < 0) + goto out; + } + + /* + * Create an empty hw_params for the BE as the machine driver must + * fix this up to match DSP decoder and ASRC configuration. + * I.e. machine driver fixup for compressed BE is mandatory. + */ + memset(&fe->dpcm[fe_substream->stream].hw_params, 0, + sizeof(struct snd_pcm_hw_params)); + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_hw_params(fe, stream); + if (ret < 0) + goto out; + + ret = dpcm_be_dai_prepare(fe, stream); + if (ret < 0) + goto out; + + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START); + else + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START); + + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE; + +out: + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + mutex_unlock(&fe->card->mutex); + return ret; +} + static int soc_compr_get_params(struct snd_compr_stream *cstream, struct snd_codec *params) { @@ -359,6 +622,7 @@ static int soc_compr_get_metadata(struct snd_compr_stream *cstream, return ret; } + /* ASoC Compress operations */ static struct snd_compr_ops soc_compr_ops = { .open = soc_compr_open, @@ -374,6 +638,21 @@ static struct snd_compr_ops soc_compr_ops = { .get_codec_caps = soc_compr_get_codec_caps }; +/* ASoC Dynamic Compress operations */ +static struct snd_compr_ops soc_compr_dyn_ops = { + .open = soc_compr_open_fe, + .free = soc_compr_free_fe, + .set_params = soc_compr_set_params_fe, + .get_params = soc_compr_get_params, + .set_metadata = soc_compr_set_metadata, + .get_metadata = soc_compr_get_metadata, + .trigger = soc_compr_trigger_fe, + .pointer = soc_compr_pointer, + .ack = soc_compr_ack, + .get_caps = soc_compr_get_caps, + .get_codec_caps = soc_compr_get_codec_caps +}; + /* create a new compress */ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) { @@ -382,6 +661,7 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) struct snd_soc_dai *codec_dai = rtd->codec_dai; struct snd_soc_dai *cpu_dai = rtd->cpu_dai; struct snd_compr *compr; + struct snd_pcm *be_pcm; char new_name[64]; int ret = 0, direction = 0; @@ -409,7 +689,26 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) ret = -ENOMEM; goto compr_err; } - memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops)); + + if (rtd->dai_link->dynamic) { + snprintf(new_name, sizeof(new_name), "(%s)", + rtd->dai_link->stream_name); + + ret = snd_pcm_new_internal(rtd->card->snd_card, new_name, num, + 1, 0, &be_pcm); + if (ret < 0) { + dev_err(rtd->card->dev, "ASoC: can't create compressed for %s\n", + rtd->dai_link->name); + goto compr_err; + } + + rtd->pcm = be_pcm; + rtd->fe_compr = 1; + be_pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream->private_data = rtd; + be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream->private_data = rtd; + memcpy(compr->ops, &soc_compr_dyn_ops, sizeof(soc_compr_dyn_ops)); + } else + memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops)); /* Add copy callback for not memory mapped DSPs */ if (platform->driver->compr_ops && platform->driver->compr_ops->copy)