diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt index 20746e5abe6f..06fc7602593a 100644 --- a/Documentation/devicetree/bindings/arm/arch_timer.txt +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt @@ -1,10 +1,14 @@ * ARM architected timer -ARM cores may have a per-core architected timer, which provides per-cpu timers. +ARM cores may have a per-core architected timer, which provides per-cpu timers, +or a memory mapped architected timer, which provides up to 8 frames with a +physical and optional virtual timer per frame. -The timer is attached to a GIC to deliver its per-processor interrupts. +The per-core architected timer is attached to a GIC to deliver its +per-processor interrupts via PPIs. The memory mapped timer is attached to a GIC +to deliver its interrupts via SPIs. -** Timer node properties: +** CP15 Timer node properties: - compatible : Should at least contain one of "arm,armv7-timer" @@ -26,3 +30,52 @@ Example: <1 10 0xf08>; clock-frequency = <100000000>; }; + +** Memory mapped timer node properties: + +- compatible : Should at least contain "arm,armv7-timer-mem". + +- clock-frequency : The frequency of the main counter, in Hz. Optional. + +- reg : The control frame base address. + +Note that #address-cells, #size-cells, and ranges shall be present to ensure +the CPU can address a frame's registers. + +A timer node has up to 8 frame sub-nodes, each with the following properties: + +- frame-number: 0 to 7. + +- interrupts : Interrupt list for physical and virtual timers in that order. + The virtual timer interrupt is optional. + +- reg : The first and second view base addresses in that order. The second view + base address is optional. + +- status : "disabled" indicates the frame is not available for use. Optional. + +Example: + + timer@f0000000 { + compatible = "arm,armv7-timer-mem"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + reg = <0xf0000000 0x1000>; + clock-frequency = <50000000>; + + frame@f0001000 { + frame-number = <0> + interrupts = <0 13 0x8>, + <0 14 0x8>; + reg = <0xf0001000 0x1000>, + <0xf0002000 0x1000>; + }; + + frame@f0003000 { + frame-number = <1> + interrupts = <0 15 0x8>; + reg = <0xf0003000 0x1000>; + status = "disabled"; + }; + }; diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt new file mode 100644 index 000000000000..3f009555f392 --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt @@ -0,0 +1,33 @@ +* Generic Mailbox Controller and client driver bindings + +Generic binding to provide a way for Mailbox controller drivers to +assign appropriate mailbox channel to client drivers. + +* Mailbox Controller + +Required property: +- #mbox-cells: Must be at least 1. Number of cells in a mailbox + specifier. + +Example: + mailbox: mailbox { + ... + #mbox-cells = <1>; + }; + + +* Mailbox Client + +Required property: +- mbox: List of phandle and mailbox channel specifier. + +- mbox-names: List of identifier strings for each mailbox channel + required by the client. + +Example: + pwr_cntrl: power { + ... + mbox-names = "pwr-ctrl", "rpc"; + mbox = <&mailbox 0 + &mailbox 1>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 0c204dd7c12b..b4f332475b73 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5152,6 +5152,14 @@ S: Maintained F: drivers/net/macvlan.c F: include/linux/if_macvlan.h +MAILBOX API +M: Jassi Brar +L: linux-kernel@vger.kernel.org +S: Maintained +F: drivers/mailbox/ +F: include/linux/mailbox_client.h +F: include/linux/mailbox_controller.h + MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 M: Michael Kerrisk W: http://www.kernel.org/doc/man-pages diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index a60052b24916..4b26d14e41b3 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -17,7 +17,8 @@ int arch_timer_arch_init(void); * nicely work out which register we want, and chuck away the rest of * the code. At least it does so with a recent GCC (4.6.3). */ -static inline void arch_timer_reg_write(const int access, const int reg, u32 val) +static __always_inline +void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { @@ -28,9 +29,7 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val)); break; } - } - - if (access == ARCH_TIMER_VIRT_ACCESS) { + } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val)); @@ -44,7 +43,8 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val isb(); } -static inline u32 arch_timer_reg_read(const int access, const int reg) +static __always_inline +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { u32 val = 0; @@ -57,9 +57,7 @@ static inline u32 arch_timer_reg_read(const int access, const int reg) asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val)); break; } - } - - if (access == ARCH_TIMER_VIRT_ACCESS) { + } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val)); diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 35d1029d7c9d..c37d31e15a06 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index afbdbd099d67..dea35c100fd6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -17,6 +17,7 @@ config ARM64 select DCACHE_WORD_ACCESS select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP select GENERIC_IOMAP select GENERIC_IRQ_PROBE @@ -461,5 +462,8 @@ source "arch/arm64/Kconfig.debug" source "security/Kconfig" source "crypto/Kconfig" +if CRYPTO +source "arch/arm64/crypto/Kconfig" +endif source "lib/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 27ad9166d012..28750a191dd8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -43,6 +43,7 @@ TEXT_OFFSET := 0x00080000 export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts index a46be6148b3a..ed55571e06dd 100644 --- a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts +++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts @@ -54,7 +54,7 @@ aliases { psci { compatible = "arm,psci"; method = "smc"; - cpu_suspend = <0xc4000001>; + cpu_suspend = <0x84000001>; cpu_off = <0x84000002>; cpu_on = <0xc4000003>; }; @@ -63,12 +63,33 @@ cpus { #address-cells = <2>; #size-cells = <0>; + idle-states { + entry-method = "arm,psci"; + + CPU_SLEEP_0: cpu-sleep-0 { + compatible = "arm,idle-state"; + entry-method-param = <0x0010000>; + entry-latency-us = <40>; + exit-latency-us = <100>; + min-residency-us = <150>; + }; + + CLUSTER_SLEEP_0: cluster-sleep-0 { + compatible = "arm,idle-state"; + entry-method-param = <0x1010000>; + entry-latency-us = <500>; + exit-latency-us = <1000>; + min-residency-us = <2500>; + }; + }; + big0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a57", "arm,armv8"; reg = <0x0 0x0>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; big1: cpu@1 { device_type = "cpu"; @@ -76,6 +97,7 @@ big1: cpu@1 { reg = <0x0 0x1>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; big2: cpu@2 { device_type = "cpu"; @@ -83,6 +105,7 @@ big2: cpu@2 { reg = <0x0 0x2>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; big3: cpu@3 { device_type = "cpu"; @@ -90,6 +113,7 @@ big3: cpu@3 { reg = <0x0 0x3>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little0: cpu@100 { device_type = "cpu"; @@ -97,6 +121,7 @@ little0: cpu@100 { reg = <0x0 0x100>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little1: cpu@101 { device_type = "cpu"; @@ -104,6 +129,7 @@ little1: cpu@101 { reg = <0x0 0x101>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little2: cpu@102 { device_type = "cpu"; @@ -111,6 +137,7 @@ little2: cpu@102 { reg = <0x0 0x102>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; little3: cpu@103 { device_type = "cpu"; @@ -118,6 +145,7 @@ little3: cpu@103 { reg = <0x0 0x103>; enable-method = "psci"; clock-frequency = <1000000>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; cpu-map { diff --git a/arch/arm64/boot/dts/juno.dts b/arch/arm64/boot/dts/juno.dts index 9785a14ca604..f260d702041c 100644 --- a/arch/arm64/boot/dts/juno.dts +++ b/arch/arm64/boot/dts/juno.dts @@ -68,7 +68,7 @@ cpu@1 { memory@80000000 { device_type = "memory"; - reg = <0x00000000 0x80000000 0x0 0x80000000>, + reg = <0x00000000 0x80000000 0x0 0x7f000000>, <0x00000008 0x80000000 0x1 0x80000000>; }; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig new file mode 100644 index 000000000000..5562652c5316 --- /dev/null +++ b/arch/arm64/crypto/Kconfig @@ -0,0 +1,53 @@ + +menuconfig ARM64_CRYPTO + bool "ARM64 Accelerated Cryptographic Algorithms" + depends on ARM64 + help + Say Y here to choose from a selection of cryptographic algorithms + implemented using ARM64 specific CPU features or instructions. + +if ARM64_CRYPTO + +config CRYPTO_SHA1_ARM64_CE + tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +config CRYPTO_SHA2_ARM64_CE + tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +config CRYPTO_GHASH_ARM64_CE + tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +config CRYPTO_AES_ARM64_CE + tristate "AES core cipher using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + +config CRYPTO_AES_ARM64_CE_CCM + tristate "AES in CCM mode using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + select CRYPTO_AEAD + +config CRYPTO_AES_ARM64_CE_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + +config CRYPTO_AES_ARM64_NEON_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + +endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile new file mode 100644 index 000000000000..2070a56ecc46 --- /dev/null +++ b/arch/arm64/crypto/Makefile @@ -0,0 +1,38 @@ +# +# linux/arch/arm64/crypto/Makefile +# +# Copyright (C) 2014 Linaro Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o +sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o + +obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o +sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o + +obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o +ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o +CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o +aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o +aes-ce-blk-y := aes-glue-ce.o aes-ce.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o +aes-neon-blk-y := aes-glue-neon.o aes-neon.o + +AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE +AFLAGS_aes-neon.o := -DINTERLEAVE=4 + +CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS + +$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE + $(call if_changed_dep,cc_o_c) diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S new file mode 100644 index 000000000000..432e4841cd81 --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -0,0 +1,222 @@ +/* + * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + + .text + .arch armv8-a+crypto + + /* + * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + * u32 *macp, u8 const rk[], u32 rounds); + */ +ENTRY(ce_aes_ccm_auth_data) + ldr w8, [x3] /* leftover from prev round? */ + ld1 {v0.2d}, [x0] /* load mac */ + cbz w8, 1f + sub w8, w8, #16 + eor v1.16b, v1.16b, v1.16b +0: ldrb w7, [x1], #1 /* get 1 byte of input */ + subs w2, w2, #1 + add w8, w8, #1 + ins v1.b[0], w7 + ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ + beq 8f /* out of input? */ + cbnz w8, 0b + eor v0.16b, v0.16b, v1.16b +1: ld1 {v3.2d}, [x4] /* load first round key */ + prfm pldl1strm, [x1] + cmp w5, #12 /* which key size? */ + add x6, x4, #16 + sub w7, w5, #2 /* modified # of rounds */ + bmi 2f + bne 5f + mov v5.16b, v3.16b + b 4f +2: mov v4.16b, v3.16b + ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ +3: aese v0.16b, v4.16b + aesmc v0.16b, v0.16b +4: ld1 {v3.2d}, [x6], #16 /* load next round key */ + aese v0.16b, v5.16b + aesmc v0.16b, v0.16b +5: ld1 {v4.2d}, [x6], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aesmc v0.16b, v0.16b + ld1 {v5.2d}, [x6], #16 /* load next round key */ + bpl 3b + aese v0.16b, v4.16b + subs w2, w2, #16 /* last data? */ + eor v0.16b, v0.16b, v5.16b /* final round */ + bmi 6f + ld1 {v1.16b}, [x1], #16 /* load next input block */ + eor v0.16b, v0.16b, v1.16b /* xor with mac */ + bne 1b +6: st1 {v0.2d}, [x0] /* store mac */ + beq 10f + adds w2, w2, #16 + beq 10f + mov w8, w2 +7: ldrb w7, [x1], #1 + umov w6, v0.b[0] + eor w6, w6, w7 + strb w6, [x0], #1 + subs w2, w2, #1 + beq 10f + ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ + b 7b +8: mov w7, w8 + add w8, w8, #16 +9: ext v1.16b, v1.16b, v1.16b, #1 + adds w7, w7, #1 + bne 9b + eor v0.16b, v0.16b, v1.16b + st1 {v0.2d}, [x0] +10: str w8, [x3] + ret +ENDPROC(ce_aes_ccm_auth_data) + + /* + * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], + * u32 rounds); + */ +ENTRY(ce_aes_ccm_final) + ld1 {v3.2d}, [x2], #16 /* load first round key */ + ld1 {v0.2d}, [x0] /* load mac */ + cmp w3, #12 /* which key size? */ + sub w3, w3, #2 /* modified # of rounds */ + ld1 {v1.2d}, [x1] /* load 1st ctriv */ + bmi 0f + bne 3f + mov v5.16b, v3.16b + b 2f +0: mov v4.16b, v3.16b +1: ld1 {v5.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +2: ld1 {v3.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v4.2d}, [x2], #16 /* load next round key */ + subs w3, w3, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + bpl 1b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + /* final round key cancels out */ + eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ + st1 {v0.2d}, [x0] /* store result */ + ret +ENDPROC(ce_aes_ccm_final) + + .macro aes_ccm_do_crypt,enc + ldr x8, [x6, #8] /* load lower ctr */ + ld1 {v0.2d}, [x5] /* load mac */ + rev x8, x8 /* keep swabbed ctr in reg */ +0: /* outer loop */ + ld1 {v1.1d}, [x6] /* load upper ctr */ + prfm pldl1strm, [x1] + add x8, x8, #1 + rev x9, x8 + cmp w4, #12 /* which key size? */ + sub w7, w4, #2 /* get modified # of rounds */ + ins v1.d[1], x9 /* no carry in lower ctr */ + ld1 {v3.2d}, [x3] /* load first round key */ + add x10, x3, #16 + bmi 1f + bne 4f + mov v5.16b, v3.16b + b 3f +1: mov v4.16b, v3.16b + ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ +2: /* inner loop: 3 rounds, 2x interleaved */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v3.2d}, [x10], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +4: ld1 {v4.2d}, [x10], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + ld1 {v5.2d}, [x10], #16 /* load next round key */ + bpl 2b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + subs w2, w2, #16 + bmi 6f /* partial block? */ + ld1 {v2.16b}, [x1], #16 /* load next input block */ + .if \enc == 1 + eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ + eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ + .else + eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */ + eor v1.16b, v2.16b, v5.16b /* final round enc */ + .endif + eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ + st1 {v1.16b}, [x0], #16 /* write output block */ + bne 0b + rev x8, x8 + st1 {v0.2d}, [x5] /* store mac */ + str x8, [x6, #8] /* store lsb end of ctr (BE) */ +5: ret + +6: eor v0.16b, v0.16b, v5.16b /* final round mac */ + eor v1.16b, v1.16b, v5.16b /* final round enc */ + st1 {v0.2d}, [x5] /* store mac */ + add w2, w2, #16 /* process partial tail block */ +7: ldrb w9, [x1], #1 /* get 1 byte of input */ + umov w6, v1.b[0] /* get top crypted ctr byte */ + umov w7, v0.b[0] /* get top mac byte */ + .if \enc == 1 + eor w7, w7, w9 + eor w9, w9, w6 + .else + eor w9, w9, w6 + eor w7, w7, w9 + .endif + strb w9, [x0], #1 /* store out byte */ + strb w7, [x5], #1 /* store mac byte */ + subs w2, w2, #1 + beq 5b + ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ + ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ + b 7b + .endm + + /* + * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + */ +ENTRY(ce_aes_ccm_encrypt) + aes_ccm_do_crypt 1 +ENDPROC(ce_aes_ccm_encrypt) + +ENTRY(ce_aes_ccm_decrypt) + aes_ccm_do_crypt 0 +ENDPROC(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c new file mode 100644 index 000000000000..9e6cdde9b43d --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -0,0 +1,297 @@ +/* + * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + u32 *macp, u32 const rk[], u32 rounds); + +asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[], + u32 rounds); + +static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + if ((authsize & 1) || authsize < 4) + return -EINVAL; + return 0; +} + +static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8]; + u32 l = req->iv[0] + 1; + + /* verify that CCM dimension 'L' is set correctly in the IV */ + if (l < 2 || l > 8) + return -EINVAL; + + /* verify that msglen can in fact be represented in L bytes */ + if (l < 4 && msglen >> (8 * l)) + return -EOVERFLOW; + + /* + * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi + * uses a u32 type to represent msglen so the top 4 bytes are always 0. + */ + n[0] = 0; + n[1] = cpu_to_be32(msglen); + + memcpy(maciv, req->iv, AES_BLOCK_SIZE - l); + + /* + * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C) + * - bits 0..2 : max # of bytes required to represent msglen, minus 1 + * (already set by caller) + * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc) + * - bit 6 : indicates presence of authenticate-only data + */ + maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2; + if (req->assoclen) + maciv[0] |= 0x40; + + memset(&req->iv[AES_BLOCK_SIZE - l], 0, l); + return 0; +} + +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct __packed { __be16 l; __be32 h; u16 len; } ltag; + struct scatter_walk walk; + u32 len = req->assoclen; + u32 macp = 0; + + /* prepend the AAD with a length tag */ + if (len < 0xff00) { + ltag.l = cpu_to_be16(len); + ltag.len = 2; + } else { + ltag.l = cpu_to_be16(0xfffe); + put_unaligned_be32(len, <ag.h); + ltag.len = 6; + } + + ce_aes_ccm_auth_data(mac, (u8 *)<ag, ltag.len, &macp, ctx->key_enc, + num_rounds(ctx)); + scatterwalk_start(&walk, req->assoc); + + do { + u32 n = scatterwalk_clamp(&walk, len); + u8 *p; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, len); + } + p = scatterwalk_map(&walk); + ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc, + num_rounds(ctx)); + len -= n; + + scatterwalk_unmap(p); + scatterwalk_advance(&walk, n); + scatterwalk_done(&walk, 0, len); + } while (len); +} + +static int ccm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(mac, req->dst, req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int ccm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + unsigned int authsize = crypto_aead_authsize(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen - authsize; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize, + authsize, 0); + + if (memcmp(mac, buf, authsize)) + return -EBADMSG; + return 0; +} + +static struct crypto_alg ccm_aes_alg = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_aead_type, + .cra_module = THIS_MODULE, + .cra_aead = { + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = ccm_setkey, + .setauthsize = ccm_setauthsize, + .encrypt = ccm_encrypt, + .decrypt = ccm_decrypt, + } +}; + +static int __init aes_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_AES)) + return -ENODEV; + return crypto_register_alg(&ccm_aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&ccm_aes_alg); +} + +module_init(aes_mod_init); +module_exit(aes_mod_exit); + +MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("ccm(aes)"); diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c new file mode 100644 index 000000000000..2075e1acae6b --- /dev/null +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -0,0 +1,155 @@ +/* + * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +struct aes_block { + u8 b[AES_BLOCK_SIZE]; +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aese v0.16b, v2.16b ;" + " aesmc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aese v0.16b, v3.16b ;" + " aesmc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aese v0.16b, v1.16b ;" + " aesmc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aese v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_enc), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aesd v0.16b, v2.16b ;" + " aesimc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aesd v0.16b, v3.16b ;" + " aesimc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aesd v0.16b, v1.16b ;" + " aesimc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aesd v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_dec), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + .cra_cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = crypto_aes_set_key, + .cia_encrypt = aes_cipher_encrypt, + .cia_decrypt = aes_cipher_decrypt + } +}; + +static int __init aes_mod_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_cpu_feature_match(AES, aes_mod_init); +module_exit(aes_mod_exit); diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S new file mode 100644 index 000000000000..685a18f731eb --- /dev/null +++ b/arch/arm64/crypto/aes-ce.S @@ -0,0 +1,133 @@ +/* + * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with + * Crypto Extensions + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#define AES_ENTRY(func) ENTRY(ce_ ## func) +#define AES_ENDPROC(func) ENDPROC(ce_ ## func) + + .arch armv8-a+crypto + + /* preload all round keys */ + .macro load_round_keys, rounds, rk + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + ld1 {v17.16b-v18.16b}, [\rk], #32 +1111: ld1 {v19.16b-v20.16b}, [\rk], #32 +2222: ld1 {v21.16b-v24.16b}, [\rk], #64 + ld1 {v25.16b-v28.16b}, [\rk], #64 + ld1 {v29.16b-v31.16b}, [\rk] + .endm + + /* prepare for encryption with key in rk[] */ + .macro enc_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for encryption (again) but with new key in rk[] */ + .macro enc_switch_key, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for decryption with key in rk[] */ + .macro dec_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + aes\mc \i0\().16b, \i0\().16b + .ifnb \i1 + aes\mc \i1\().16b, \i1\().16b + .ifnb \i3 + aes\mc \i2\().16b, \i2\().16b + aes\mc \i3\().16b, \i3\().16b + .endif + .endif + .endm + + /* up to 4 interleaved encryption rounds with the same round key */ + .macro round_Nx, enc, k, i0, i1, i2, i3 + .ifc \enc, e + do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3 + .else + do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3 + .endif + .endm + + /* up to 4 interleaved final rounds */ + .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + eor \i0\().16b, \i0\().16b, \k2\().16b + .ifnb \i1 + eor \i1\().16b, \i1\().16b, \k2\().16b + .ifnb \i3 + eor \i2\().16b, \i2\().16b, \k2\().16b + eor \i3\().16b, \i3\().16b, \k2\().16b + .endif + .endif + .endm + + /* up to 4 interleaved blocks */ + .macro do_block_Nx, enc, rounds, i0, i1, i2, i3 + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + round_Nx \enc, v17, \i0, \i1, \i2, \i3 + round_Nx \enc, v18, \i0, \i1, \i2, \i3 +1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3 + round_Nx \enc, v20, \i0, \i1, \i2, \i3 +2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 + round_Nx \enc, \key, \i0, \i1, \i2, \i3 + .endr + fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3 + .endm + + .macro encrypt_block, in, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \in + .endm + + .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1 + .endm + + .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 + .endm + + .macro decrypt_block, in, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \in + .endm + + .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1 + .endm + + .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 + .endm + +#include "aes-modes.S" diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c new file mode 100644 index 000000000000..60f2f4c12256 --- /dev/null +++ b/arch/arm64/crypto/aes-glue.c @@ -0,0 +1,446 @@ +/* + * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_V8_CRYPTO_EXTENSIONS +#define MODE "ce" +#define PRIO 300 +#define aes_ecb_encrypt ce_aes_ecb_encrypt +#define aes_ecb_decrypt ce_aes_ecb_decrypt +#define aes_cbc_encrypt ce_aes_cbc_encrypt +#define aes_cbc_decrypt ce_aes_cbc_decrypt +#define aes_ctr_encrypt ce_aes_ctr_encrypt +#define aes_xts_encrypt ce_aes_xts_encrypt +#define aes_xts_decrypt ce_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); +#else +#define MODE "neon" +#define PRIO 200 +#define aes_ecb_encrypt neon_aes_ecb_encrypt +#define aes_ecb_decrypt neon_aes_ecb_decrypt +#define aes_cbc_encrypt neon_aes_cbc_encrypt +#define aes_cbc_decrypt neon_aes_cbc_decrypt +#define aes_ctr_encrypt neon_aes_ctr_encrypt +#define aes_xts_encrypt neon_aes_xts_encrypt +#define aes_xts_decrypt neon_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); +MODULE_ALIAS("ecb(aes)"); +MODULE_ALIAS("cbc(aes)"); +MODULE_ALIAS("ctr(aes)"); +MODULE_ALIAS("xts(aes)"); +#endif + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +/* defined in aes-modes.S */ +asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); +asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); + +asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); +asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); + +asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[], int first); + +asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); +asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); + +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; +}; + +static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2); + if (!ret) + ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2], + key_len / 2); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + + first = 1; + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + first = 0; + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) + break; + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 __aligned(8) tail[AES_BLOCK_SIZE]; + + /* + * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need + * to tell aes_ctr_encrypt() to only read half a block. + */ + blocks = (nbytes <= 8) ? -1 : 1; + + aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, + blocks, walk.iv, first); + memcpy(tdst, tail, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_enc, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_dec, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static struct crypto_alg aes_algs[] = { { + .cra_name = "__ecb-aes-" MODE, + .cra_driver_name = "__driver-ecb-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, +}, { + .cra_name = "__cbc-aes-" MODE, + .cra_driver_name = "__driver-cbc-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-" MODE, + .cra_driver_name = "__driver-ctr-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-" MODE, + .cra_driver_name = "__driver-xts-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aes_init(void) +{ + return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static void __exit aes_exit(void) +{ + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +#ifdef USE_V8_CRYPTO_EXTENSIONS +module_cpu_feature_match(AES, aes_init); +#else +module_init(aes_init); +#endif +module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S new file mode 100644 index 000000000000..f6e372c528eb --- /dev/null +++ b/arch/arm64/crypto/aes-modes.S @@ -0,0 +1,532 @@ +/* + * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* included by aes-ce.S and aes-neon.S */ + + .text + .align 4 + +/* + * There are several ways to instantiate this code: + * - no interleave, all inline + * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2) + * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE) + * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4) + * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE) + * + * Macros imported by this code: + * - enc_prepare - setup NEON registers for encryption + * - dec_prepare - setup NEON registers for decryption + * - enc_switch_key - change to new key after having prepared for encryption + * - encrypt_block - encrypt a single block + * - decrypt block - decrypt a single block + * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4) + * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4) + */ + +#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE) +#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp +#define FRAME_POP ldp x29, x30, [sp],#16 + +#if INTERLEAVE == 2 + +aes_encrypt_block2x: + encrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block2x) + +aes_decrypt_block2x: + decrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block2x) + +#elif INTERLEAVE == 4 + +aes_encrypt_block4x: + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block4x) + +aes_decrypt_block4x: + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block4x) + +#else +#error INTERLEAVE should equal 2 or 4 +#endif + + .macro do_encrypt_block2x + bl aes_encrypt_block2x + .endm + + .macro do_decrypt_block2x + bl aes_decrypt_block2x + .endm + + .macro do_encrypt_block4x + bl aes_encrypt_block4x + .endm + + .macro do_decrypt_block4x + bl aes_decrypt_block4x + .endm + +#else +#define FRAME_PUSH +#define FRAME_POP + + .macro do_encrypt_block2x + encrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block2x + decrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_encrypt_block4x + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block4x + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + +#endif + + /* + * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + */ + +AES_ENTRY(aes_ecb_encrypt) + FRAME_PUSH + cbz w5, .LecbencloopNx + + enc_prepare w3, x2, x5 + +.LecbencloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + do_encrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + do_encrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbencloopNx +.Lecbenc1x: + adds w4, w4, #INTERLEAVE + beq .Lecbencout +#endif +.Lecbencloop: + ld1 {v0.16b}, [x1], #16 /* get next pt block */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbencloop +.Lecbencout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_encrypt) + + +AES_ENTRY(aes_ecb_decrypt) + FRAME_PUSH + cbz w5, .LecbdecloopNx + + dec_prepare w3, x2, x5 + +.LecbdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + do_decrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + do_decrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbdecloopNx +.Lecbdec1x: + adds w4, w4, #INTERLEAVE + beq .Lecbdecout +#endif +.Lecbdecloop: + ld1 {v0.16b}, [x1], #16 /* get next ct block */ + decrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbdecloop +.Lecbdecout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_decrypt) + + + /* + * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + */ + +AES_ENTRY(aes_cbc_encrypt) + cbz w6, .Lcbcencloop + + ld1 {v0.16b}, [x5] /* get iv */ + enc_prepare w3, x2, x5 + +.Lcbcencloop: + ld1 {v1.16b}, [x1], #16 /* get next pt block */ + eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcencloop + ret +AES_ENDPROC(aes_cbc_encrypt) + + +AES_ENTRY(aes_cbc_decrypt) + FRAME_PUSH + cbz w6, .LcbcdecloopNx + + ld1 {v7.16b}, [x5] /* get iv */ + dec_prepare w3, x2, x5 + +.LcbcdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lcbcdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + mov v2.16b, v0.16b + mov v3.16b, v1.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v2.16b + mov v7.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + mov v4.16b, v0.16b + mov v5.16b, v1.16b + mov v6.16b, v2.16b + do_decrypt_block4x + sub x1, x1, #16 + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v4.16b + ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */ + eor v2.16b, v2.16b, v5.16b + eor v3.16b, v3.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LcbcdecloopNx +.Lcbcdec1x: + adds w4, w4, #INTERLEAVE + beq .Lcbcdecout +#endif +.Lcbcdecloop: + ld1 {v1.16b}, [x1], #16 /* get next ct block */ + mov v0.16b, v1.16b /* ...and copy to v0 */ + decrypt_block v0, w3, x2, x5, w6 + eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ + mov v7.16b, v1.16b /* ct is next iv */ + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcdecloop +.Lcbcdecout: + FRAME_POP + ret +AES_ENDPROC(aes_cbc_decrypt) + + + /* + * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 ctr[], int first) + */ + +AES_ENTRY(aes_ctr_encrypt) + FRAME_PUSH + cbnz w6, .Lctrfirst /* 1st time around? */ + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrinc + add x5, x5, #1 /* increment BE ctr */ + b .LctrincNx +#else + b .Lctrinc +#endif +.Lctrfirst: + enc_prepare w3, x2, x6 + ld1 {v4.16b}, [x5] + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrloop +.LctrloopNx: + subs w4, w4, #INTERLEAVE + bmi .Lctr1x +#if INTERLEAVE == 2 + mov v0.8b, v4.8b + mov v1.8b, v4.8b + rev x7, x5 + add x5, x5, #1 + ins v0.d[1], x7 + rev x7, x5 + add x5, x5, #1 + ins v1.d[1], x7 + ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ + do_encrypt_block2x + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ + dup v7.4s, w5 + mov v0.16b, v4.16b + add v7.4s, v7.4s, v8.4s + mov v1.16b, v4.16b + rev32 v8.16b, v7.16b + mov v2.16b, v4.16b + mov v3.16b, v4.16b + mov v1.s[3], v8.s[0] + mov v2.s[3], v8.s[1] + mov v3.s[3], v8.s[2] + ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ + do_encrypt_block4x + eor v0.16b, v5.16b, v0.16b + ld1 {v5.16b}, [x1], #16 /* get 1 input block */ + eor v1.16b, v6.16b, v1.16b + eor v2.16b, v7.16b, v2.16b + eor v3.16b, v5.16b, v3.16b + st1 {v0.16b-v3.16b}, [x0], #64 + add x5, x5, #INTERLEAVE +#endif + cbz w4, .LctroutNx +.LctrincNx: + rev x7, x5 + ins v4.d[1], x7 + b .LctrloopNx +.LctroutNx: + sub x5, x5, #1 + rev x7, x5 + ins v4.d[1], x7 + b .Lctrout +.Lctr1x: + adds w4, w4, #INTERLEAVE + beq .Lctrout +#endif +.Lctrloop: + mov v0.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + subs w4, w4, #1 + bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ + ld1 {v3.16b}, [x1], #16 + eor v3.16b, v0.16b, v3.16b + st1 {v3.16b}, [x0], #16 + beq .Lctrout +.Lctrinc: + adds x5, x5, #1 /* increment BE ctr */ + rev x7, x5 + ins v4.d[1], x7 + bcc .Lctrloop /* no overflow? */ + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrloop +.Lctrhalfblock: + ld1 {v3.8b}, [x1] + eor v3.8b, v0.8b, v3.8b + st1 {v3.8b}, [x0] +.Lctrout: + FRAME_POP + ret +AES_ENDPROC(aes_ctr_encrypt) + .ltorg + + + /* + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + */ + + .macro next_tweak, out, in, const, tmp + sshr \tmp\().2d, \in\().2d, #63 + and \tmp\().16b, \tmp\().16b, \const\().16b + add \out\().2d, \in\().2d, \in\().2d + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \out\().16b, \out\().16b, \tmp\().16b + .endm + +.Lxts_mul_x: + .word 1, 0, 0x87, 0 + +AES_ENTRY(aes_xts_encrypt) + FRAME_PUSH + cbz w7, .LxtsencloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + enc_switch_key w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsencNx + +.LxtsencloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsencNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_encrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsencoutNx + next_tweak v4, v5, v7, v8 + b .LxtsencNx +.LxtsencoutNx: + mov v4.16b, v5.16b + b .Lxtsencout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_encrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsencout + b .LxtsencloopNx +#endif +.Lxtsenc1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsencout +#endif +.Lxtsencloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsencout + next_tweak v4, v4, v7, v8 + b .Lxtsencloop +.Lxtsencout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_encrypt) + + +AES_ENTRY(aes_xts_decrypt) + FRAME_PUSH + cbz w7, .LxtsdecloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + dec_prepare w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsdecNx + +.LxtsdecloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsdecNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsdecoutNx + next_tweak v4, v5, v7, v8 + b .LxtsdecNx +.LxtsdecoutNx: + mov v4.16b, v5.16b + b .Lxtsdecout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_decrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsdecout + b .LxtsdecloopNx +#endif +.Lxtsdec1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsdecout +#endif +.Lxtsdecloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + decrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsdecout + next_tweak v4, v4, v7, v8 + b .Lxtsdecloop +.Lxtsdecout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_decrypt) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S new file mode 100644 index 000000000000..b93170e1cc93 --- /dev/null +++ b/arch/arm64/crypto/aes-neon.S @@ -0,0 +1,382 @@ +/* + * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#define AES_ENTRY(func) ENTRY(neon_ ## func) +#define AES_ENDPROC(func) ENDPROC(neon_ ## func) + + /* multiply by polynomial 'x' in GF(2^8) */ + .macro mul_by_x, out, in, temp, const + sshr \temp, \in, #7 + add \out, \in, \in + and \temp, \temp, \const + eor \out, \out, \temp + .endm + + /* preload the entire Sbox */ + .macro prepare, sbox, shiftrows, temp + adr \temp, \sbox + movi v12.16b, #0x40 + ldr q13, \shiftrows + movi v14.16b, #0x1b + ld1 {v16.16b-v19.16b}, [\temp], #64 + ld1 {v20.16b-v23.16b}, [\temp], #64 + ld1 {v24.16b-v27.16b}, [\temp], #64 + ld1 {v28.16b-v31.16b}, [\temp] + .endm + + /* do preload for encryption */ + .macro enc_prepare, ignore0, ignore1, temp + prepare .LForward_Sbox, .LForward_ShiftRows, \temp + .endm + + .macro enc_switch_key, ignore0, ignore1, temp + /* do nothing */ + .endm + + /* do preload for decryption */ + .macro dec_prepare, ignore0, ignore1, temp + prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp + .endm + + /* apply SubBytes transformation using the the preloaded Sbox */ + .macro sub_bytes, in + sub v9.16b, \in\().16b, v12.16b + tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b + sub v10.16b, v9.16b, v12.16b + tbx \in\().16b, {v20.16b-v23.16b}, v9.16b + sub v11.16b, v10.16b, v12.16b + tbx \in\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + /* apply MixColumns transformation */ + .macro mix_columns, in + mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b + rev32 v8.8h, \in\().8h + eor \in\().16b, v10.16b, \in\().16b + shl v9.4s, v8.4s, #24 + shl v11.4s, \in\().4s, #24 + sri v9.4s, v8.4s, #8 + sri v11.4s, \in\().4s, #8 + eor v9.16b, v9.16b, v8.16b + eor v10.16b, v10.16b, v9.16b + eor \in\().16b, v10.16b, v11.16b + .endm + + /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ + .macro inv_mix_columns, in + mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b + mul_by_x v11.16b, v11.16b, v10.16b, v14.16b + eor \in\().16b, \in\().16b, v11.16b + rev32 v11.8h, v11.8h + eor \in\().16b, \in\().16b, v11.16b + mix_columns \in + .endm + + .macro do_block, enc, in, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ + sub_bytes \in + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns \in + .else + inv_mix_columns \in + .endif + b 1111b +2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block, in, rounds, rk, rkp, i + do_block 1, \in, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block, in, rounds, rk, rkp, i + do_block 0, \in, \rounds, \rk, \rkp, \i + .endm + + /* + * Interleaved versions: functionally equivalent to the + * ones above, but applied to 2 or 4 AES states in parallel. + */ + + .macro sub_bytes_2x, in0, in1 + sub v8.16b, \in0\().16b, v12.16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, v8.16b, v12.16b + sub v11.16b, v9.16b, v12.16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v10.16b, v12.16b + sub v9.16b, v11.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + .endm + + .macro sub_bytes_4x, in0, in1, in2, in3 + sub v8.16b, \in0\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, \in2\().16b, v12.16b + tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b + sub v11.16b, \in3\().16b, v12.16b + tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b + tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const + sshr \tmp0\().16b, \in0\().16b, #7 + add \out0\().16b, \in0\().16b, \in0\().16b + sshr \tmp1\().16b, \in1\().16b, #7 + and \tmp0\().16b, \tmp0\().16b, \const\().16b + add \out1\().16b, \in1\().16b, \in1\().16b + and \tmp1\().16b, \tmp1\().16b, \const\().16b + eor \out0\().16b, \out0\().16b, \tmp0\().16b + eor \out1\().16b, \out1\().16b, \tmp1\().16b + .endm + + .macro mix_columns_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + rev32 v10.8h, \in0\().8h + rev32 v11.8h, \in1\().8h + eor \in0\().16b, v8.16b, \in0\().16b + eor \in1\().16b, v9.16b, \in1\().16b + shl v12.4s, v10.4s, #24 + shl v13.4s, v11.4s, #24 + eor v8.16b, v8.16b, v10.16b + sri v12.4s, v10.4s, #8 + shl v10.4s, \in0\().4s, #24 + eor v9.16b, v9.16b, v11.16b + sri v13.4s, v11.4s, #8 + shl v11.4s, \in1\().4s, #24 + sri v10.4s, \in0\().4s, #8 + eor \in0\().16b, v8.16b, v12.16b + sri v11.4s, \in1\().4s, #8 + eor \in1\().16b, v9.16b, v13.16b + eor \in0\().16b, v10.16b, \in0\().16b + eor \in1\().16b, v11.16b, \in1\().16b + .endm + + .macro inv_mix_cols_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v8, v9, v8, v9, v10, v11, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + mix_columns_2x \in0, \in1 + .endm + + .macro inv_mix_cols_4x, in0, in1, in2, in3 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14 + mul_by_x_2x v8, v9, v8, v9, v12, v13, v14 + mul_by_x_2x v10, v11, v10, v11, v12, v13, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + rev32 v10.8h, v10.8h + rev32 v11.8h, v11.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + .endm + + .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + sub_bytes_2x \in0, \in1 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_2x \in0, \in1 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + .endm + + .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + sub_bytes_4x \in0, \in1, \in2, \in3 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ + tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_4x \in0, \in1, \in2, \in3 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + +#include "aes-modes.S" + + .text + .align 4 +.LForward_ShiftRows: + .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 + .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb + +.LReverse_ShiftRows: + .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb + .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 + +.LForward_Sbox: + .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e + .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.LReverse_Sbox: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S new file mode 100644 index 000000000000..b9e6eaf41c9b --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -0,0 +1,95 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. + * + * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include + + DATA .req v0 + SHASH .req v1 + IN1 .req v2 + T1 .req v2 + T2 .req v3 + T3 .req v4 + VZR .req v5 + + .text + .arch armv8-a+crypto + + /* + * void pmull_ghash_update(int blocks, u64 dg[], const char *src, + * struct ghash_key const *k, const char *head) + */ +ENTRY(pmull_ghash_update) + ld1 {DATA.16b}, [x1] + ld1 {SHASH.16b}, [x3] + eor VZR.16b, VZR.16b, VZR.16b + + /* do the head block first, if supplied */ + cbz x4, 0f + ld1 {IN1.2d}, [x4] + b 1f + +0: ld1 {IN1.2d}, [x2], #16 + sub w0, w0, #1 +1: ext IN1.16b, IN1.16b, IN1.16b, #8 +CPU_LE( rev64 IN1.16b, IN1.16b ) + eor DATA.16b, DATA.16b, IN1.16b + + /* multiply DATA by SHASH in GF(2^128) */ + ext T2.16b, DATA.16b, DATA.16b, #8 + ext T3.16b, SHASH.16b, SHASH.16b, #8 + eor T2.16b, T2.16b, DATA.16b + eor T3.16b, T3.16b, SHASH.16b + + pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1 + pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0 + pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0) + eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0) + eor T2.16b, T2.16b, DATA.16b + + ext T3.16b, VZR.16b, T2.16b, #8 + ext T2.16b, T2.16b, VZR.16b, #8 + eor DATA.16b, DATA.16b, T3.16b + eor T1.16b, T1.16b, T2.16b // is result of + // carry-less multiplication + + /* first phase of the reduction */ + shl T3.2d, DATA.2d, #1 + eor T3.16b, T3.16b, DATA.16b + shl T3.2d, T3.2d, #5 + eor T3.16b, T3.16b, DATA.16b + shl T3.2d, T3.2d, #57 + ext T2.16b, VZR.16b, T3.16b, #8 + ext T3.16b, T3.16b, VZR.16b, #8 + eor DATA.16b, DATA.16b, T2.16b + eor T1.16b, T1.16b, T3.16b + + /* second phase of the reduction */ + ushr T2.2d, DATA.2d, #5 + eor T2.16b, T2.16b, DATA.16b + ushr T2.2d, T2.2d, #1 + eor T2.16b, T2.16b, DATA.16b + ushr T2.2d, T2.2d, #1 + eor T1.16b, T1.16b, T2.16b + eor DATA.16b, DATA.16b, T1.16b + + cbnz w0, 0b + + st1 {DATA.16b}, [x1] + ret +ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c new file mode 100644 index 000000000000..b92baf3f68c7 --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -0,0 +1,155 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_key { + u64 a; + u64 b; +}; + +struct ghash_desc_ctx { + u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; + u8 buf[GHASH_BLOCK_SIZE]; + u32 count; +}; + +asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, const char *head); + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + ctx->count += len; + + if ((partial + len) >= GHASH_BLOCK_SIZE) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + int blocks; + + if (partial) { + int p = GHASH_BLOCK_SIZE - partial; + + memcpy(ctx->buf + partial, src, p); + src += p; + len -= p; + } + + blocks = len / GHASH_BLOCK_SIZE; + len %= GHASH_BLOCK_SIZE; + + kernel_neon_begin_partial(6); + pmull_ghash_update(blocks, ctx->digest, src, key, + partial ? ctx->buf : NULL); + kernel_neon_end(); + src += blocks * GHASH_BLOCK_SIZE; + } + if (len) + memcpy(ctx->buf + partial, src, len); + return 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + if (partial) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + + memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); + + kernel_neon_begin_partial(6); + pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); + kernel_neon_end(); + } + put_unaligned_be64(ctx->digest[1], dst); + put_unaligned_be64(ctx->digest[0], dst + 8); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *inkey, unsigned int keylen) +{ + struct ghash_key *key = crypto_shash_ctx(tfm); + u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* perform multiplication by 'x' in GF(2^128) */ + b = get_unaligned_be64(inkey); + a = get_unaligned_be64(inkey + 8); + + key->a = (a << 1) | (b >> 63); + key->b = (b << 1) | (a >> 63); + + if (b >> 63) + key->b ^= 0xc200000000000000UL; + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_key), + .cra_module = THIS_MODULE, + }, +}; + +static int __init ghash_ce_mod_init(void) +{ + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_ce_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_cpu_feature_match(PMULL, ghash_ce_mod_init); +module_exit(ghash_ce_mod_exit); diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..09d57d98609c --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -0,0 +1,153 @@ +/* + * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .text + .arch armv8-a+crypto + + k0 .req v0 + k1 .req v1 + k2 .req v2 + k3 .req v3 + + t0 .req v4 + t1 .req v5 + + dga .req q6 + dgav .req v6 + dgb .req s7 + dgbv .req v7 + + dg0q .req q12 + dg0s .req s12 + dg0v .req v12 + dg1s .req s13 + dg1v .req v13 + dg2s .req s14 + + .macro add_only, op, ev, rc, s0, dg1 + .ifc \ev, ev + add t1.4s, v\s0\().4s, \rc\().4s + sha1h dg2s, dg0s + .ifnb \dg1 + sha1\op dg0q, \dg1, t0.4s + .else + sha1\op dg0q, dg1s, t0.4s + .endif + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha1h dg1s, dg0s + sha1\op dg0q, dg2s, t1.4s + .endif + .endm + + .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 + sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s + add_only \op, \ev, \rc, \s1, \dg1 + sha1su1 v\s0\().4s, v\s3\().4s + .endm + + /* + * The SHA1 round constants + */ + .align 4 +.Lsha1_rcon: + .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 + + /* + * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha1_ce_transform) + /* load round constants */ + adr x6, .Lsha1_rcon + ld1r {k0.4s}, [x6], #4 + ld1r {k1.4s}, [x6], #4 + ld1r {k2.4s}, [x6], #4 + ld1r {k3.4s}, [x6] + + /* load state */ + ldr dga, [x2] + ldr dgb, [x2, #16] + + /* load partial state (if supplied) */ + cbz x3, 0f + ld1 {v8.4s-v11.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v8.4s-v11.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v8.16b, v8.16b ) +CPU_LE( rev32 v9.16b, v9.16b ) +CPU_LE( rev32 v10.16b, v10.16b ) +CPU_LE( rev32 v11.16b, v11.16b ) + +2: add t0.4s, v8.4s, k0.4s + mov dg0v.16b, dgav.16b + + add_update c, ev, k0, 8, 9, 10, 11, dgb + add_update c, od, k0, 9, 10, 11, 8 + add_update c, ev, k0, 10, 11, 8, 9 + add_update c, od, k0, 11, 8, 9, 10 + add_update c, ev, k1, 8, 9, 10, 11 + + add_update p, od, k1, 9, 10, 11, 8 + add_update p, ev, k1, 10, 11, 8, 9 + add_update p, od, k1, 11, 8, 9, 10 + add_update p, ev, k1, 8, 9, 10, 11 + add_update p, od, k2, 9, 10, 11, 8 + + add_update m, ev, k2, 10, 11, 8, 9 + add_update m, od, k2, 11, 8, 9, 10 + add_update m, ev, k2, 8, 9, 10, 11 + add_update m, od, k2, 9, 10, 11, 8 + add_update m, ev, k3, 10, 11, 8, 9 + + add_update p, od, k3, 11, 8, 9, 10 + add_only p, ev, k3, 9 + add_only p, od, k3, 10 + add_only p, ev, k3, 11 + add_only p, od + + /* update state */ + add dgbv.2s, dgbv.2s, dg1v.2s + add dgav.4s, dgav.4s, dg0v.4s + + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v9.2d, #0 + mov x8, #0x80000000 + movi v10.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d8, x8 + mov x4, #0 + mov v11.d[0], xzr + mov v11.d[1], x7 + b 2b + + /* store new state */ +3: str dga, [x2] + str dgb, [x2, #16] + ret +ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..6fe83f37a750 --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -0,0 +1,174 @@ +/* + * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha1_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + return 0; +} + +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA1_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA1_BLOCK_SIZE - partial; + + memcpy(sctx->buffer + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA1_BLOCK_SIZE; + len %= SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, + partial ? sctx->buffer : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA1_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buffer + partial, data, len); + return 0; +} + +static int sha1_final(struct shash_desc *desc, u8 *out) +{ + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + struct sha1_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + __be32 *dst = (__be32 *)out; + int i; + + u32 padlen = SHA1_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); + + sha1_update(desc, padding, padlen); + sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int blocks; + int i; + + if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { + sha1_update(desc, data, len); + return sha1_final(desc, out); + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha1_ce_transform() + */ + blocks = len / SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg alg = { + .init = sha1_init, + .update = sha1_update, + .final = sha1_final, + .finup = sha1_finup, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct sha1_state), + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha1_ce_mod_init(void) +{ + return crypto_register_shash(&alg); +} + +static void __exit sha1_ce_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_cpu_feature_match(SHA1, sha1_ce_mod_init); +module_exit(sha1_ce_mod_fini); diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S new file mode 100644 index 000000000000..7f29fc031ea8 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -0,0 +1,156 @@ +/* + * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .text + .arch armv8-a+crypto + + dga .req q20 + dgav .req v20 + dgb .req q21 + dgbv .req v21 + + t0 .req v22 + t1 .req v23 + + dg0q .req q24 + dg0v .req v24 + dg1q .req q25 + dg1v .req v25 + dg2q .req q26 + dg2v .req v26 + + .macro add_only, ev, rc, s0 + mov dg2v.16b, dg0v.16b + .ifeq \ev + add t1.4s, v\s0\().4s, \rc\().4s + sha256h dg0q, dg1q, t0.4s + sha256h2 dg1q, dg2q, t0.4s + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha256h dg0q, dg1q, t1.4s + sha256h2 dg1q, dg2q, t1.4s + .endif + .endm + + .macro add_update, ev, rc, s0, s1, s2, s3 + sha256su0 v\s0\().4s, v\s1\().4s + add_only \ev, \rc, \s1 + sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s + .endm + + /* + * The SHA-256 round constants + */ + .align 4 +.Lsha2_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + + /* + * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha2_ce_transform) + /* load round constants */ + adr x8, .Lsha2_rcon + ld1 { v0.4s- v3.4s}, [x8], #64 + ld1 { v4.4s- v7.4s}, [x8], #64 + ld1 { v8.4s-v11.4s}, [x8], #64 + ld1 {v12.4s-v15.4s}, [x8] + + /* load state */ + ldp dga, dgb, [x2] + + /* load partial input (if supplied) */ + cbz x3, 0f + ld1 {v16.4s-v19.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v16.4s-v19.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v16.16b, v16.16b ) +CPU_LE( rev32 v17.16b, v17.16b ) +CPU_LE( rev32 v18.16b, v18.16b ) +CPU_LE( rev32 v19.16b, v19.16b ) + +2: add t0.4s, v16.4s, v0.4s + mov dg0v.16b, dgav.16b + mov dg1v.16b, dgbv.16b + + add_update 0, v1, 16, 17, 18, 19 + add_update 1, v2, 17, 18, 19, 16 + add_update 0, v3, 18, 19, 16, 17 + add_update 1, v4, 19, 16, 17, 18 + + add_update 0, v5, 16, 17, 18, 19 + add_update 1, v6, 17, 18, 19, 16 + add_update 0, v7, 18, 19, 16, 17 + add_update 1, v8, 19, 16, 17, 18 + + add_update 0, v9, 16, 17, 18, 19 + add_update 1, v10, 17, 18, 19, 16 + add_update 0, v11, 18, 19, 16, 17 + add_update 1, v12, 19, 16, 17, 18 + + add_only 0, v13, 17 + add_only 1, v14, 18 + add_only 0, v15, 19 + add_only 1 + + /* update state */ + add dgav.4s, dgav.4s, dg0v.4s + add dgbv.4s, dgbv.4s, dg1v.4s + + /* handled all input blocks? */ + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v17.2d, #0 + mov x8, #0x80000000 + movi v18.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d16, x8 + mov x4, #0 + mov v19.d[0], xzr + mov v19.d[1], x7 + b 2b + + /* store new state */ +3: stp dga, dgb, [x2] + ret +ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c new file mode 100644 index 000000000000..c294e67d3925 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -0,0 +1,255 @@ +/* + * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha224_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + } + }; + return 0; +} + +static int sha256_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + } + }; + return 0; +} + +static int sha2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA256_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx->buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, + partial ? sctx->buf : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buf + partial, data, len); + return 0; +} + +static void sha2_final(struct shash_desc *desc) +{ + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + struct sha256_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + u32 padlen = SHA256_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); + + sha2_update(desc, padding, padlen); + sha2_update(desc, (const u8 *)&bits, sizeof(bits)); +} + +static int sha224_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static void sha2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + int blocks; + + if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { + sha2_update(desc, data, len); + sha2_final(desc); + return; + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha2_ce_transform() + */ + blocks = len / SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + data += blocks * SHA256_BLOCK_SIZE; +} + +static int sha224_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha2_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha2_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg algs[] = { { + .init = sha224_init, + .update = sha2_update, + .final = sha224_final, + .finup = sha224_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .init = sha256_init, + .update = sha2_update, + .final = sha256_final, + .finup = sha256_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha2_ce_mod_init(void) +{ + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha2_ce_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_cpu_feature_match(SHA2, sha2_ce_mod_init); +module_exit(sha2_ce_mod_fini); diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index bc5da00f8d84..cfe9860b2076 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -37,6 +37,7 @@ generic-y += segment.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h +generic-y += simd.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index cb2be3b86c6b..be56d33c5dbf 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -26,7 +26,13 @@ #include -static inline void arch_timer_reg_write(int access, int reg, u32 val) +/* + * These register accessors are marked inline so the compiler can + * nicely work out which register we want, and chuck away the rest of + * the code. + */ +static __always_inline +void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { @@ -36,8 +42,6 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val) case ARCH_TIMER_REG_TVAL: asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); break; - default: - BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { @@ -47,17 +51,14 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val) case ARCH_TIMER_REG_TVAL: asm volatile("msr cntv_tval_el0, %0" : : "r" (val)); break; - default: - BUILD_BUG(); } - } else { - BUILD_BUG(); } isb(); } -static inline u32 arch_timer_reg_read(int access, int reg) +static __always_inline +u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { u32 val; @@ -69,8 +70,6 @@ static inline u32 arch_timer_reg_read(int access, int reg) case ARCH_TIMER_REG_TVAL: asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); break; - default: - BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { @@ -80,11 +79,7 @@ static inline u32 arch_timer_reg_read(int access, int reg) case ARCH_TIMER_REG_TVAL: asm volatile("mrs %0, cntv_tval_el0" : "=r" (val)); break; - default: - BUILD_BUG(); } - } else { - BUILD_BUG(); } return val; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h new file mode 100644 index 000000000000..cd4ac0516488 --- /dev/null +++ b/arch/arm64/include/asm/cpufeature.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include + +/* + * In the arm64 world (as in the ARM world), elf_hwcap is used both internally + * in the kernel and for user space to keep track of which optional features + * are supported by the current system. So let's map feature 'x' to HWCAP_x. + * Note that HWCAP_x constants are bit fields so we need to take the log. + */ + +#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) +#define cpu_feature(x) ilog2(HWCAP_ ## x) + +static inline bool cpu_have_feature(unsigned int num) +{ + return elf_hwcap & (1UL << num); +} + +#endif diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 10cd80af2aec..4fe77b887457 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -187,6 +187,14 @@ config GENERIC_CPU_DEVICES bool default n +config HAVE_CPU_AUTOPROBE + def_bool ARCH_HAS_CPU_AUTOPROBE + +config GENERIC_CPU_AUTOPROBE + bool + depends on !ARCH_HAS_CPU_AUTOPROBE + select HAVE_CPU_AUTOPROBE + config SOC_BUS bool diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 3d48fc887ef4..607efe6b7dc8 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -13,6 +13,9 @@ #include #include #include +#include +#include +#include #include "base.h" @@ -260,6 +263,45 @@ static void cpu_device_release(struct device *dev) */ } +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE +static ssize_t print_cpu_modalias(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t n; + u32 i; + + n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", + CPU_FEATURE_TYPEVAL); + + for (i = 0; i < MAX_CPU_FEATURES; i++) + if (cpu_have_feature(i)) { + if (PAGE_SIZE < n + sizeof(",XXXX\n")) { + WARN(1, "CPU features overflow page\n"); + break; + } + n += sprintf(&buf[n], ",%04X", i); + } + buf[n++] = '\n'; + return n; +} +#else +#define print_cpu_modalias arch_print_cpu_modalias +#endif + +static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (buf) { + print_cpu_modalias(NULL, NULL, buf); + add_uevent_var(env, "MODALIAS=%s", buf); + kfree(buf); + } + return 0; +} +#endif + /* * register_cpu - Setup a sysfs device for a CPU. * @cpu - cpu->hotpluggable field set to 1 will generate a control file in @@ -277,8 +319,8 @@ int __cpuinit register_cpu(struct cpu *cpu, int num) cpu->dev.id = num; cpu->dev.bus = &cpu_subsys; cpu->dev.release = cpu_device_release; -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE - cpu->dev.bus->uevent = arch_cpu_uevent; +#ifdef CONFIG_HAVE_CPU_AUTOPROBE + cpu->dev.bus->uevent = cpu_uevent; #endif error = device_register(&cpu->dev); if (!error && cpu->hotpluggable) @@ -307,8 +349,8 @@ struct device *get_cpu_device(unsigned cpu) } EXPORT_SYMBOL_GPL(get_cpu_device); -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE -static DEVICE_ATTR(modalias, 0444, arch_print_cpu_modalias, NULL); +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); #endif static struct attribute *cpu_root_attrs[] = { @@ -321,7 +363,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE +#ifdef CONFIG_HAVE_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif NULL diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 958591e94f2a..67bbbd8ae507 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -17,13 +17,39 @@ #include #include #include +#include #include +#include #include #include #include +#define CNTTIDR 0x08 +#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) + +#define CNTVCT_LO 0x08 +#define CNTVCT_HI 0x0c +#define CNTFRQ 0x10 +#define CNTP_TVAL 0x28 +#define CNTP_CTL 0x2c +#define CNTV_TVAL 0x38 +#define CNTV_CTL 0x3c + +#define ARCH_CP15_TIMER BIT(0) +#define ARCH_MEM_TIMER BIT(1) +static unsigned arch_timers_present __initdata; + +static void __iomem *arch_counter_base; + +struct arch_timer { + void __iomem *base; + struct clock_event_device evt; +}; + +#define to_arch_timer(e) container_of(e, struct arch_timer, evt) + static u32 arch_timer_rate; enum ppi_nr { @@ -39,19 +65,82 @@ static int arch_timer_ppi[MAX_TIMER_PPI]; static struct clock_event_device __percpu *arch_timer_evt; static bool arch_timer_use_virtual = true; +static bool arch_timer_mem_use_virtual; /* * Architected system timer support. */ -static inline irqreturn_t timer_handler(const int access, +static __always_inline +void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, + struct clock_event_device *clk) +{ + if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed(val, timer->base + CNTP_CTL); + break; + case ARCH_TIMER_REG_TVAL: + writel_relaxed(val, timer->base + CNTP_TVAL); + break; + } + } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed(val, timer->base + CNTV_CTL); + break; + case ARCH_TIMER_REG_TVAL: + writel_relaxed(val, timer->base + CNTV_TVAL); + break; + } + } else { + arch_timer_reg_write_cp15(access, reg, val); + } +} + +static __always_inline +u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, + struct clock_event_device *clk) +{ + u32 val; + + if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = readl_relaxed(timer->base + CNTP_CTL); + break; + case ARCH_TIMER_REG_TVAL: + val = readl_relaxed(timer->base + CNTP_TVAL); + break; + } + } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = readl_relaxed(timer->base + CNTV_CTL); + break; + case ARCH_TIMER_REG_TVAL: + val = readl_relaxed(timer->base + CNTV_TVAL); + break; + } + } else { + val = arch_timer_reg_read_cp15(access, reg); + } + + return val; +} + +static __always_inline irqreturn_t timer_handler(const int access, struct clock_event_device *evt) { unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt); if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { ctrl |= ARCH_TIMER_CTRL_IT_MASK; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); evt->event_handler(evt); return IRQ_HANDLED; } @@ -73,15 +162,30 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); } -static inline void timer_set_mode(const int access, int mode) +static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); +} + +static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); +} + +static __always_inline void timer_set_mode(const int access, int mode, + struct clock_event_device *clk) { unsigned long ctrl; switch (mode) { case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ctrl &= ~ARCH_TIMER_CTRL_ENABLE; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); break; default: break; @@ -91,36 +195,49 @@ static inline void timer_set_mode(const int access, int mode) static void arch_timer_set_mode_virt(enum clock_event_mode mode, struct clock_event_device *clk) { - timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode); + timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode, clk); } static void arch_timer_set_mode_phys(enum clock_event_mode mode, struct clock_event_device *clk) { - timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode); + timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode, clk); } -static inline void set_next_event(const int access, unsigned long evt) +static void arch_timer_set_mode_virt_mem(enum clock_event_mode mode, + struct clock_event_device *clk) +{ + timer_set_mode(ARCH_TIMER_MEM_VIRT_ACCESS, mode, clk); +} + +static void arch_timer_set_mode_phys_mem(enum clock_event_mode mode, + struct clock_event_device *clk) +{ + timer_set_mode(ARCH_TIMER_MEM_PHYS_ACCESS, mode, clk); +} + +static __always_inline void set_next_event(const int access, unsigned long evt, + struct clock_event_device *clk) { unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ctrl |= ARCH_TIMER_CTRL_ENABLE; ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; - arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt); - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk); + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } static int arch_timer_set_next_event_virt(unsigned long evt, - struct clock_event_device *unused) + struct clock_event_device *clk) { - set_next_event(ARCH_TIMER_VIRT_ACCESS, evt); + set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); return 0; } static int arch_timer_set_next_event_phys(unsigned long evt, - struct clock_event_device *unused) + struct clock_event_device *clk) { - set_next_event(ARCH_TIMER_PHYS_ACCESS, evt); + set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); return 0; } @@ -137,27 +254,62 @@ static void arch_timer_configure_evtstream(void) arch_timer_evtstrm_enable(min(pos, 15)); } -static int __cpuinit arch_timer_setup(struct clock_event_device *clk) +static int arch_timer_set_next_event_virt_mem(unsigned long evt, + struct clock_event_device *clk) { - clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; - clk->name = "arch_sys_timer"; - clk->rating = 450; - if (arch_timer_use_virtual) { - clk->irq = arch_timer_ppi[VIRT_PPI]; - clk->set_mode = arch_timer_set_mode_virt; - clk->set_next_event = arch_timer_set_next_event_virt; + set_next_event(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk); + return 0; +} + +static int arch_timer_set_next_event_phys_mem(unsigned long evt, + struct clock_event_device *clk) +{ + set_next_event(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk); + return 0; +} + +static void __cpuinit __arch_timer_setup(unsigned type, + struct clock_event_device *clk) +{ + clk->features = CLOCK_EVT_FEAT_ONESHOT; + + if (type == ARCH_CP15_TIMER) { + clk->features |= CLOCK_EVT_FEAT_C3STOP; + clk->name = "arch_sys_timer"; + clk->rating = 450; + clk->cpumask = cpumask_of(smp_processor_id()); + if (arch_timer_use_virtual) { + clk->irq = arch_timer_ppi[VIRT_PPI]; + clk->set_mode = arch_timer_set_mode_virt; + clk->set_next_event = arch_timer_set_next_event_virt; + } else { + clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; + clk->set_mode = arch_timer_set_mode_phys; + clk->set_next_event = arch_timer_set_next_event_phys; + } } else { - clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; - clk->set_mode = arch_timer_set_mode_phys; - clk->set_next_event = arch_timer_set_next_event_phys; + clk->name = "arch_mem_timer"; + clk->rating = 400; + clk->cpumask = cpu_all_mask; + if (arch_timer_mem_use_virtual) { + clk->set_mode = arch_timer_set_mode_virt_mem; + clk->set_next_event = + arch_timer_set_next_event_virt_mem; + } else { + clk->set_mode = arch_timer_set_mode_phys_mem; + clk->set_next_event = + arch_timer_set_next_event_phys_mem; + } } - clk->cpumask = cpumask_of(smp_processor_id()); + clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk); - clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, NULL); + clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff); +} - clockevents_config_and_register(clk, arch_timer_rate, - 0xf, 0x7fffffff); +static int __cpuinit arch_timer_setup(struct clock_event_device *clk) +{ + __arch_timer_setup(ARCH_CP15_TIMER, clk); if (arch_timer_use_virtual) enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0); @@ -174,27 +326,41 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk) return 0; } -static int arch_timer_available(void) +static void +arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np) { - u32 freq; + /* Who has more than one independent system counter? */ + if (arch_timer_rate) + return; - if (arch_timer_rate == 0) { - freq = arch_timer_get_cntfrq(); - - /* Check the timer frequency. */ - if (freq == 0) { - pr_warn("Architected timer frequency not available\n"); - return -EINVAL; - } - - arch_timer_rate = freq; + /* Try to determine the frequency from the device tree or CNTFRQ */ + if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) { + if (cntbase) + arch_timer_rate = readl_relaxed(cntbase + CNTFRQ); + else + arch_timer_rate = arch_timer_get_cntfrq(); } - pr_info_once("Architected local timer running at %lu.%02luMHz (%s).\n", + /* Check the timer frequency. */ + if (arch_timer_rate == 0) + pr_warn("Architected timer frequency not available\n"); +} + +static void arch_timer_banner(unsigned type) +{ + pr_info("Architected %s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n", + type & ARCH_CP15_TIMER ? "cp15" : "", + type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? " and " : "", + type & ARCH_MEM_TIMER ? "mmio" : "", (unsigned long)arch_timer_rate / 1000000, (unsigned long)(arch_timer_rate / 10000) % 100, - arch_timer_use_virtual ? "virt" : "phys"); - return 0; + type & ARCH_CP15_TIMER ? + arch_timer_use_virtual ? "virt" : "phys" : + "", + type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "", + type & ARCH_MEM_TIMER ? + arch_timer_mem_use_virtual ? "virt" : "phys" : + ""); } u32 arch_timer_get_rate(void) @@ -202,11 +368,27 @@ u32 arch_timer_get_rate(void) return arch_timer_rate; } -u64 arch_timer_read_counter(void) +static u64 arch_counter_get_cntvct_mem(void) { - return arch_counter_get_cntvct(); + u32 vct_lo, vct_hi, tmp_hi; + + do { + vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); + vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO); + tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); + } while (vct_hi != tmp_hi); + + return ((u64) vct_hi << 32) | vct_lo; } +/* + * Default to cp15 based access because arm64 uses this function for + * sched_clock() before DT is probed and the cp15 method is guaranteed + * to exist on arm64. arm doesn't use this before DT is probed so even + * if we don't have the cp15 accessors we won't have a problem. + */ +u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct; + static cycle_t arch_counter_read(struct clocksource *cs) { return arch_counter_get_cntvct(); @@ -237,6 +419,23 @@ struct timecounter *arch_timer_get_timecounter(void) return &timecounter; } +static void __init arch_counter_register(unsigned type) +{ + u64 start_count; + + /* Register the CP15 based counter if we have one */ + if (type & ARCH_CP15_TIMER) + arch_timer_read_counter = arch_counter_get_cntvct; + else + arch_timer_read_counter = arch_counter_get_cntvct_mem; + + start_count = arch_timer_read_counter(); + clocksource_register_hz(&clocksource_counter, arch_timer_rate); + cyclecounter.mult = clocksource_counter.mult; + cyclecounter.shift = clocksource_counter.shift; + timecounter_init(&timecounter, &cyclecounter, start_count); +} + static void __cpuinit arch_timer_stop(struct clock_event_device *clk) { pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", @@ -308,22 +507,12 @@ static int __init arch_timer_register(void) int err; int ppi; - err = arch_timer_available(); - if (err) - goto out; - arch_timer_evt = alloc_percpu(struct clock_event_device); if (!arch_timer_evt) { err = -ENOMEM; goto out; } - clocksource_register_hz(&clocksource_counter, arch_timer_rate); - cyclecounter.mult = clocksource_counter.mult; - cyclecounter.shift = clocksource_counter.shift; - timecounter_init(&timecounter, &cyclecounter, - arch_counter_get_cntvct()); - if (arch_timer_use_virtual) { ppi = arch_timer_ppi[VIRT_PPI]; err = request_percpu_irq(ppi, arch_timer_handler_virt, @@ -380,24 +569,77 @@ static int __init arch_timer_register(void) return err; } +static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq) +{ + int ret; + irq_handler_t func; + struct arch_timer *t; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return -ENOMEM; + + t->base = base; + t->evt.irq = irq; + __arch_timer_setup(ARCH_MEM_TIMER, &t->evt); + + if (arch_timer_mem_use_virtual) + func = arch_timer_handler_virt_mem; + else + func = arch_timer_handler_phys_mem; + + ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &t->evt); + if (ret) { + pr_err("arch_timer: Failed to request mem timer irq\n"); + kfree(t); + } + + return ret; +} + +static const struct of_device_id arch_timer_of_match[] __initconst = { + { .compatible = "arm,armv7-timer", }, + { .compatible = "arm,armv8-timer", }, + {}, +}; + +static const struct of_device_id arch_timer_mem_of_match[] __initconst = { + { .compatible = "arm,armv7-timer-mem", }, + {}, +}; + +static void __init arch_timer_common_init(void) +{ + unsigned mask = ARCH_CP15_TIMER | ARCH_MEM_TIMER; + + /* Wait until both nodes are probed if we have two timers */ + if ((arch_timers_present & mask) != mask) { + if (of_find_matching_node(NULL, arch_timer_mem_of_match) && + !(arch_timers_present & ARCH_MEM_TIMER)) + return; + if (of_find_matching_node(NULL, arch_timer_of_match) && + !(arch_timers_present & ARCH_CP15_TIMER)) + return; + } + + arch_timer_banner(arch_timers_present); + arch_counter_register(arch_timers_present); + arch_timer_arch_init(); +} + static void __init arch_timer_init(struct device_node *np) { - u32 freq; int i; - if (arch_timer_get_rate()) { + if (arch_timers_present & ARCH_CP15_TIMER) { pr_warn("arch_timer: multiple nodes in dt, skipping\n"); return; } - /* Try to determine the frequency from the device tree or CNTFRQ */ - if (!of_property_read_u32(np, "clock-frequency", &freq)) - arch_timer_rate = freq; - + arch_timers_present |= ARCH_CP15_TIMER; for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++) arch_timer_ppi[i] = irq_of_parse_and_map(np, i); - - of_node_put(np); + arch_timer_detect_rate(NULL, np); /* * If HYP mode is available, we know that the physical timer @@ -418,7 +660,73 @@ static void __init arch_timer_init(struct device_node *np) } arch_timer_register(); - arch_timer_arch_init(); + arch_timer_common_init(); } CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_init); CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_init); + +static void __init arch_timer_mem_init(struct device_node *np) +{ + struct device_node *frame, *best_frame = NULL; + void __iomem *cntctlbase, *base; + unsigned int irq; + u32 cnttidr; + + arch_timers_present |= ARCH_MEM_TIMER; + cntctlbase = of_iomap(np, 0); + if (!cntctlbase) { + pr_err("arch_timer: Can't find CNTCTLBase\n"); + return; + } + + cnttidr = readl_relaxed(cntctlbase + CNTTIDR); + iounmap(cntctlbase); + + /* + * Try to find a virtual capable frame. Otherwise fall back to a + * physical capable frame. + */ + for_each_available_child_of_node(np, frame) { + int n; + + if (of_property_read_u32(frame, "frame-number", &n)) { + pr_err("arch_timer: Missing frame-number\n"); + of_node_put(best_frame); + of_node_put(frame); + return; + } + + if (cnttidr & CNTTIDR_VIRT(n)) { + of_node_put(best_frame); + best_frame = frame; + arch_timer_mem_use_virtual = true; + break; + } + of_node_put(best_frame); + best_frame = of_node_get(frame); + } + + base = arch_counter_base = of_iomap(best_frame, 0); + if (!base) { + pr_err("arch_timer: Can't map frame's registers\n"); + of_node_put(best_frame); + return; + } + + if (arch_timer_mem_use_virtual) + irq = irq_of_parse_and_map(best_frame, 1); + else + irq = irq_of_parse_and_map(best_frame, 0); + of_node_put(best_frame); + if (!irq) { + pr_err("arch_timer: Frame missing %s irq", + arch_timer_mem_use_virtual ? "virt" : "phys"); + return; + } + + arch_timer_detect_rate(base, np); + arch_timer_mem_register(base, irq); + arch_timer_common_init(); +} +CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", + arch_timer_mem_init); diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index 8c4771da1b89..9216c2020ec1 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define HB_CPUFREQ_CHANGE_NOTE 0x80000001 diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 543ad6a79505..fefef7ebcbec 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -1 +1,5 @@ +# Generic MAILBOX API + +obj-$(CONFIG_MAILBOX) += mailbox.o + obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c new file mode 100644 index 000000000000..607dd91e87a1 --- /dev/null +++ b/drivers/mailbox/mailbox.c @@ -0,0 +1,488 @@ +/* + * Mailbox: Common code for Mailbox controllers and users + * + * Copyright (C) 2014 Linaro Ltd. + * Author: Jassi Brar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TXDONE_BY_IRQ (1 << 0) /* controller has remote RTR irq */ +#define TXDONE_BY_POLL (1 << 1) /* controller can read status of last TX */ +#define TXDONE_BY_ACK (1 << 2) /* S/W ACK recevied by Client ticks the TX */ + +static LIST_HEAD(mbox_cons); +static DEFINE_MUTEX(con_mutex); + +static int _add_to_rbuf(struct mbox_chan *chan, void *mssg) +{ + int idx; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + /* See if there is any space left */ + if (chan->msg_count == MBOX_TX_QUEUE_LEN) { + spin_unlock_irqrestore(&chan->lock, flags); + return -ENOMEM; + } + + idx = chan->msg_free; + chan->msg_data[idx] = mssg; + chan->msg_count++; + + if (idx == MBOX_TX_QUEUE_LEN - 1) + chan->msg_free = 0; + else + chan->msg_free++; + + spin_unlock_irqrestore(&chan->lock, flags); + + return idx; +} + +static void _msg_submit(struct mbox_chan *chan) +{ + unsigned count, idx; + unsigned long flags; + void *data; + int err; + + spin_lock_irqsave(&chan->lock, flags); + + if (!chan->msg_count || chan->active_req) { + spin_unlock_irqrestore(&chan->lock, flags); + return; + } + + count = chan->msg_count; + idx = chan->msg_free; + if (idx >= count) + idx -= count; + else + idx += MBOX_TX_QUEUE_LEN - count; + + data = chan->msg_data[idx]; + + /* Try to submit a message to the MBOX controller */ + err = chan->mbox->ops->send_data(chan, data); + if (!err) { + chan->active_req = data; + chan->msg_count--; + } + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void tx_tick(struct mbox_chan *chan, int r) +{ + unsigned long flags; + void *mssg; + + spin_lock_irqsave(&chan->lock, flags); + mssg = chan->active_req; + chan->active_req = NULL; + spin_unlock_irqrestore(&chan->lock, flags); + + /* Submit next message */ + _msg_submit(chan); + + /* Notify the client */ + if (chan->cl->tx_block) + complete(&chan->tx_complete); + else if (mssg && chan->cl->tx_done) + chan->cl->tx_done(chan->cl, mssg, r); +} + +static void poll_txdone(unsigned long data) +{ + struct mbox_controller *mbox = (struct mbox_controller *)data; + bool txdone, resched = false; + int i; + + for (i = 0; i < mbox->num_chans; i++) { + struct mbox_chan *chan = &mbox->chans[i]; + + if (chan->active_req && chan->cl) { + resched = true; + txdone = chan->mbox->ops->last_tx_done(chan); + if (txdone) + tx_tick(chan, 0); + } + } + + if (resched) + mod_timer(&mbox->poll, + jiffies + msecs_to_jiffies(mbox->period)); +} + +/** + * mbox_chan_received_data - A way for controller driver to push data + * received from remote to the upper layer. + * @chan: Pointer to the mailbox channel on which RX happened. + * @data: Client specific message typecasted as void * + * + * After startup and before shutdown any data received on the chan + * is passed on to the API via atomic mbox_chan_received_data(). + * The controller should ACK the RX only after this call returns. + */ +void mbox_chan_received_data(struct mbox_chan *chan, void *mssg) +{ + /* No buffering the received data */ + if (chan->cl->rx_callback) + chan->cl->rx_callback(chan->cl, mssg); +} +EXPORT_SYMBOL_GPL(mbox_chan_received_data); + +/** + * mbox_chan_txdone - A way for controller driver to notify the + * framework that the last TX has completed. + * @chan: Pointer to the mailbox chan on which TX happened. + * @r: Status of last TX - OK or ERROR + * + * The controller that has IRQ for TX ACK calls this atomic API + * to tick the TX state machine. It works only if txdone_irq + * is set by the controller. + */ +void mbox_chan_txdone(struct mbox_chan *chan, int r) +{ + if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) { + pr_err("Controller can't run the TX ticker\n"); + return; + } + + tx_tick(chan, r); +} +EXPORT_SYMBOL_GPL(mbox_chan_txdone); + +/** + * mbox_client_txdone - The way for a client to run the TX state machine. + * @chan: Mailbox channel assigned to this client. + * @r: Success status of last transmission. + * + * The client/protocol had received some 'ACK' packet and it notifies + * the API that the last packet was sent successfully. This only works + * if the controller can't sense TX-Done. + */ +void mbox_client_txdone(struct mbox_chan *chan, int r) +{ + if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) { + pr_err("Client can't run the TX ticker\n"); + return; + } + + tx_tick(chan, r); +} +EXPORT_SYMBOL_GPL(mbox_client_txdone); + +/** + * mbox_client_peek_data - A way for client driver to pull data + * received from remote by the controller. + * @chan: Mailbox channel assigned to this client. + * + * A poke to controller driver for any received data. + * The data is actually passed onto client via the + * mbox_chan_received_data() + * The call can be made from atomic context, so the controller's + * implementation of peek_data() must not sleep. + * + * Return: True, if controller has, and is going to push after this, + * some data. + * False, if controller doesn't have any data to be read. + */ +bool mbox_client_peek_data(struct mbox_chan *chan) +{ + if (chan->mbox->ops->peek_data) + return chan->mbox->ops->peek_data(chan); + + return false; +} +EXPORT_SYMBOL_GPL(mbox_client_peek_data); + +/** + * mbox_send_message - For client to submit a message to be + * sent to the remote. + * @chan: Mailbox channel assigned to this client. + * @mssg: Client specific message typecasted. + * + * For client to submit data to the controller destined for a remote + * processor. If the client had set 'tx_block', the call will return + * either when the remote receives the data or when 'tx_tout' millisecs + * run out. + * In non-blocking mode, the requests are buffered by the API and a + * non-negative token is returned for each queued request. If the request + * is not queued, a negative token is returned. Upon failure or successful + * TX, the API calls 'tx_done' from atomic context, from which the client + * could submit yet another request. + * In blocking mode, 'tx_done' is not called, effectively making the + * queue length 1. + * The pointer to message should be preserved until it is sent + * over the chan, i.e, tx_done() is made. + * This function could be called from atomic context as it simply + * queues the data and returns a token against the request. + * + * Return: Non-negative integer for successful submission (non-blocking mode) + * or transmission over chan (blocking mode). + * Negative value denotes failure. + */ +int mbox_send_message(struct mbox_chan *chan, void *mssg) +{ + int t; + + if (!chan || !chan->cl) + return -EINVAL; + + t = _add_to_rbuf(chan, mssg); + if (t < 0) { + pr_err("Try increasing MBOX_TX_QUEUE_LEN\n"); + return t; + } + + _msg_submit(chan); + + init_completion(&chan->tx_complete); + + if (chan->txdone_method == TXDONE_BY_POLL) + poll_txdone((unsigned long)chan->mbox); + + if (chan->cl->tx_block && chan->active_req) { + unsigned long wait; + int ret; + + if (!chan->cl->tx_tout) /* wait for ever */ + wait = msecs_to_jiffies(3600000); + else + wait = msecs_to_jiffies(chan->cl->tx_tout); + + ret = wait_for_completion_timeout(&chan->tx_complete, wait); + if (ret == 0) { + t = -EIO; + tx_tick(chan, -EIO); + } + } + + return t; +} +EXPORT_SYMBOL_GPL(mbox_send_message); + +/** + * mbox_request_channel - Request a mailbox channel. + * @cl: Identity of the client requesting the channel. + * + * The Client specifies its requirements and capabilities while asking for + * a mailbox channel. It can't be called from atomic context. + * The channel is exclusively allocated and can't be used by another + * client before the owner calls mbox_free_channel. + * After assignment, any packet received on this channel will be + * handed over to the client via the 'rx_callback'. + * The framework holds reference to the client, so the mbox_client + * structure shouldn't be modified until the mbox_free_channel returns. + * + * Return: Pointer to the channel assigned to the client if successful. + * ERR_PTR for request failure. + */ +struct mbox_chan *mbox_request_channel(struct mbox_client *cl) +{ + struct device *dev = cl->dev; + struct mbox_controller *mbox; + struct of_phandle_args spec; + struct mbox_chan *chan; + unsigned long flags; + int count, i, ret; + + if (!dev || !dev->of_node) { + pr_err("%s: No owner device node\n", __func__); + return ERR_PTR(-ENODEV); + } + + count = of_property_count_strings(dev->of_node, "mbox-names"); + if (count < 0) { + pr_err("%s: mbox-names property of node '%s' missing\n", + __func__, dev->of_node->full_name); + return ERR_PTR(-ENODEV); + } + + mutex_lock(&con_mutex); + + ret = -ENODEV; + for (i = 0; i < count; i++) { + const char *s; + + if (of_property_read_string_index(dev->of_node, + "mbox-names", i, &s)) + continue; + + if (strcmp(cl->chan_name, s)) + continue; + + if (of_parse_phandle_with_args(dev->of_node, + "mbox", "#mbox-cells", i, &spec)) + continue; + + chan = NULL; + list_for_each_entry(mbox, &mbox_cons, node) + if (mbox->dev->of_node == spec.np) { + chan = mbox->of_xlate(mbox, &spec); + break; + } + + of_node_put(spec.np); + + if (!chan) + continue; + + ret = -EBUSY; + if (!chan->cl && try_module_get(mbox->dev->driver->owner)) + break; + } + + if (i == count) { + mutex_unlock(&con_mutex); + return ERR_PTR(ret); + } + + spin_lock_irqsave(&chan->lock, flags); + chan->msg_free = 0; + chan->msg_count = 0; + chan->active_req = NULL; + chan->cl = cl; + init_completion(&chan->tx_complete); + + if (chan->txdone_method == TXDONE_BY_POLL + && cl->knows_txdone) + chan->txdone_method |= TXDONE_BY_ACK; + spin_unlock_irqrestore(&chan->lock, flags); + + ret = chan->mbox->ops->startup(chan); + if (ret) { + pr_err("Unable to startup the chan (%d)\n", ret); + mbox_free_channel(chan); + chan = ERR_PTR(ret); + } + + mutex_unlock(&con_mutex); + return chan; +} +EXPORT_SYMBOL_GPL(mbox_request_channel); + +/** + * mbox_free_channel - The client relinquishes control of a mailbox + * channel by this call. + * @chan: The mailbox channel to be freed. + */ +void mbox_free_channel(struct mbox_chan *chan) +{ + unsigned long flags; + + if (!chan || !chan->cl) + return; + + chan->mbox->ops->shutdown(chan); + + /* The queued TX requests are simply aborted, no callbacks are made */ + spin_lock_irqsave(&chan->lock, flags); + chan->cl = NULL; + chan->active_req = NULL; + if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) + chan->txdone_method = TXDONE_BY_POLL; + + module_put(chan->mbox->dev->driver->owner); + spin_unlock_irqrestore(&chan->lock, flags); +} +EXPORT_SYMBOL_GPL(mbox_free_channel); + +static struct mbox_chan * +of_mbox_index_xlate(struct mbox_controller *mbox, + const struct of_phandle_args *sp) +{ + int ind = sp->args[0]; + + if (ind >= mbox->num_chans) + return NULL; + + return &mbox->chans[ind]; +} + +/** + * mbox_controller_register - Register the mailbox controller + * @mbox: Pointer to the mailbox controller. + * + * The controller driver registers its communication chans + */ +int mbox_controller_register(struct mbox_controller *mbox) +{ + int i, txdone; + + /* Sanity check */ + if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans) + return -EINVAL; + + if (mbox->txdone_irq) + txdone = TXDONE_BY_IRQ; + else if (mbox->txdone_poll) + txdone = TXDONE_BY_POLL; + else /* It has to be ACK then */ + txdone = TXDONE_BY_ACK; + + if (txdone == TXDONE_BY_POLL) { + mbox->poll.function = &poll_txdone; + mbox->poll.data = (unsigned long)mbox; + init_timer(&mbox->poll); + } + + for (i = 0; i < mbox->num_chans; i++) { + struct mbox_chan *chan = &mbox->chans[i]; + chan->cl = NULL; + chan->mbox = mbox; + chan->txdone_method = txdone; + spin_lock_init(&chan->lock); + } + + if (!mbox->of_xlate) + mbox->of_xlate = of_mbox_index_xlate; + + mutex_lock(&con_mutex); + list_add_tail(&mbox->node, &mbox_cons); + mutex_unlock(&con_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(mbox_controller_register); + +/** + * mbox_controller_unregister - UnRegister the mailbox controller + * @mbox: Pointer to the mailbox controller. + */ +void mbox_controller_unregister(struct mbox_controller *mbox) +{ + int i; + + if (!mbox) + return; + + mutex_lock(&con_mutex); + + list_del(&mbox->node); + + for (i = 0; i < mbox->num_chans; i++) + mbox_free_channel(&mbox->chans[i]); + + if (mbox->txdone_poll) + del_timer_sync(&mbox->poll); + + mutex_unlock(&con_mutex); +} +EXPORT_SYMBOL_GPL(mbox_controller_unregister); diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c index d873cbae2fbb..f3755e0aa935 100644 --- a/drivers/mailbox/pl320-ipc.c +++ b/drivers/mailbox/pl320-ipc.c @@ -26,7 +26,7 @@ #include #include -#include +#include #define IPCMxSOURCE(m) ((m) * 0x40) #define IPCMxDSET(m) (((m) * 0x40) + 0x004) diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index ad30263a7410..596a2522a6b1 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -35,7 +35,9 @@ int pinconf_check_ops(struct pinctrl_dev *pctldev) return -EINVAL; } /* We have to be able to config the pins in SOME way */ - if (!ops->pin_config_set && !ops->pin_config_group_set) { + if (!ops->pin_config_set && !ops->pin_config_group_set + && !ops->pin_config_set_bulk + && !ops->pin_config_group_set_bulk) { dev_err(pctldev->dev, "pinconf has to be able to set a pins config\n"); return -EINVAL; @@ -171,14 +173,14 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting) dev_err(pctldev->dev, "missing pin_config_set op\n"); return -EINVAL; } - if (ops->pin_config_group_set_bulk) { - ret = ops->pin_config_group_set_bulk(pctldev, + if (ops->pin_config_set_bulk) { + ret = ops->pin_config_set_bulk(pctldev, setting->data.configs.group_or_pin, setting->data.configs.configs, setting->data.configs.num_configs); if (ret < 0) { dev_err(pctldev->dev, - "pin_config_set op failed for pin %d\n", + "pin_config_set_bulk op failed for pin %d\n", setting->data.configs.group_or_pin); return ret; } @@ -211,7 +213,7 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting) setting->data.configs.num_configs); if (ret < 0) { dev_err(pctldev->dev, - "pin_config_group_set op failed for group %d\n", + "pin_config_group_set_bulk op failed for group %d\n", setting->data.configs.group_or_pin); return ret; } diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index a81147e2e4ef..4d24d17bcfc1 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime, #define ELF_HWCAP COMPAT_ELF_HWCAP #endif +#ifdef COMPAT_ELF_HWCAP2 +#undef ELF_HWCAP2 +#define ELF_HWCAP2 COMPAT_ELF_HWCAP2 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index c4d0fc4855ab..6d26b40cbf5d 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -23,11 +23,15 @@ #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) -#define ARCH_TIMER_REG_CTRL 0 -#define ARCH_TIMER_REG_TVAL 1 +enum arch_timer_reg { + ARCH_TIMER_REG_CTRL, + ARCH_TIMER_REG_TVAL, +}; #define ARCH_TIMER_PHYS_ACCESS 0 #define ARCH_TIMER_VIRT_ACCESS 1 +#define ARCH_TIMER_MEM_PHYS_ACCESS 2 +#define ARCH_TIMER_MEM_VIRT_ACCESS 3 #define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */ #define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */ @@ -42,7 +46,7 @@ #ifdef CONFIG_ARM_ARCH_TIMER extern u32 arch_timer_get_rate(void); -extern u64 arch_timer_read_counter(void); +extern u64 (*arch_timer_read_counter)(void); extern struct timecounter *arch_timer_get_timecounter(void); #else diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h new file mode 100644 index 000000000000..c4d4eb8ac9fe --- /dev/null +++ b/include/linux/cpufeature.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_CPUFEATURE_H +#define __LINUX_CPUFEATURE_H + +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE + +#include +#include + +/* + * Macros imported from : + * - cpu_feature(x) ordinal value of feature called 'x' + * - cpu_have_feature(u32 n) whether feature #n is available + * - MAX_CPU_FEATURES upper bound for feature ordinal values + * Optional: + * - CPU_FEATURE_TYPEFMT format string fragment for printing the cpu type + * - CPU_FEATURE_TYPEVAL set of values matching the format string above + */ + +#ifndef CPU_FEATURE_TYPEFMT +#define CPU_FEATURE_TYPEFMT "%s" +#endif + +#ifndef CPU_FEATURE_TYPEVAL +#define CPU_FEATURE_TYPEVAL ELF_PLATFORM +#endif + +/* + * Use module_cpu_feature_match(feature, module_init_function) to + * declare that + * a) the module shall be probed upon discovery of CPU feature 'feature' + * (typically at boot time using udev) + * b) the module must not be loaded if CPU feature 'feature' is not present + * (not even by manual insmod). + * + * For a list of legal values for 'feature', please consult the file + * 'asm/cpufeature.h' of your favorite architecture. + */ +#define module_cpu_feature_match(x, __init) \ +static struct cpu_feature const cpu_feature_match_ ## x[] = \ + { { .feature = cpu_feature(x) }, { } }; \ +MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ + \ +static int cpu_feature_match_ ## x ## _init(void) \ +{ \ + if (!cpu_have_feature(cpu_feature(x))) \ + return -ENODEV; \ + return __init(); \ +} \ +module_init(cpu_feature_match_ ## x ## _init) + +#endif +#endif diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h new file mode 100644 index 000000000000..955f3d7641e8 --- /dev/null +++ b/include/linux/mailbox_client.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * Author: Jassi Brar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MAILBOX_CLIENT_H +#define __MAILBOX_CLIENT_H + +#include + +struct mbox_chan; + +/** + * struct mbox_client - User of a mailbox + * @dev: The client device + * @chan_name: The "controller:channel" this client wants + * @rx_callback: Atomic callback to provide client the data received + * @tx_done: Atomic callback to tell client of data transmission + * @tx_block: If the mbox_send_message should block until data is + * transmitted. + * @tx_tout: Max block period in ms before TX is assumed failure + * @knows_txdone: if the client could run the TX state machine. Usually + * if the client receives some ACK packet for transmission. + * Unused if the controller already has TX_Done/RTR IRQ. + */ +struct mbox_client { + struct device *dev; + const char *chan_name; + void (*rx_callback)(struct mbox_client *cl, void *mssg); + void (*tx_done)(struct mbox_client *cl, void *mssg, int r); + bool tx_block; + unsigned long tx_tout; + bool knows_txdone; +}; + +struct mbox_chan *mbox_request_channel(struct mbox_client *cl); +int mbox_send_message(struct mbox_chan *chan, void *mssg); +void mbox_client_txdone(struct mbox_chan *chan, int r); +bool mbox_client_peek_data(struct mbox_chan *chan); +void mbox_free_channel(struct mbox_chan *chan); + +#endif /* __MAILBOX_CLIENT_H */ diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h new file mode 100644 index 000000000000..5d1915b9af60 --- /dev/null +++ b/include/linux/mailbox_controller.h @@ -0,0 +1,121 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MAILBOX_CONTROLLER_H +#define __MAILBOX_CONTROLLER_H + +#include + +struct mbox_chan; + +/** + * struct mbox_chan_ops - s/w representation of a communication chan + * @send_data: The API asks the MBOX controller driver, in atomic + * context try to transmit a message on the bus. Returns 0 if + * data is accepted for transmission, -EBUSY while rejecting + * if the remote hasn't yet read the last data sent. Actual + * transmission of data is reported by the controller via + * mbox_chan_txdone (if it has some TX ACK irq). It must not + * block. + * @startup: Called when a client requests the chan. The controller + * could ask clients for additional parameters of communication + * to be provided via client's chan_data. This call may + * block. After this call the Controller must forward any + * data received on the chan by calling mbox_chan_received_data. + * @shutdown: Called when a client relinquishes control of a chan. + * This call may block too. The controller must not forwared + * any received data anymore. + * @last_tx_done: If the controller sets 'txdone_poll', the API calls + * this to poll status of last TX. The controller must + * give priority to IRQ method over polling and never + * set both txdone_poll and txdone_irq. Only in polling + * mode 'send_data' is expected to return -EBUSY. + * Used only if txdone_poll:=true && txdone_irq:=false + * @peek_data: Atomic check for any received data. Return true if controller + * has some data to push to the client. False otherwise. + */ +struct mbox_chan_ops { + int (*send_data)(struct mbox_chan *chan, void *data); + int (*startup)(struct mbox_chan *chan); + void (*shutdown)(struct mbox_chan *chan); + bool (*last_tx_done)(struct mbox_chan *chan); + bool (*peek_data)(struct mbox_chan *chan); +}; + +/** + * struct mbox_controller - Controller of a class of communication chans + * @dev: Device backing this controller + * @controller_name: Literal name of the controller. + * @ops: Operators that work on each communication chan + * @chans: Null terminated array of chans. + * @txdone_irq: Indicates if the controller can report to API when + * the last transmitted data was read by the remote. + * Eg, if it has some TX ACK irq. + * @txdone_poll: If the controller can read but not report the TX + * done. Ex, some register shows the TX status but + * no interrupt rises. Ignored if 'txdone_irq' is set. + * @txpoll_period: If 'txdone_poll' is in effect, the API polls for + * last TX's status after these many millisecs + */ +struct mbox_controller { + struct device *dev; + struct mbox_chan_ops *ops; + struct mbox_chan *chans; + int num_chans; + bool txdone_irq; + bool txdone_poll; + unsigned txpoll_period; + struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, + const struct of_phandle_args *sp); + /* + * If the controller supports only TXDONE_BY_POLL, + * this timer polls all the links for txdone. + */ + struct timer_list poll; + unsigned period; + /* Hook to add to the global controller list */ + struct list_head node; +}; + +/* + * The length of circular buffer for queuing messages from a client. + * 'msg_count' tracks the number of buffered messages while 'msg_free' + * is the index where the next message would be buffered. + * We shouldn't need it too big because every transferr is interrupt + * triggered and if we have lots of data to transfer, the interrupt + * latencies are going to be the bottleneck, not the buffer length. + * Besides, mbox_send_message could be called from atomic context and + * the client could also queue another message from the notifier 'tx_done' + * of the last transfer done. + * REVIST: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN" + * print, it needs to be taken from config option or somesuch. + */ +#define MBOX_TX_QUEUE_LEN 20 + +struct mbox_chan { + struct mbox_controller *mbox; /* Parent Controller */ + unsigned txdone_method; + + /* client */ + struct mbox_client *cl; + struct completion tx_complete; + + void *active_req; + unsigned msg_count, msg_free; + void *msg_data[MBOX_TX_QUEUE_LEN]; + /* Access to the channel */ + spinlock_t lock; + + /* Private data for controller */ + void *con_priv; +}; + +int mbox_controller_register(struct mbox_controller *mbox); +void mbox_chan_received_data(struct mbox_chan *chan, void *data); +void mbox_chan_txdone(struct mbox_chan *chan, int r); +void mbox_controller_unregister(struct mbox_controller *mbox); + +#endif /* __MAILBOX_CONTROLLER_H */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b3bd7e737e8b..08450a6146e1 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -563,6 +563,15 @@ struct x86_cpu_id { #define X86_MODEL_ANY 0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +/* + * Generic table type for matching CPU features. + * @feature: the bit number of the feature (0 - 65535) + */ + +struct cpu_feature { + __u16 feature; +}; + #define IPACK_ANY_FORMAT 0xff #define IPACK_ANY_ID (~0) struct ipack_device_id { diff --git a/include/linux/mailbox.h b/include/linux/pl320-ipc.h similarity index 100% rename from include/linux/mailbox.h rename to include/linux/pl320-ipc.h diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 04598f1efd77..a2e15cad76a0 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -11,6 +11,7 @@ #ifndef __LINUX_SND_SOC_DPCM_H #define __LINUX_SND_SOC_DPCM_H +#include #include #include @@ -135,4 +136,25 @@ int soc_dpcm_be_digital_mute(struct snd_soc_pcm_runtime *fe, int mute); int soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd); int soc_dpcm_runtime_update(struct snd_soc_dapm_widget *); +int dpcm_path_get(struct snd_soc_pcm_runtime *fe, + int stream, struct snd_soc_dapm_widget_list **list_); +int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, + int stream, struct snd_soc_dapm_widget_list **list, int new); +int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream); +void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream); +void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int tream); +int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd); +int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream); +int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, + int event); + +static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list) +{ + kfree(*list); +} + + #endif diff --git a/include/sound/soc.h b/include/sound/soc.h index 85c15226103b..5bbdc653a826 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1063,6 +1063,7 @@ struct snd_soc_pcm_runtime { /* Dynamic PCM BE runtime data */ struct snd_soc_dpcm_runtime dpcm[2]; + int fe_compr; long pmdown_time; unsigned char pop_wait:1; diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf index 4d2ac0209671..fa38b9e44367 100644 --- a/linaro/configs/linaro-base.conf +++ b/linaro/configs/linaro-base.conf @@ -102,3 +102,4 @@ CONFIG_FUNCTION_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_FUNCTION_PROFILER=y +CONFIG_MAILBOX=y diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e66d4d258e1a..5e74595f2e85 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -174,6 +174,9 @@ int main(void) DEVID_FIELD(x86_cpu_id, model); DEVID_FIELD(x86_cpu_id, vendor); + DEVID(cpu_feature); + DEVID_FIELD(cpu_feature, feature); + DEVID(mei_cl_device_id); DEVID_FIELD(mei_cl_device_id, name); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 4319a3824727..09632f04576b 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1133,6 +1133,16 @@ static int do_x86cpu_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry); +/* LOOKS like cpu:type:*:feature:*FEAT* */ +static int do_cpu_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, cpu_feature, feature); + + sprintf(alias, "cpu:type:*:feature:*%04X*", feature); + return 1; +} +ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry); + /* Looks like: mei:S */ static int do_mei_entry(const char *filename, void *symval, char *alias) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 97f04afae23f..5e9690c85d8f 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -24,6 +24,7 @@ #include #include #include +#include static int soc_compr_open(struct snd_compr_stream *cstream) { @@ -75,6 +76,98 @@ static int soc_compr_open(struct snd_compr_stream *cstream) return ret; } +static int soc_compr_open_fe(struct snd_compr_stream *cstream) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_soc_platform *platform = fe->platform; + struct snd_soc_dai *cpu_dai = fe->cpu_dai; + struct snd_soc_dai *codec_dai = fe->codec_dai; + struct snd_soc_dpcm *dpcm; + struct snd_soc_dapm_widget_list *list; + int stream; + int ret = 0; + + if (cstream->direction == SND_COMPRESS_PLAYBACK) + stream = SNDRV_PCM_STREAM_PLAYBACK; + else + stream = SNDRV_PCM_STREAM_CAPTURE; + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (platform->driver->compr_ops && platform->driver->compr_ops->open) { + ret = platform->driver->compr_ops->open(cstream); + if (ret < 0) { + pr_err("compress asoc: can't open platform %s\n", platform->name); + goto out; + } + } + + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->startup) { + ret = fe->dai_link->compr_ops->startup(cstream); + if (ret < 0) { + pr_err("compress asoc: %s startup failed\n", fe->dai_link->name); + goto machine_err; + } + } + + fe->dpcm[stream].runtime = fe_substream->runtime; + + if (dpcm_path_get(fe, stream, &list) <= 0) { + dev_dbg(fe->dev, "ASoC: %s no valid %s route\n", + fe->dai_link->name, stream ? "capture" : "playback"); + } + + /* calculate valid and active FE <-> BE dpcms */ + dpcm_process_paths(fe, stream, &list, 1); + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_startup(fe, stream); + if (ret < 0) { + /* clean up all links */ + list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be) + dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; + + dpcm_be_disconnect(fe, stream); + fe->dpcm[stream].runtime = NULL; + goto fe_err; + } + + dpcm_clear_pending_state(fe, stream); + dpcm_path_put(&list); + + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_OPEN; + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + + if (cstream->direction == SND_COMPRESS_PLAYBACK) { + cpu_dai->playback_active++; + codec_dai->playback_active++; + } else { + cpu_dai->capture_active++; + codec_dai->capture_active++; + } + + cpu_dai->active++; + codec_dai->active++; + fe->codec->active++; + + mutex_unlock(&fe->card->mutex); + + return 0; + +fe_err: + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown) + fe->dai_link->compr_ops->shutdown(cstream); +machine_err: + if (platform->driver->compr_ops && platform->driver->compr_ops->free) + platform->driver->compr_ops->free(cstream); +out: + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + mutex_unlock(&fe->card->mutex); + return ret; +} + /* * Power down the audio subsystem pmdown_time msecs after close is called. * This is to ensure there are no pops or clicks in between any music tracks @@ -164,6 +257,65 @@ static int soc_compr_free(struct snd_compr_stream *cstream) return 0; } +static int soc_compr_free_fe(struct snd_compr_stream *cstream) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_soc_platform *platform = fe->platform; + struct snd_soc_dai *cpu_dai = fe->cpu_dai; + struct snd_soc_dai *codec_dai = fe->codec_dai; + struct snd_soc_dpcm *dpcm; + int stream, ret; + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (cstream->direction == SND_COMPRESS_PLAYBACK) { + stream = SNDRV_PCM_STREAM_PLAYBACK; + cpu_dai->playback_active--; + codec_dai->playback_active--; + } else { + stream = SNDRV_PCM_STREAM_CAPTURE; + cpu_dai->capture_active--; + codec_dai->capture_active--; + } + + cpu_dai->active--; + codec_dai->active--; + fe->codec->active--; + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_hw_free(fe, stream); + if (ret < 0) + dev_err(fe->dev, "compressed hw_free failed %d\n", ret); + + ret = dpcm_be_dai_shutdown(fe, stream); + + /* mark FE's links ready to prune */ + list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be) + dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; + + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); + else + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); + + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + + dpcm_be_disconnect(fe, stream); + + fe->dpcm[stream].runtime = NULL; + + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown) + fe->dai_link->compr_ops->shutdown(cstream); + + if (platform->driver->compr_ops && platform->driver->compr_ops->free) + platform->driver->compr_ops->free(cstream); + + mutex_unlock(&fe->card->mutex); + return 0; +} + static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd) { @@ -194,6 +346,59 @@ static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd) return ret; } +static int soc_compr_trigger_fe(struct snd_compr_stream *cstream, int cmd) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_soc_platform *platform = fe->platform; + int ret = 0, stream; + + if (cmd == SND_COMPR_TRIGGER_PARTIAL_DRAIN || + cmd == SND_COMPR_TRIGGER_DRAIN) { + + if (platform->driver->compr_ops && + platform->driver->compr_ops->trigger) + return platform->driver->compr_ops->trigger(cstream, cmd); + } + + if (cstream->direction == SND_COMPRESS_PLAYBACK) + stream = SNDRV_PCM_STREAM_PLAYBACK; + else + stream = SNDRV_PCM_STREAM_CAPTURE; + + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (platform->driver->compr_ops && platform->driver->compr_ops->trigger) { + ret = platform->driver->compr_ops->trigger(cstream, cmd); + if (ret < 0) + goto out; + } + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_trigger(fe, stream, cmd); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_START; + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP; + break; + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED; + break; + } + +out: + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + mutex_unlock(&fe->card->mutex); + return ret; +} + static int soc_compr_set_params(struct snd_compr_stream *cstream, struct snd_compr_params *params) { @@ -241,6 +446,64 @@ static int soc_compr_set_params(struct snd_compr_stream *cstream, return ret; } +static int soc_compr_set_params_fe(struct snd_compr_stream *cstream, + struct snd_compr_params *params) +{ + struct snd_soc_pcm_runtime *fe = cstream->private_data; + struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream; + struct snd_soc_platform *platform = fe->platform; + int ret = 0, stream; + + if (cstream->direction == SND_COMPRESS_PLAYBACK) + stream = SNDRV_PCM_STREAM_PLAYBACK; + else + stream = SNDRV_PCM_STREAM_CAPTURE; + + mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); + + if (platform->driver->compr_ops && platform->driver->compr_ops->set_params) { + ret = platform->driver->compr_ops->set_params(cstream, params); + if (ret < 0) + goto out; + } + + if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->set_params) { + ret = fe->dai_link->compr_ops->set_params(cstream); + if (ret < 0) + goto out; + } + + /* + * Create an empty hw_params for the BE as the machine driver must + * fix this up to match DSP decoder and ASRC configuration. + * I.e. machine driver fixup for compressed BE is mandatory. + */ + memset(&fe->dpcm[fe_substream->stream].hw_params, 0, + sizeof(struct snd_pcm_hw_params)); + + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + + ret = dpcm_be_dai_hw_params(fe, stream); + if (ret < 0) + goto out; + + ret = dpcm_be_dai_prepare(fe, stream); + if (ret < 0) + goto out; + + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START); + else + dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START); + + fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE; + +out: + fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + mutex_unlock(&fe->card->mutex); + return ret; +} + static int soc_compr_get_params(struct snd_compr_stream *cstream, struct snd_codec *params) { @@ -335,7 +598,7 @@ static int soc_compr_copy(struct snd_compr_stream *cstream, return ret; } -static int sst_compr_set_metadata(struct snd_compr_stream *cstream, +static int soc_compr_set_metadata(struct snd_compr_stream *cstream, struct snd_compr_metadata *metadata) { struct snd_soc_pcm_runtime *rtd = cstream->private_data; @@ -348,7 +611,7 @@ static int sst_compr_set_metadata(struct snd_compr_stream *cstream, return ret; } -static int sst_compr_get_metadata(struct snd_compr_stream *cstream, +static int soc_compr_get_metadata(struct snd_compr_stream *cstream, struct snd_compr_metadata *metadata) { struct snd_soc_pcm_runtime *rtd = cstream->private_data; @@ -360,13 +623,14 @@ static int sst_compr_get_metadata(struct snd_compr_stream *cstream, return ret; } + /* ASoC Compress operations */ static struct snd_compr_ops soc_compr_ops = { .open = soc_compr_open, .free = soc_compr_free, .set_params = soc_compr_set_params, - .set_metadata = sst_compr_set_metadata, - .get_metadata = sst_compr_get_metadata, + .set_metadata = soc_compr_set_metadata, + .get_metadata = soc_compr_get_metadata, .get_params = soc_compr_get_params, .trigger = soc_compr_trigger, .pointer = soc_compr_pointer, @@ -375,6 +639,21 @@ static struct snd_compr_ops soc_compr_ops = { .get_codec_caps = soc_compr_get_codec_caps }; +/* ASoC Dynamic Compress operations */ +static struct snd_compr_ops soc_compr_dyn_ops = { + .open = soc_compr_open_fe, + .free = soc_compr_free_fe, + .set_params = soc_compr_set_params_fe, + .get_params = soc_compr_get_params, + .set_metadata = soc_compr_set_metadata, + .get_metadata = soc_compr_get_metadata, + .trigger = soc_compr_trigger_fe, + .pointer = soc_compr_pointer, + .ack = soc_compr_ack, + .get_caps = soc_compr_get_caps, + .get_codec_caps = soc_compr_get_codec_caps +}; + /* create a new compress */ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) { @@ -383,6 +662,7 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) struct snd_soc_dai *codec_dai = rtd->codec_dai; struct snd_soc_dai *cpu_dai = rtd->cpu_dai; struct snd_compr *compr; + struct snd_pcm *be_pcm; char new_name[64]; int ret = 0, direction = 0; @@ -410,7 +690,26 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) ret = -ENOMEM; goto compr_err; } - memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops)); + + if (rtd->dai_link->dynamic) { + snprintf(new_name, sizeof(new_name), "(%s)", + rtd->dai_link->stream_name); + + ret = snd_pcm_new_internal(rtd->card->snd_card, new_name, num, + 1, 0, &be_pcm); + if (ret < 0) { + dev_err(rtd->card->dev, "ASoC: can't create compressed for %s\n", + rtd->dai_link->name); + goto compr_err; + } + + rtd->pcm = be_pcm; + rtd->fe_compr = 1; + be_pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream->private_data = rtd; + be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream->private_data = rtd; + memcpy(compr->ops, &soc_compr_dyn_ops, sizeof(soc_compr_dyn_ops)); + } else + memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops)); /* Add copy callback for not memory mapped DSPs */ if (platform->driver->compr_ops && platform->driver->compr_ops->copy) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 6d9bed4fe7d2..e9ec917cf529 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -34,7 +34,7 @@ #define DPCM_MAX_BE_USERS 8 /* DPCM stream event, send event to FE and all active BEs. */ -static int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, +int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, int event) { struct snd_soc_dpcm *dpcm; @@ -758,7 +758,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, } /* disconnect a BE and FE */ -static void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) +void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm, *d; @@ -854,7 +854,7 @@ static int widget_in_list(struct snd_soc_dapm_widget_list *list, return 0; } -static int dpcm_path_get(struct snd_soc_pcm_runtime *fe, +int dpcm_path_get(struct snd_soc_pcm_runtime *fe, int stream, struct snd_soc_dapm_widget_list **list_) { struct snd_soc_dai *cpu_dai = fe->cpu_dai; @@ -876,11 +876,6 @@ static int dpcm_path_get(struct snd_soc_pcm_runtime *fe, return paths; } -static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list) -{ - kfree(*list); -} - static int dpcm_prune_paths(struct snd_soc_pcm_runtime *fe, int stream, struct snd_soc_dapm_widget_list **list_) { @@ -950,7 +945,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, continue; /* don't connect if FE is not running */ - if (!fe->dpcm[stream].runtime) + if (!fe->dpcm[stream].runtime && !fe->fe_compr) continue; /* newly connected FE and BE */ @@ -975,7 +970,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, * Find the corresponding BE DAIs that source or sink audio to this * FE substream. */ -static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, +int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, int stream, struct snd_soc_dapm_widget_list **list, int new) { if (new) @@ -984,7 +979,7 @@ static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, return dpcm_prune_paths(fe, stream, list); } -static void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) +void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -1022,7 +1017,7 @@ static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe, } } -static int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; int err, count = 0; @@ -1164,7 +1159,7 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) return ret; } -static int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -1225,7 +1220,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) return 0; } -static int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -1290,7 +1285,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) return 0; } -static int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; int ret; @@ -1420,7 +1415,7 @@ static int dpcm_do_trigger(struct snd_soc_dpcm *dpcm, return ret; } -static int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, +int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd) { struct snd_soc_dpcm *dpcm; @@ -1588,7 +1583,7 @@ static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) return ret; } -static int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) +int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; int ret = 0;