Merge branch 'linux-linaro-lsk' into linux-linaro-lsk-android

This commit is contained in:
Mark Brown 2014-06-16 22:28:02 +01:00
commit d3538e3017
49 changed files with 4956 additions and 131 deletions

View File

@ -1,10 +1,14 @@
* ARM architected timer
ARM cores may have a per-core architected timer, which provides per-cpu timers.
ARM cores may have a per-core architected timer, which provides per-cpu timers,
or a memory mapped architected timer, which provides up to 8 frames with a
physical and optional virtual timer per frame.
The timer is attached to a GIC to deliver its per-processor interrupts.
The per-core architected timer is attached to a GIC to deliver its
per-processor interrupts via PPIs. The memory mapped timer is attached to a GIC
to deliver its interrupts via SPIs.
** Timer node properties:
** CP15 Timer node properties:
- compatible : Should at least contain one of
"arm,armv7-timer"
@ -26,3 +30,52 @@ Example:
<1 10 0xf08>;
clock-frequency = <100000000>;
};
** Memory mapped timer node properties:
- compatible : Should at least contain "arm,armv7-timer-mem".
- clock-frequency : The frequency of the main counter, in Hz. Optional.
- reg : The control frame base address.
Note that #address-cells, #size-cells, and ranges shall be present to ensure
the CPU can address a frame's registers.
A timer node has up to 8 frame sub-nodes, each with the following properties:
- frame-number: 0 to 7.
- interrupts : Interrupt list for physical and virtual timers in that order.
The virtual timer interrupt is optional.
- reg : The first and second view base addresses in that order. The second view
base address is optional.
- status : "disabled" indicates the frame is not available for use. Optional.
Example:
timer@f0000000 {
compatible = "arm,armv7-timer-mem";
#address-cells = <1>;
#size-cells = <1>;
ranges;
reg = <0xf0000000 0x1000>;
clock-frequency = <50000000>;
frame@f0001000 {
frame-number = <0>
interrupts = <0 13 0x8>,
<0 14 0x8>;
reg = <0xf0001000 0x1000>,
<0xf0002000 0x1000>;
};
frame@f0003000 {
frame-number = <1>
interrupts = <0 15 0x8>;
reg = <0xf0003000 0x1000>;
status = "disabled";
};
};

View File

@ -0,0 +1,33 @@
* Generic Mailbox Controller and client driver bindings
Generic binding to provide a way for Mailbox controller drivers to
assign appropriate mailbox channel to client drivers.
* Mailbox Controller
Required property:
- #mbox-cells: Must be at least 1. Number of cells in a mailbox
specifier.
Example:
mailbox: mailbox {
...
#mbox-cells = <1>;
};
* Mailbox Client
Required property:
- mbox: List of phandle and mailbox channel specifier.
- mbox-names: List of identifier strings for each mailbox channel
required by the client.
Example:
pwr_cntrl: power {
...
mbox-names = "pwr-ctrl", "rpc";
mbox = <&mailbox 0
&mailbox 1>;
};

View File

@ -5152,6 +5152,14 @@ S: Maintained
F: drivers/net/macvlan.c
F: include/linux/if_macvlan.h
MAILBOX API
M: Jassi Brar <jassisinghbrar@gmail.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/mailbox/
F: include/linux/mailbox_client.h
F: include/linux/mailbox_controller.h
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
M: Michael Kerrisk <mtk.manpages@gmail.com>
W: http://www.kernel.org/doc/man-pages

View File

@ -17,7 +17,8 @@ int arch_timer_arch_init(void);
* nicely work out which register we want, and chuck away the rest of
* the code. At least it does so with a recent GCC (4.6.3).
*/
static inline void arch_timer_reg_write(const int access, const int reg, u32 val)
static __always_inline
void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
@ -28,9 +29,7 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val
asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val));
break;
}
}
if (access == ARCH_TIMER_VIRT_ACCESS) {
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val));
@ -44,7 +43,8 @@ static inline void arch_timer_reg_write(const int access, const int reg, u32 val
isb();
}
static inline u32 arch_timer_reg_read(const int access, const int reg)
static __always_inline
u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
u32 val = 0;
@ -57,9 +57,7 @@ static inline u32 arch_timer_reg_read(const int access, const int reg)
asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val));
break;
}
}
if (access == ARCH_TIMER_VIRT_ACCESS) {
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val));

View File

@ -21,6 +21,7 @@
#include <linux/irq.h>
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
#include <linux/pl320-ipc.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>

View File

@ -17,6 +17,7 @@ config ARM64
select DCACHE_WORD_ACCESS
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CPU_AUTOPROBE
select GENERIC_EARLY_IOREMAP
select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
@ -461,5 +462,8 @@ source "arch/arm64/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
if CRYPTO
source "arch/arm64/crypto/Kconfig"
endif
source "lib/Kconfig"

View File

@ -43,6 +43,7 @@ TEXT_OFFSET := 0x00080000
export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)

View File

@ -54,7 +54,7 @@ aliases {
psci {
compatible = "arm,psci";
method = "smc";
cpu_suspend = <0xc4000001>;
cpu_suspend = <0x84000001>;
cpu_off = <0x84000002>;
cpu_on = <0xc4000003>;
};
@ -63,12 +63,33 @@ cpus {
#address-cells = <2>;
#size-cells = <0>;
idle-states {
entry-method = "arm,psci";
CPU_SLEEP_0: cpu-sleep-0 {
compatible = "arm,idle-state";
entry-method-param = <0x0010000>;
entry-latency-us = <40>;
exit-latency-us = <100>;
min-residency-us = <150>;
};
CLUSTER_SLEEP_0: cluster-sleep-0 {
compatible = "arm,idle-state";
entry-method-param = <0x1010000>;
entry-latency-us = <500>;
exit-latency-us = <1000>;
min-residency-us = <2500>;
};
};
big0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a57", "arm,armv8";
reg = <0x0 0x0>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
big1: cpu@1 {
device_type = "cpu";
@ -76,6 +97,7 @@ big1: cpu@1 {
reg = <0x0 0x1>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
big2: cpu@2 {
device_type = "cpu";
@ -83,6 +105,7 @@ big2: cpu@2 {
reg = <0x0 0x2>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
big3: cpu@3 {
device_type = "cpu";
@ -90,6 +113,7 @@ big3: cpu@3 {
reg = <0x0 0x3>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
little0: cpu@100 {
device_type = "cpu";
@ -97,6 +121,7 @@ little0: cpu@100 {
reg = <0x0 0x100>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
little1: cpu@101 {
device_type = "cpu";
@ -104,6 +129,7 @@ little1: cpu@101 {
reg = <0x0 0x101>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
little2: cpu@102 {
device_type = "cpu";
@ -111,6 +137,7 @@ little2: cpu@102 {
reg = <0x0 0x102>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
little3: cpu@103 {
device_type = "cpu";
@ -118,6 +145,7 @@ little3: cpu@103 {
reg = <0x0 0x103>;
enable-method = "psci";
clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
cpu-map {

View File

@ -68,7 +68,7 @@ cpu@1 {
memory@80000000 {
device_type = "memory";
reg = <0x00000000 0x80000000 0x0 0x80000000>,
reg = <0x00000000 0x80000000 0x0 0x7f000000>,
<0x00000008 0x80000000 0x1 0x80000000>;
};

53
arch/arm64/crypto/Kconfig Normal file
View File

@ -0,0 +1,53 @@
menuconfig ARM64_CRYPTO
bool "ARM64 Accelerated Cryptographic Algorithms"
depends on ARM64
help
Say Y here to choose from a selection of cryptographic algorithms
implemented using ARM64 specific CPU features or instructions.
if ARM64_CRYPTO
config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH
config CRYPTO_SHA2_ARM64_CE
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES
config CRYPTO_AES_ARM64_CE_CCM
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES
select CRYPTO_AEAD
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_ABLK_HELPER
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_ABLK_HELPER
endif

View File

@ -0,0 +1,38 @@
#
# linux/arch/arm64/crypto/Makefile
#
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
aes-ce-blk-y := aes-glue-ce.o aes-ce.o
obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
aes-neon-blk-y := aes-glue-neon.o aes-neon.o
AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE
AFLAGS_aes-neon.o := -DINTERLEAVE=4
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_dep,cc_o_c)

View File

@ -0,0 +1,222 @@
/*
* aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
*
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
.text
.arch armv8-a+crypto
/*
* void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
* u32 *macp, u8 const rk[], u32 rounds);
*/
ENTRY(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
ld1 {v0.2d}, [x0] /* load mac */
cbz w8, 1f
sub w8, w8, #16
eor v1.16b, v1.16b, v1.16b
0: ldrb w7, [x1], #1 /* get 1 byte of input */
subs w2, w2, #1
add w8, w8, #1
ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */
cbnz w8, 0b
eor v0.16b, v0.16b, v1.16b
1: ld1 {v3.2d}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
sub w7, w5, #2 /* modified # of rounds */
bmi 2f
bne 5f
mov v5.16b, v3.16b
b 4f
2: mov v4.16b, v3.16b
ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
3: aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
4: ld1 {v3.2d}, [x6], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
5: ld1 {v4.2d}, [x6], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
ld1 {v5.2d}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
eor v0.16b, v0.16b, v5.16b /* final round */
bmi 6f
ld1 {v1.16b}, [x1], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b
6: st1 {v0.2d}, [x0] /* store mac */
beq 10f
adds w2, w2, #16
beq 10f
mov w8, w2
7: ldrb w7, [x1], #1
umov w6, v0.b[0]
eor w6, w6, w7
strb w6, [x0], #1
subs w2, w2, #1
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
8: mov w7, w8
add w8, w8, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
st1 {v0.2d}, [x0]
10: str w8, [x3]
ret
ENDPROC(ce_aes_ccm_auth_data)
/*
* void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
* u32 rounds);
*/
ENTRY(ce_aes_ccm_final)
ld1 {v3.2d}, [x2], #16 /* load first round key */
ld1 {v0.2d}, [x0] /* load mac */
cmp w3, #12 /* which key size? */
sub w3, w3, #2 /* modified # of rounds */
ld1 {v1.2d}, [x1] /* load 1st ctriv */
bmi 0f
bne 3f
mov v5.16b, v3.16b
b 2f
0: mov v4.16b, v3.16b
1: ld1 {v5.2d}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b
aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
2: ld1 {v3.2d}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b
aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
3: ld1 {v4.2d}, [x2], #16 /* load next round key */
subs w3, w3, #3
aese v0.16b, v3.16b
aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
bpl 1b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
/* final round key cancels out */
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
st1 {v0.2d}, [x0] /* store result */
ret
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */
ld1 {v0.2d}, [x5] /* load mac */
rev x8, x8 /* keep swabbed ctr in reg */
0: /* outer loop */
ld1 {v1.1d}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
ld1 {v3.2d}, [x3] /* load first round key */
add x10, x3, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
b 3f
1: mov v4.16b, v3.16b
ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b
aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
3: ld1 {v3.2d}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b
aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
4: ld1 {v4.2d}, [x10], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
ld1 {v5.2d}, [x10], #16 /* load next round key */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
subs w2, w2, #16
bmi 6f /* partial block? */
ld1 {v2.16b}, [x1], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
.else
eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
eor v1.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v1.16b}, [x0], #16 /* write output block */
bne 0b
rev x8, x8
st1 {v0.2d}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
st1 {v0.2d}, [x5] /* store mac */
add w2, w2, #16 /* process partial tail block */
7: ldrb w9, [x1], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
umov w7, v0.b[0] /* get top mac byte */
.if \enc == 1
eor w7, w7, w9
eor w9, w9, w6
.else
eor w9, w9, w6
eor w7, w7, w9
.endif
strb w9, [x0], #1 /* store out byte */
strb w7, [x5], #1 /* store mac byte */
subs w2, w2, #1
beq 5b
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
b 7b
.endm
/*
* void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[]);
* void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[]);
*/
ENTRY(ce_aes_ccm_encrypt)
aes_ccm_do_crypt 1
ENDPROC(ce_aes_ccm_encrypt)
ENTRY(ce_aes_ccm_decrypt)
aes_ccm_do_crypt 0
ENDPROC(ce_aes_ccm_decrypt)

View File

@ -0,0 +1,297 @@
/*
* aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
*
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/scatterwalk.h>
#include <linux/crypto.h>
#include <linux/module.h>
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
* # of rounds specified by AES:
* 128 bit key 10 rounds
* 192 bit key 12 rounds
* 256 bit key 14 rounds
* => n byte key => 6 + (n/4) rounds
*/
return 6 + ctx->key_length / 4;
}
asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
u32 *macp, u32 const rk[], u32 rounds);
asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
u32 const rk[], u32 rounds, u8 mac[],
u8 ctr[]);
asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
u32 const rk[], u32 rounds, u8 mac[],
u8 ctr[]);
asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
u32 rounds);
static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
int ret;
ret = crypto_aes_expand_key(ctx, in_key, key_len);
if (!ret)
return 0;
tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
if ((authsize & 1) || authsize < 4)
return -EINVAL;
return 0;
}
static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
__be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
u32 l = req->iv[0] + 1;
/* verify that CCM dimension 'L' is set correctly in the IV */
if (l < 2 || l > 8)
return -EINVAL;
/* verify that msglen can in fact be represented in L bytes */
if (l < 4 && msglen >> (8 * l))
return -EOVERFLOW;
/*
* Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
* uses a u32 type to represent msglen so the top 4 bytes are always 0.
*/
n[0] = 0;
n[1] = cpu_to_be32(msglen);
memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
/*
* Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
* - bits 0..2 : max # of bytes required to represent msglen, minus 1
* (already set by caller)
* - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
* - bit 6 : indicates presence of authenticate-only data
*/
maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
if (req->assoclen)
maciv[0] |= 0x40;
memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
return 0;
}
static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
struct __packed { __be16 l; __be32 h; u16 len; } ltag;
struct scatter_walk walk;
u32 len = req->assoclen;
u32 macp = 0;
/* prepend the AAD with a length tag */
if (len < 0xff00) {
ltag.l = cpu_to_be16(len);
ltag.len = 2;
} else {
ltag.l = cpu_to_be16(0xfffe);
put_unaligned_be32(len, &ltag.h);
ltag.len = 6;
}
ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
num_rounds(ctx));
scatterwalk_start(&walk, req->assoc);
do {
u32 n = scatterwalk_clamp(&walk, len);
u8 *p;
if (!n) {
scatterwalk_start(&walk, sg_next(walk.sg));
n = scatterwalk_clamp(&walk, len);
}
p = scatterwalk_map(&walk);
ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
num_rounds(ctx));
len -= n;
scatterwalk_unmap(p);
scatterwalk_advance(&walk, n);
scatterwalk_done(&walk, 0, len);
} while (len);
}
static int ccm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
struct blkcipher_desc desc = { .info = req->iv };
struct blkcipher_walk walk;
u8 __aligned(8) mac[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u32 len = req->cryptlen;
int err;
err = ccm_init_mac(req, mac, len);
if (err)
return err;
kernel_neon_begin_partial(6);
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
blkcipher_walk_init(&walk, req->dst, req->src, len);
err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
AES_BLOCK_SIZE);
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
if (walk.nbytes == len)
tail = 0;
ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
len -= walk.nbytes - tail;
err = blkcipher_walk_done(&desc, &walk, tail);
}
if (!err)
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
kernel_neon_end();
if (err)
return err;
/* copy authtag to end of dst */
scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
crypto_aead_authsize(aead), 1);
return 0;
}
static int ccm_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
unsigned int authsize = crypto_aead_authsize(aead);
struct blkcipher_desc desc = { .info = req->iv };
struct blkcipher_walk walk;
u8 __aligned(8) mac[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u32 len = req->cryptlen - authsize;
int err;
err = ccm_init_mac(req, mac, len);
if (err)
return err;
kernel_neon_begin_partial(6);
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
blkcipher_walk_init(&walk, req->dst, req->src, len);
err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
AES_BLOCK_SIZE);
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
if (walk.nbytes == len)
tail = 0;
ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
len -= walk.nbytes - tail;
err = blkcipher_walk_done(&desc, &walk, tail);
}
if (!err)
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
kernel_neon_end();
if (err)
return err;
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
authsize, 0);
if (memcmp(mac, buf, authsize))
return -EBADMSG;
return 0;
}
static struct crypto_alg ccm_aes_alg = {
.cra_name = "ccm(aes)",
.cra_driver_name = "ccm-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_aead_type,
.cra_module = THIS_MODULE,
.cra_aead = {
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
.setkey = ccm_setkey,
.setauthsize = ccm_setauthsize,
.encrypt = ccm_encrypt,
.decrypt = ccm_decrypt,
}
};
static int __init aes_mod_init(void)
{
if (!(elf_hwcap & HWCAP_AES))
return -ENODEV;
return crypto_register_alg(&ccm_aes_alg);
}
static void __exit aes_mod_exit(void)
{
crypto_unregister_alg(&ccm_aes_alg);
}
module_init(aes_mod_init);
module_exit(aes_mod_exit);
MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("ccm(aes)");

View File

@ -0,0 +1,155 @@
/*
* aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
*
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <crypto/aes.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
* # of rounds specified by AES:
* 128 bit key 10 rounds
* 192 bit key 12 rounds
* 256 bit key 14 rounds
* => n byte key => 6 + (n/4) rounds
*/
return 6 + ctx->key_length / 4;
}
static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
struct aes_block *out = (struct aes_block *)dst;
struct aes_block const *in = (struct aes_block *)src;
void *dummy0;
int dummy1;
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
" ld1 {v1.2d}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
"1: aese v0.16b, v2.16b ;"
" aesmc v0.16b, v0.16b ;"
"2: ld1 {v1.2d}, [%[key]], #16 ;"
" aese v0.16b, v3.16b ;"
" aesmc v0.16b, v0.16b ;"
"3: ld1 {v2.2d}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aese v0.16b, v1.16b ;"
" aesmc v0.16b, v0.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
" bpl 1b ;"
" aese v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
" st1 {v0.16b}, %[out] ;"
: [out] "=Q"(*out),
[key] "=r"(dummy0),
[rounds] "=r"(dummy1)
: [in] "Q"(*in),
"1"(ctx->key_enc),
"2"(num_rounds(ctx) - 2)
: "cc");
kernel_neon_end();
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
struct aes_block *out = (struct aes_block *)dst;
struct aes_block const *in = (struct aes_block *)src;
void *dummy0;
int dummy1;
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
" ld1 {v1.2d}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
"1: aesd v0.16b, v2.16b ;"
" aesimc v0.16b, v0.16b ;"
"2: ld1 {v1.2d}, [%[key]], #16 ;"
" aesd v0.16b, v3.16b ;"
" aesimc v0.16b, v0.16b ;"
"3: ld1 {v2.2d}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aesd v0.16b, v1.16b ;"
" aesimc v0.16b, v0.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
" bpl 1b ;"
" aesd v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
" st1 {v0.16b}, %[out] ;"
: [out] "=Q"(*out),
[key] "=r"(dummy0),
[rounds] "=r"(dummy1)
: [in] "Q"(*in),
"1"(ctx->key_dec),
"2"(num_rounds(ctx) - 2)
: "cc");
kernel_neon_end();
}
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
.cra_cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = crypto_aes_set_key,
.cia_encrypt = aes_cipher_encrypt,
.cia_decrypt = aes_cipher_decrypt
}
};
static int __init aes_mod_init(void)
{
return crypto_register_alg(&aes_alg);
}
static void __exit aes_mod_exit(void)
{
crypto_unregister_alg(&aes_alg);
}
module_cpu_feature_match(AES, aes_mod_init);
module_exit(aes_mod_exit);

133
arch/arm64/crypto/aes-ce.S Normal file
View File

@ -0,0 +1,133 @@
/*
* linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
* Crypto Extensions
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#define AES_ENTRY(func) ENTRY(ce_ ## func)
#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
.arch armv8-a+crypto
/* preload all round keys */
.macro load_round_keys, rounds, rk
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
ld1 {v17.16b-v18.16b}, [\rk], #32
1111: ld1 {v19.16b-v20.16b}, [\rk], #32
2222: ld1 {v21.16b-v24.16b}, [\rk], #64
ld1 {v25.16b-v28.16b}, [\rk], #64
ld1 {v29.16b-v31.16b}, [\rk]
.endm
/* prepare for encryption with key in rk[] */
.macro enc_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.endm
/* prepare for encryption (again) but with new key in rk[] */
.macro enc_switch_key, rounds, rk, ignore
load_round_keys \rounds, \rk
.endm
/* prepare for decryption with key in rk[] */
.macro dec_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
.endif
.endif
aes\mc \i0\().16b, \i0\().16b
.ifnb \i1
aes\mc \i1\().16b, \i1\().16b
.ifnb \i3
aes\mc \i2\().16b, \i2\().16b
aes\mc \i3\().16b, \i3\().16b
.endif
.endif
.endm
/* up to 4 interleaved encryption rounds with the same round key */
.macro round_Nx, enc, k, i0, i1, i2, i3
.ifc \enc, e
do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
.else
do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
.endif
.endm
/* up to 4 interleaved final rounds */
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
.endif
.endif
eor \i0\().16b, \i0\().16b, \k2\().16b
.ifnb \i1
eor \i1\().16b, \i1\().16b, \k2\().16b
.ifnb \i3
eor \i2\().16b, \i2\().16b, \k2\().16b
eor \i3\().16b, \i3\().16b, \k2\().16b
.endif
.endif
.endm
/* up to 4 interleaved blocks */
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
round_Nx \enc, v17, \i0, \i1, \i2, \i3
round_Nx \enc, v18, \i0, \i1, \i2, \i3
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
round_Nx \enc, v20, \i0, \i1, \i2, \i3
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
round_Nx \enc, \key, \i0, \i1, \i2, \i3
.endr
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
.endm
.macro encrypt_block, in, rounds, t0, t1, t2
do_block_Nx e, \rounds, \in
.endm
.macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1
.endm
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
.endm
.macro decrypt_block, in, rounds, t0, t1, t2
do_block_Nx d, \rounds, \in
.endm
.macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1
.endm
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
.endm
#include "aes-modes.S"

View File

@ -0,0 +1,446 @@
/*
* linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/hwcap.h>
#include <crypto/aes.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#ifdef USE_V8_CRYPTO_EXTENSIONS
#define MODE "ce"
#define PRIO 300
#define aes_ecb_encrypt ce_aes_ecb_encrypt
#define aes_ecb_decrypt ce_aes_ecb_decrypt
#define aes_cbc_encrypt ce_aes_cbc_encrypt
#define aes_cbc_decrypt ce_aes_cbc_decrypt
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
#define PRIO 200
#define aes_ecb_encrypt neon_aes_ecb_encrypt
#define aes_ecb_decrypt neon_aes_ecb_decrypt
#define aes_cbc_encrypt neon_aes_cbc_encrypt
#define aes_cbc_decrypt neon_aes_cbc_decrypt
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
MODULE_ALIAS("ecb(aes)");
MODULE_ALIAS("cbc(aes)");
MODULE_ALIAS("ctr(aes)");
MODULE_ALIAS("xts(aes)");
#endif
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
/* defined in aes-modes.S */
asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, int first);
asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, int first);
asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[], int first);
asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[], int first);
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 ctr[], int first);
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 const rk2[], u8 iv[],
int first);
asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 const rk2[], u8 iv[],
int first);
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
};
static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
int ret;
ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2);
if (!ret)
ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2],
key_len / 2);
if (!ret)
return 0;
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = 6 + ctx->key_length / 4;
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, rounds, blocks, first);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = 6 + ctx->key_length / 4;
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, rounds, blocks, first);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = 6 + ctx->key_length / 4;
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
first);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = 6 + ctx->key_length / 4;
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
first);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = 6 + ctx->key_length / 4;
struct blkcipher_walk walk;
int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
first = 1;
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
first);
first = 0;
nbytes -= blocks * AES_BLOCK_SIZE;
if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
break;
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
if (nbytes) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
u8 __aligned(8) tail[AES_BLOCK_SIZE];
/*
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
* to tell aes_ctr_encrypt() to only read half a block.
*/
blocks = (nbytes <= 8) ? -1 : 1;
aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
blocks, walk.iv, first);
memcpy(tdst, tail, nbytes);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = 6 + ctx->key1.key_length / 4;
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_enc, rounds, blocks,
(u8 *)ctx->key2.key_enc, walk.iv, first);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int err, first, rounds = 6 + ctx->key1.key_length / 4;
struct blkcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_dec, rounds, blocks,
(u8 *)ctx->key2.key_enc, walk.iv, first);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_neon_end();
return err;
}
static struct crypto_alg aes_algs[] = { {
.cra_name = "__ecb-aes-" MODE,
.cra_driver_name = "__driver-ecb-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = crypto_aes_set_key,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
}, {
.cra_name = "__cbc-aes-" MODE,
.cra_driver_name = "__driver-cbc-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = crypto_aes_set_key,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
}, {
.cra_name = "__ctr-aes-" MODE,
.cra_driver_name = "__driver-ctr-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = crypto_aes_set_key,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
},
}, {
.cra_name = "__xts-aes-" MODE,
.cra_driver_name = "__driver-xts-aes-" MODE,
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
}, {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-" MODE,
.cra_priority = PRIO,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-" MODE,
.cra_priority = PRIO,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-" MODE,
.cra_priority = PRIO,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-" MODE,
.cra_priority = PRIO,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
} };
static int __init aes_init(void)
{
return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
}
static void __exit aes_exit(void)
{
crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
}
#ifdef USE_V8_CRYPTO_EXTENSIONS
module_cpu_feature_match(AES, aes_init);
#else
module_init(aes_init);
#endif
module_exit(aes_exit);

View File

@ -0,0 +1,532 @@
/*
* linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/* included by aes-ce.S and aes-neon.S */
.text
.align 4
/*
* There are several ways to instantiate this code:
* - no interleave, all inline
* - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
* - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
* - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
* - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
*
* Macros imported by this code:
* - enc_prepare - setup NEON registers for encryption
* - dec_prepare - setup NEON registers for decryption
* - enc_switch_key - change to new key after having prepared for encryption
* - encrypt_block - encrypt a single block
* - decrypt block - decrypt a single block
* - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
* - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
* - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
* - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
*/
#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
#define FRAME_POP ldp x29, x30, [sp],#16
#if INTERLEAVE == 2
aes_encrypt_block2x:
encrypt_block2x v0, v1, w3, x2, x6, w7
ret
ENDPROC(aes_encrypt_block2x)
aes_decrypt_block2x:
decrypt_block2x v0, v1, w3, x2, x6, w7
ret
ENDPROC(aes_decrypt_block2x)
#elif INTERLEAVE == 4
aes_encrypt_block4x:
encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
ret
ENDPROC(aes_encrypt_block4x)
aes_decrypt_block4x:
decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
ret
ENDPROC(aes_decrypt_block4x)
#else
#error INTERLEAVE should equal 2 or 4
#endif
.macro do_encrypt_block2x
bl aes_encrypt_block2x
.endm
.macro do_decrypt_block2x
bl aes_decrypt_block2x
.endm
.macro do_encrypt_block4x
bl aes_encrypt_block4x
.endm
.macro do_decrypt_block4x
bl aes_decrypt_block4x
.endm
#else
#define FRAME_PUSH
#define FRAME_POP
.macro do_encrypt_block2x
encrypt_block2x v0, v1, w3, x2, x6, w7
.endm
.macro do_decrypt_block2x
decrypt_block2x v0, v1, w3, x2, x6, w7
.endm
.macro do_encrypt_block4x
encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
.endm
.macro do_decrypt_block4x
decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
.endm
#endif
/*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, int first)
* aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, int first)
*/
AES_ENTRY(aes_ecb_encrypt)
FRAME_PUSH
cbz w5, .LecbencloopNx
enc_prepare w3, x2, x5
.LecbencloopNx:
#if INTERLEAVE >= 2
subs w4, w4, #INTERLEAVE
bmi .Lecbenc1x
#if INTERLEAVE == 2
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
do_encrypt_block2x
st1 {v0.16b-v1.16b}, [x0], #32
#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
do_encrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
#endif
b .LecbencloopNx
.Lecbenc1x:
adds w4, w4, #INTERLEAVE
beq .Lecbencout
#endif
.Lecbencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
encrypt_block v0, w3, x2, x5, w6
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lecbencloop
.Lecbencout:
FRAME_POP
ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
FRAME_PUSH
cbz w5, .LecbdecloopNx
dec_prepare w3, x2, x5
.LecbdecloopNx:
#if INTERLEAVE >= 2
subs w4, w4, #INTERLEAVE
bmi .Lecbdec1x
#if INTERLEAVE == 2
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
do_decrypt_block2x
st1 {v0.16b-v1.16b}, [x0], #32
#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
do_decrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
#endif
b .LecbdecloopNx
.Lecbdec1x:
adds w4, w4, #INTERLEAVE
beq .Lecbdecout
#endif
.Lecbdecloop:
ld1 {v0.16b}, [x1], #16 /* get next ct block */
decrypt_block v0, w3, x2, x5, w6
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lecbdecloop
.Lecbdecout:
FRAME_POP
ret
AES_ENDPROC(aes_ecb_decrypt)
/*
* aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[], int first)
* aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[], int first)
*/
AES_ENTRY(aes_cbc_encrypt)
cbz w6, .Lcbcencloop
ld1 {v0.16b}, [x5] /* get iv */
enc_prepare w3, x2, x5
.Lcbcencloop:
ld1 {v1.16b}, [x1], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
encrypt_block v0, w3, x2, x5, w6
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcencloop
ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
FRAME_PUSH
cbz w6, .LcbcdecloopNx
ld1 {v7.16b}, [x5] /* get iv */
dec_prepare w3, x2, x5
.LcbcdecloopNx:
#if INTERLEAVE >= 2
subs w4, w4, #INTERLEAVE
bmi .Lcbcdec1x
#if INTERLEAVE == 2
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
mov v2.16b, v0.16b
mov v3.16b, v1.16b
do_decrypt_block2x
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v2.16b
mov v7.16b, v3.16b
st1 {v0.16b-v1.16b}, [x0], #32
#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
do_decrypt_block4x
sub x1, x1, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
#endif
b .LcbcdecloopNx
.Lcbcdec1x:
adds w4, w4, #INTERLEAVE
beq .Lcbcdecout
#endif
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
decrypt_block v0, w3, x2, x5, w6
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
FRAME_POP
ret
AES_ENDPROC(aes_cbc_decrypt)
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 ctr[], int first)
*/
AES_ENTRY(aes_ctr_encrypt)
FRAME_PUSH
cbnz w6, .Lctrfirst /* 1st time around? */
umov x5, v4.d[1] /* keep swabbed ctr in reg */
rev x5, x5
#if INTERLEAVE >= 2
cmn w5, w4 /* 32 bit overflow? */
bcs .Lctrinc
add x5, x5, #1 /* increment BE ctr */
b .LctrincNx
#else
b .Lctrinc
#endif
.Lctrfirst:
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]
umov x5, v4.d[1] /* keep swabbed ctr in reg */
rev x5, x5
#if INTERLEAVE >= 2
cmn w5, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
subs w4, w4, #INTERLEAVE
bmi .Lctr1x
#if INTERLEAVE == 2
mov v0.8b, v4.8b
mov v1.8b, v4.8b
rev x7, x5
add x5, x5, #1
ins v0.d[1], x7
rev x7, x5
add x5, x5, #1
ins v1.d[1], x7
ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
do_encrypt_block2x
eor v0.16b, v0.16b, v2.16b
eor v1.16b, v1.16b, v3.16b
st1 {v0.16b-v1.16b}, [x0], #32
#else
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w5
mov v0.16b, v4.16b
add v7.4s, v7.4s, v8.4s
mov v1.16b, v4.16b
rev32 v8.16b, v7.16b
mov v2.16b, v4.16b
mov v3.16b, v4.16b
mov v1.s[3], v8.s[0]
mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2]
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
do_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
add x5, x5, #INTERLEAVE
#endif
cbz w4, .LctroutNx
.LctrincNx:
rev x7, x5
ins v4.d[1], x7
b .LctrloopNx
.LctroutNx:
sub x5, x5, #1
rev x7, x5
ins v4.d[1], x7
b .Lctrout
.Lctr1x:
adds w4, w4, #INTERLEAVE
beq .Lctrout
#endif
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x6, w7
subs w4, w4, #1
bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
beq .Lctrout
.Lctrinc:
adds x5, x5, #1 /* increment BE ctr */
rev x7, x5
ins v4.d[1], x7
bcc .Lctrloop /* no overflow? */
umov x7, v4.d[0] /* load upper word of ctr */
rev x7, x7 /* ... to handle the carry */
add x7, x7, #1
rev x7, x7
ins v4.d[0], x7
b .Lctrloop
.Lctrhalfblock:
ld1 {v3.8b}, [x1]
eor v3.8b, v0.8b, v3.8b
st1 {v3.8b}, [x0]
.Lctrout:
FRAME_POP
ret
AES_ENDPROC(aes_ctr_encrypt)
.ltorg
/*
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
* int blocks, u8 const rk2[], u8 iv[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
* int blocks, u8 const rk2[], u8 iv[], int first)
*/
.macro next_tweak, out, in, const, tmp
sshr \tmp\().2d, \in\().2d, #63
and \tmp\().16b, \tmp\().16b, \const\().16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
.Lxts_mul_x:
.word 1, 0, 0x87, 0
AES_ENTRY(aes_xts_encrypt)
FRAME_PUSH
cbz w7, .LxtsencloopNx
ld1 {v4.16b}, [x6]
enc_prepare w3, x5, x6
encrypt_block v4, w3, x5, x6, w7 /* first tweak */
enc_switch_key w3, x2, x6
ldr q7, .Lxts_mul_x
b .LxtsencNx
.LxtsencloopNx:
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsencNx:
#if INTERLEAVE >= 2
subs w4, w4, #INTERLEAVE
bmi .Lxtsenc1x
#if INTERLEAVE == 2
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
do_encrypt_block2x
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
st1 {v0.16b-v1.16b}, [x0], #32
cbz w4, .LxtsencoutNx
next_tweak v4, v5, v7, v8
b .LxtsencNx
.LxtsencoutNx:
mov v4.16b, v5.16b
b .Lxtsencout
#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
next_tweak v7, v6, v7, v8
eor v3.16b, v3.16b, v7.16b
do_encrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsencout
b .LxtsencloopNx
#endif
.Lxtsenc1x:
adds w4, w4, #INTERLEAVE
beq .Lxtsencout
#endif
.Lxtsencloop:
ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
encrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
beq .Lxtsencout
next_tweak v4, v4, v7, v8
b .Lxtsencloop
.Lxtsencout:
FRAME_POP
ret
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
FRAME_PUSH
cbz w7, .LxtsdecloopNx
ld1 {v4.16b}, [x6]
enc_prepare w3, x5, x6
encrypt_block v4, w3, x5, x6, w7 /* first tweak */
dec_prepare w3, x2, x6
ldr q7, .Lxts_mul_x
b .LxtsdecNx
.LxtsdecloopNx:
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsdecNx:
#if INTERLEAVE >= 2
subs w4, w4, #INTERLEAVE
bmi .Lxtsdec1x
#if INTERLEAVE == 2
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
do_decrypt_block2x
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
st1 {v0.16b-v1.16b}, [x0], #32
cbz w4, .LxtsdecoutNx
next_tweak v4, v5, v7, v8
b .LxtsdecNx
.LxtsdecoutNx:
mov v4.16b, v5.16b
b .Lxtsdecout
#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
next_tweak v7, v6, v7, v8
eor v3.16b, v3.16b, v7.16b
do_decrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsdecout
b .LxtsdecloopNx
#endif
.Lxtsdec1x:
adds w4, w4, #INTERLEAVE
beq .Lxtsdecout
#endif
.Lxtsdecloop:
ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
beq .Lxtsdecout
next_tweak v4, v4, v7, v8
b .Lxtsdecloop
.Lxtsdecout:
FRAME_POP
ret
AES_ENDPROC(aes_xts_decrypt)

View File

@ -0,0 +1,382 @@
/*
* linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7
add \out, \in, \in
and \temp, \temp, \const
eor \out, \out, \temp
.endm
/* preload the entire Sbox */
.macro prepare, sbox, shiftrows, temp
adr \temp, \sbox
movi v12.16b, #0x40
ldr q13, \shiftrows
movi v14.16b, #0x1b
ld1 {v16.16b-v19.16b}, [\temp], #64
ld1 {v20.16b-v23.16b}, [\temp], #64
ld1 {v24.16b-v27.16b}, [\temp], #64
ld1 {v28.16b-v31.16b}, [\temp]
.endm
/* do preload for encryption */
.macro enc_prepare, ignore0, ignore1, temp
prepare .LForward_Sbox, .LForward_ShiftRows, \temp
.endm
.macro enc_switch_key, ignore0, ignore1, temp
/* do nothing */
.endm
/* do preload for decryption */
.macro dec_prepare, ignore0, ignore1, temp
prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
.endm
/* apply SubBytes transformation using the the preloaded Sbox */
.macro sub_bytes, in
sub v9.16b, \in\().16b, v12.16b
tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
sub v10.16b, v9.16b, v12.16b
tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
sub v11.16b, v10.16b, v12.16b
tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
.endm
/* apply MixColumns transformation */
.macro mix_columns, in
mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
rev32 v8.8h, \in\().8h
eor \in\().16b, v10.16b, \in\().16b
shl v9.4s, v8.4s, #24
shl v11.4s, \in\().4s, #24
sri v9.4s, v8.4s, #8
sri v11.4s, \in\().4s, #8
eor v9.16b, v9.16b, v8.16b
eor v10.16b, v10.16b, v9.16b
eor \in\().16b, v10.16b, v11.16b
.endm
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
.macro inv_mix_columns, in
mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
eor \in\().16b, \in\().16b, v11.16b
rev32 v11.8h, v11.8h
eor \in\().16b, \in\().16b, v11.16b
mix_columns \in
.endm
.macro do_block, enc, in, rounds, rk, rkp, i
ld1 {v15.16b}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
ld1 {v15.16b}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
mix_columns \in
.else
inv_mix_columns \in
.endif
b 1111b
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
.endm
.macro encrypt_block, in, rounds, rk, rkp, i
do_block 1, \in, \rounds, \rk, \rkp, \i
.endm
.macro decrypt_block, in, rounds, rk, rkp, i
do_block 0, \in, \rounds, \rk, \rkp, \i
.endm
/*
* Interleaved versions: functionally equivalent to the
* ones above, but applied to 2 or 4 AES states in parallel.
*/
.macro sub_bytes_2x, in0, in1
sub v8.16b, \in0\().16b, v12.16b
sub v9.16b, \in1\().16b, v12.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub v10.16b, v8.16b, v12.16b
sub v11.16b, v9.16b, v12.16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub v8.16b, v10.16b, v12.16b
sub v9.16b, v11.16b, v12.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
.endm
.macro sub_bytes_4x, in0, in1, in2, in3
sub v8.16b, \in0\().16b, v12.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
sub v9.16b, \in1\().16b, v12.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub v10.16b, \in2\().16b, v12.16b
tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
sub v11.16b, \in3\().16b, v12.16b
tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub v8.16b, v8.16b, v12.16b
tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
sub v9.16b, v9.16b, v12.16b
tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
sub v10.16b, v10.16b, v12.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
sub v11.16b, v11.16b, v12.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
sub v8.16b, v8.16b, v12.16b
tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
sub v9.16b, v9.16b, v12.16b
tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
sub v10.16b, v10.16b, v12.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
sub v11.16b, v11.16b, v12.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
.endm
.macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
sshr \tmp0\().16b, \in0\().16b, #7
add \out0\().16b, \in0\().16b, \in0\().16b
sshr \tmp1\().16b, \in1\().16b, #7
and \tmp0\().16b, \tmp0\().16b, \const\().16b
add \out1\().16b, \in1\().16b, \in1\().16b
and \tmp1\().16b, \tmp1\().16b, \const\().16b
eor \out0\().16b, \out0\().16b, \tmp0\().16b
eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
.macro mix_columns_2x, in0, in1
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
rev32 v10.8h, \in0\().8h
rev32 v11.8h, \in1\().8h
eor \in0\().16b, v8.16b, \in0\().16b
eor \in1\().16b, v9.16b, \in1\().16b
shl v12.4s, v10.4s, #24
shl v13.4s, v11.4s, #24
eor v8.16b, v8.16b, v10.16b
sri v12.4s, v10.4s, #8
shl v10.4s, \in0\().4s, #24
eor v9.16b, v9.16b, v11.16b
sri v13.4s, v11.4s, #8
shl v11.4s, \in1\().4s, #24
sri v10.4s, \in0\().4s, #8
eor \in0\().16b, v8.16b, v12.16b
sri v11.4s, \in1\().4s, #8
eor \in1\().16b, v9.16b, v13.16b
eor \in0\().16b, v10.16b, \in0\().16b
eor \in1\().16b, v11.16b, \in1\().16b
.endm
.macro inv_mix_cols_2x, in0, in1
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
rev32 v8.8h, v8.8h
rev32 v9.8h, v9.8h
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
mix_columns_2x \in0, \in1
.endm
.macro inv_mix_cols_4x, in0, in1, in2, in3
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
eor \in2\().16b, \in2\().16b, v10.16b
eor \in3\().16b, \in3\().16b, v11.16b
rev32 v8.8h, v8.8h
rev32 v9.8h, v9.8h
rev32 v10.8h, v10.8h
rev32 v11.8h, v11.8h
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
eor \in2\().16b, \in2\().16b, v10.16b
eor \in3\().16b, \in3\().16b, v11.16b
mix_columns_2x \in0, \in1
mix_columns_2x \in2, \in3
.endm
.macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
ld1 {v15.16b}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
sub_bytes_2x \in0, \in1
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
ld1 {v15.16b}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
mix_columns_2x \in0, \in1
ldr q13, .LForward_ShiftRows
.else
inv_mix_cols_2x \in0, \in1
ldr q13, .LReverse_ShiftRows
.endif
movi v12.16b, #0x40
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
.endm
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
ld1 {v15.16b}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
sub_bytes_4x \in0, \in1, \in2, \in3
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
ld1 {v15.16b}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
mix_columns_2x \in0, \in1
mix_columns_2x \in2, \in3
ldr q13, .LForward_ShiftRows
.else
inv_mix_cols_4x \in0, \in1, \in2, \in3
ldr q13, .LReverse_ShiftRows
.endif
movi v12.16b, #0x40
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
.endm
.macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
.endm
.macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
.endm
.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm
.macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm
#include "aes-modes.S"
.text
.align 4
.LForward_ShiftRows:
.byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
.byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
.LReverse_ShiftRows:
.byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
.byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
.LForward_Sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.LReverse_Sbox:
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

View File

@ -0,0 +1,95 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Vinodh Gopal
* Erdinc Ozturk
* Deniz Karakoyunlu
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
DATA .req v0
SHASH .req v1
IN1 .req v2
T1 .req v2
T2 .req v3
T3 .req v4
VZR .req v5
.text
.arch armv8-a+crypto
/*
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update)
ld1 {DATA.16b}, [x1]
ld1 {SHASH.16b}, [x3]
eor VZR.16b, VZR.16b, VZR.16b
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {IN1.2d}, [x4]
b 1f
0: ld1 {IN1.2d}, [x2], #16
sub w0, w0, #1
1: ext IN1.16b, IN1.16b, IN1.16b, #8
CPU_LE( rev64 IN1.16b, IN1.16b )
eor DATA.16b, DATA.16b, IN1.16b
/* multiply DATA by SHASH in GF(2^128) */
ext T2.16b, DATA.16b, DATA.16b, #8
ext T3.16b, SHASH.16b, SHASH.16b, #8
eor T2.16b, T2.16b, DATA.16b
eor T3.16b, T3.16b, SHASH.16b
pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
eor T2.16b, T2.16b, DATA.16b
ext T3.16b, VZR.16b, T2.16b, #8
ext T2.16b, T2.16b, VZR.16b, #8
eor DATA.16b, DATA.16b, T3.16b
eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
// carry-less multiplication
/* first phase of the reduction */
shl T3.2d, DATA.2d, #1
eor T3.16b, T3.16b, DATA.16b
shl T3.2d, T3.2d, #5
eor T3.16b, T3.16b, DATA.16b
shl T3.2d, T3.2d, #57
ext T2.16b, VZR.16b, T3.16b, #8
ext T3.16b, T3.16b, VZR.16b, #8
eor DATA.16b, DATA.16b, T2.16b
eor T1.16b, T1.16b, T3.16b
/* second phase of the reduction */
ushr T2.2d, DATA.2d, #5
eor T2.16b, T2.16b, DATA.16b
ushr T2.2d, T2.2d, #1
eor T2.16b, T2.16b, DATA.16b
ushr T2.2d, T2.2d, #1
eor T1.16b, T1.16b, T2.16b
eor DATA.16b, DATA.16b, T1.16b
cbnz w0, 0b
st1 {DATA.16b}, [x1]
ret
ENDPROC(pmull_ghash_update)

View File

@ -0,0 +1,155 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
u64 a;
u64 b;
};
struct ghash_desc_ctx {
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
u8 buf[GHASH_BLOCK_SIZE];
u32 count;
};
asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
struct ghash_key const *k, const char *head);
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
*ctx = (struct ghash_desc_ctx){};
return 0;
}
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
ctx->count += len;
if ((partial + len) >= GHASH_BLOCK_SIZE) {
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
int blocks;
if (partial) {
int p = GHASH_BLOCK_SIZE - partial;
memcpy(ctx->buf + partial, src, p);
src += p;
len -= p;
}
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;
kernel_neon_begin_partial(6);
pmull_ghash_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
kernel_neon_end();
src += blocks * GHASH_BLOCK_SIZE;
}
if (len)
memcpy(ctx->buf + partial, src, len);
return 0;
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
if (partial) {
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
kernel_neon_begin_partial(6);
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
kernel_neon_end();
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
*ctx = (struct ghash_desc_ctx){};
return 0;
}
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
/* perform multiplication by 'x' in GF(2^128) */
b = get_unaligned_be64(inkey);
a = get_unaligned_be64(inkey + 8);
key->a = (a << 1) | (b >> 63);
key->b = (b << 1) | (a >> 63);
if (b >> 63)
key->b ^= 0xc200000000000000UL;
return 0;
}
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
.final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_key),
.cra_module = THIS_MODULE,
},
};
static int __init ghash_ce_mod_init(void)
{
return crypto_register_shash(&ghash_alg);
}
static void __exit ghash_ce_mod_exit(void)
{
crypto_unregister_shash(&ghash_alg);
}
module_cpu_feature_match(PMULL, ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);

View File

@ -0,0 +1,153 @@
/*
* sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.arch armv8-a+crypto
k0 .req v0
k1 .req v1
k2 .req v2
k3 .req v3
t0 .req v4
t1 .req v5
dga .req q6
dgav .req v6
dgb .req s7
dgbv .req v7
dg0q .req q12
dg0s .req s12
dg0v .req v12
dg1s .req s13
dg1v .req v13
dg2s .req s14
.macro add_only, op, ev, rc, s0, dg1
.ifc \ev, ev
add t1.4s, v\s0\().4s, \rc\().4s
sha1h dg2s, dg0s
.ifnb \dg1
sha1\op dg0q, \dg1, t0.4s
.else
sha1\op dg0q, dg1s, t0.4s
.endif
.else
.ifnb \s0
add t0.4s, v\s0\().4s, \rc\().4s
.endif
sha1h dg1s, dg0s
sha1\op dg0q, dg2s, t1.4s
.endif
.endm
.macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
add_only \op, \ev, \rc, \s1, \dg1
sha1su1 v\s0\().4s, v\s3\().4s
.endm
/*
* The SHA1 round constants
*/
.align 4
.Lsha1_rcon:
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
/*
* void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
* u8 *head, long bytes)
*/
ENTRY(sha1_ce_transform)
/* load round constants */
adr x6, .Lsha1_rcon
ld1r {k0.4s}, [x6], #4
ld1r {k1.4s}, [x6], #4
ld1r {k2.4s}, [x6], #4
ld1r {k3.4s}, [x6]
/* load state */
ldr dga, [x2]
ldr dgb, [x2, #16]
/* load partial state (if supplied) */
cbz x3, 0f
ld1 {v8.4s-v11.4s}, [x3]
b 1f
/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
sub w0, w0, #1
1:
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )
2: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb
add_update c, od, k0, 9, 10, 11, 8
add_update c, ev, k0, 10, 11, 8, 9
add_update c, od, k0, 11, 8, 9, 10
add_update c, ev, k1, 8, 9, 10, 11
add_update p, od, k1, 9, 10, 11, 8
add_update p, ev, k1, 10, 11, 8, 9
add_update p, od, k1, 11, 8, 9, 10
add_update p, ev, k1, 8, 9, 10, 11
add_update p, od, k2, 9, 10, 11, 8
add_update m, ev, k2, 10, 11, 8, 9
add_update m, od, k2, 11, 8, 9, 10
add_update m, ev, k2, 8, 9, 10, 11
add_update m, od, k2, 9, 10, 11, 8
add_update m, ev, k3, 10, 11, 8, 9
add_update p, od, k3, 11, 8, 9, 10
add_only p, ev, k3, 9
add_only p, od, k3, 10
add_only p, ev, k3, 11
add_only p, od
/* update state */
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
cbnz w0, 0b
/*
* Final block: add padding and total bit count.
* Skip if we have no total byte count in x4. In that case, the input
* size was not a round multiple of the block size, and the padding is
* handled by the C code.
*/
cbz x4, 3f
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
ror x7, x4, #29 // ror(lsl(x4, 3), 32)
fmov d8, x8
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
b 2b
/* store new state */
3: str dga, [x2]
str dgb, [x2, #16]
ret
ENDPROC(sha1_ce_transform)

View File

@ -0,0 +1,174 @@
/*
* sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head, long bytes);
static int sha1_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
sctx->count += len;
if ((partial + len) >= SHA1_BLOCK_SIZE) {
int blocks;
if (partial) {
int p = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, p);
data += p;
len -= p;
}
blocks = len / SHA1_BLOCK_SIZE;
len %= SHA1_BLOCK_SIZE;
kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state,
partial ? sctx->buffer : NULL, 0);
kernel_neon_end();
data += blocks * SHA1_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buffer + partial, data, len);
return 0;
}
static int sha1_final(struct shash_desc *desc, u8 *out)
{
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
struct sha1_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
__be32 *dst = (__be32 *)out;
int i;
u32 padlen = SHA1_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
sha1_update(desc, padding, padlen);
sha1_update(desc, (const u8 *)&bits, sizeof(bits));
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha1_state){};
return 0;
}
static int sha1_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int blocks;
int i;
if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
sha1_update(desc, data, len);
return sha1_final(desc, out);
}
/*
* Use a fast path if the input is a multiple of 64 bytes. In
* this case, there is no need to copy data around, and we can
* perform the entire digest calculation in a single invocation
* of sha1_ce_transform()
*/
blocks = len / SHA1_BLOCK_SIZE;
kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state, NULL, len);
kernel_neon_end();
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha1_state){};
return 0;
}
static int sha1_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state *dst = out;
*dst = *sctx;
return 0;
}
static int sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state const *src = in;
*sctx = *src;
return 0;
}
static struct shash_alg alg = {
.init = sha1_init,
.update = sha1_update,
.final = sha1_final,
.finup = sha1_finup,
.export = sha1_export,
.import = sha1_import,
.descsize = sizeof(struct sha1_state),
.digestsize = SHA1_DIGEST_SIZE,
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init sha1_ce_mod_init(void)
{
return crypto_register_shash(&alg);
}
static void __exit sha1_ce_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_cpu_feature_match(SHA1, sha1_ce_mod_init);
module_exit(sha1_ce_mod_fini);

View File

@ -0,0 +1,156 @@
/*
* sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.arch armv8-a+crypto
dga .req q20
dgav .req v20
dgb .req q21
dgbv .req v21
t0 .req v22
t1 .req v23
dg0q .req q24
dg0v .req v24
dg1q .req q25
dg1v .req v25
dg2q .req q26
dg2v .req v26
.macro add_only, ev, rc, s0
mov dg2v.16b, dg0v.16b
.ifeq \ev
add t1.4s, v\s0\().4s, \rc\().4s
sha256h dg0q, dg1q, t0.4s
sha256h2 dg1q, dg2q, t0.4s
.else
.ifnb \s0
add t0.4s, v\s0\().4s, \rc\().4s
.endif
sha256h dg0q, dg1q, t1.4s
sha256h2 dg1q, dg2q, t1.4s
.endif
.endm
.macro add_update, ev, rc, s0, s1, s2, s3
sha256su0 v\s0\().4s, v\s1\().4s
add_only \ev, \rc, \s1
sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s
.endm
/*
* The SHA-256 round constants
*/
.align 4
.Lsha2_rcon:
.word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
* void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
* u8 *head, long bytes)
*/
ENTRY(sha2_ce_transform)
/* load round constants */
adr x8, .Lsha2_rcon
ld1 { v0.4s- v3.4s}, [x8], #64
ld1 { v4.4s- v7.4s}, [x8], #64
ld1 { v8.4s-v11.4s}, [x8], #64
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
ldp dga, dgb, [x2]
/* load partial input (if supplied) */
cbz x3, 0f
ld1 {v16.4s-v19.4s}, [x3]
b 1f
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
sub w0, w0, #1
1:
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
2: add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
add_update 0, v1, 16, 17, 18, 19
add_update 1, v2, 17, 18, 19, 16
add_update 0, v3, 18, 19, 16, 17
add_update 1, v4, 19, 16, 17, 18
add_update 0, v5, 16, 17, 18, 19
add_update 1, v6, 17, 18, 19, 16
add_update 0, v7, 18, 19, 16, 17
add_update 1, v8, 19, 16, 17, 18
add_update 0, v9, 16, 17, 18, 19
add_update 1, v10, 17, 18, 19, 16
add_update 0, v11, 18, 19, 16, 17
add_update 1, v12, 19, 16, 17, 18
add_only 0, v13, 17
add_only 1, v14, 18
add_only 0, v15, 19
add_only 1
/* update state */
add dgav.4s, dgav.4s, dg0v.4s
add dgbv.4s, dgbv.4s, dg1v.4s
/* handled all input blocks? */
cbnz w0, 0b
/*
* Final block: add padding and total bit count.
* Skip if we have no total byte count in x4. In that case, the input
* size was not a round multiple of the block size, and the padding is
* handled by the C code.
*/
cbz x4, 3f
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
ror x7, x4, #29 // ror(lsl(x4, 3), 32)
fmov d16, x8
mov x4, #0
mov v19.d[0], xzr
mov v19.d[1], x7
b 2b
/* store new state */
3: stp dga, dgb, [x2]
ret
ENDPROC(sha2_ce_transform)

View File

@ -0,0 +1,255 @@
/*
* sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head, long bytes);
static int sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha256_state){
.state = {
SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
}
};
return 0;
}
static int sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha256_state){
.state = {
SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
}
};
return 0;
}
static int sha2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
sctx->count += len;
if ((partial + len) >= SHA256_BLOCK_SIZE) {
int blocks;
if (partial) {
int p = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, p);
data += p;
len -= p;
}
blocks = len / SHA256_BLOCK_SIZE;
len %= SHA256_BLOCK_SIZE;
kernel_neon_begin_partial(28);
sha2_ce_transform(blocks, data, sctx->state,
partial ? sctx->buf : NULL, 0);
kernel_neon_end();
data += blocks * SHA256_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buf + partial, data, len);
return 0;
}
static void sha2_final(struct shash_desc *desc)
{
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
struct sha256_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
u32 padlen = SHA256_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
sha2_update(desc, padding, padlen);
sha2_update(desc, (const u8 *)&bits, sizeof(bits));
}
static int sha224_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static void sha2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
int blocks;
if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
sha2_update(desc, data, len);
sha2_final(desc);
return;
}
/*
* Use a fast path if the input is a multiple of 64 bytes. In
* this case, there is no need to copy data around, and we can
* perform the entire digest calculation in a single invocation
* of sha2_ce_transform()
*/
blocks = len / SHA256_BLOCK_SIZE;
kernel_neon_begin_partial(28);
sha2_ce_transform(blocks, data, sctx->state, NULL, len);
kernel_neon_end();
data += blocks * SHA256_BLOCK_SIZE;
}
static int sha224_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_finup(desc, data, len);
for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_finup(desc, data, len);
for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha2_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state *dst = out;
*dst = *sctx;
return 0;
}
static int sha2_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state const *src = in;
*sctx = *src;
return 0;
}
static struct shash_alg algs[] = { {
.init = sha224_init,
.update = sha2_update,
.final = sha224_final,
.finup = sha224_finup,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA224_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.init = sha256_init,
.update = sha2_update,
.final = sha256_final,
.finup = sha256_finup,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA256_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha2_ce_mod_init(void)
{
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
static void __exit sha2_ce_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_cpu_feature_match(SHA2, sha2_ce_mod_init);
module_exit(sha2_ce_mod_fini);

View File

@ -37,6 +37,7 @@ generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h

View File

@ -26,7 +26,13 @@
#include <clocksource/arm_arch_timer.h>
static inline void arch_timer_reg_write(int access, int reg, u32 val)
/*
* These register accessors are marked inline so the compiler can
* nicely work out which register we want, and chuck away the rest of
* the code.
*/
static __always_inline
void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
@ -36,8 +42,6 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val)
case ARCH_TIMER_REG_TVAL:
asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
break;
default:
BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
@ -47,17 +51,14 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val)
case ARCH_TIMER_REG_TVAL:
asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
break;
default:
BUILD_BUG();
}
} else {
BUILD_BUG();
}
isb();
}
static inline u32 arch_timer_reg_read(int access, int reg)
static __always_inline
u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
u32 val;
@ -69,8 +70,6 @@ static inline u32 arch_timer_reg_read(int access, int reg)
case ARCH_TIMER_REG_TVAL:
asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
break;
default:
BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
@ -80,11 +79,7 @@ static inline u32 arch_timer_reg_read(int access, int reg)
case ARCH_TIMER_REG_TVAL:
asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
break;
default:
BUILD_BUG();
}
} else {
BUILD_BUG();
}
return val;

View File

@ -0,0 +1,29 @@
/*
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __ASM_CPUFEATURE_H
#define __ASM_CPUFEATURE_H
#include <asm/hwcap.h>
/*
* In the arm64 world (as in the ARM world), elf_hwcap is used both internally
* in the kernel and for user space to keep track of which optional features
* are supported by the current system. So let's map feature 'x' to HWCAP_x.
* Note that HWCAP_x constants are bit fields so we need to take the log.
*/
#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap))
#define cpu_feature(x) ilog2(HWCAP_ ## x)
static inline bool cpu_have_feature(unsigned int num)
{
return elf_hwcap & (1UL << num);
}
#endif

View File

@ -187,6 +187,14 @@ config GENERIC_CPU_DEVICES
bool
default n
config HAVE_CPU_AUTOPROBE
def_bool ARCH_HAS_CPU_AUTOPROBE
config GENERIC_CPU_AUTOPROBE
bool
depends on !ARCH_HAS_CPU_AUTOPROBE
select HAVE_CPU_AUTOPROBE
config SOC_BUS
bool

View File

@ -13,6 +13,9 @@
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/acpi.h>
#include <linux/of.h>
#include <linux/cpufeature.h>
#include "base.h"
@ -260,6 +263,45 @@ static void cpu_device_release(struct device *dev)
*/
}
#ifdef CONFIG_HAVE_CPU_AUTOPROBE
#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
static ssize_t print_cpu_modalias(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t n;
u32 i;
n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
CPU_FEATURE_TYPEVAL);
for (i = 0; i < MAX_CPU_FEATURES; i++)
if (cpu_have_feature(i)) {
if (PAGE_SIZE < n + sizeof(",XXXX\n")) {
WARN(1, "CPU features overflow page\n");
break;
}
n += sprintf(&buf[n], ",%04X", i);
}
buf[n++] = '\n';
return n;
}
#else
#define print_cpu_modalias arch_print_cpu_modalias
#endif
static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
{
char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (buf) {
print_cpu_modalias(NULL, NULL, buf);
add_uevent_var(env, "MODALIAS=%s", buf);
kfree(buf);
}
return 0;
}
#endif
/*
* register_cpu - Setup a sysfs device for a CPU.
* @cpu - cpu->hotpluggable field set to 1 will generate a control file in
@ -277,8 +319,8 @@ int __cpuinit register_cpu(struct cpu *cpu, int num)
cpu->dev.id = num;
cpu->dev.bus = &cpu_subsys;
cpu->dev.release = cpu_device_release;
#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
cpu->dev.bus->uevent = arch_cpu_uevent;
#ifdef CONFIG_HAVE_CPU_AUTOPROBE
cpu->dev.bus->uevent = cpu_uevent;
#endif
error = device_register(&cpu->dev);
if (!error && cpu->hotpluggable)
@ -307,8 +349,8 @@ struct device *get_cpu_device(unsigned cpu)
}
EXPORT_SYMBOL_GPL(get_cpu_device);
#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
static DEVICE_ATTR(modalias, 0444, arch_print_cpu_modalias, NULL);
#ifdef CONFIG_HAVE_CPU_AUTOPROBE
static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL);
#endif
static struct attribute *cpu_root_attrs[] = {
@ -321,7 +363,7 @@ static struct attribute *cpu_root_attrs[] = {
&cpu_attrs[2].attr.attr,
&dev_attr_kernel_max.attr,
&dev_attr_offline.attr,
#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE
#ifdef CONFIG_HAVE_CPU_AUTOPROBE
&dev_attr_modalias.attr,
#endif
NULL

View File

@ -17,13 +17,39 @@
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <asm/arch_timer.h>
#include <asm/virt.h>
#include <clocksource/arm_arch_timer.h>
#define CNTTIDR 0x08
#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4))
#define CNTVCT_LO 0x08
#define CNTVCT_HI 0x0c
#define CNTFRQ 0x10
#define CNTP_TVAL 0x28
#define CNTP_CTL 0x2c
#define CNTV_TVAL 0x38
#define CNTV_CTL 0x3c
#define ARCH_CP15_TIMER BIT(0)
#define ARCH_MEM_TIMER BIT(1)
static unsigned arch_timers_present __initdata;
static void __iomem *arch_counter_base;
struct arch_timer {
void __iomem *base;
struct clock_event_device evt;
};
#define to_arch_timer(e) container_of(e, struct arch_timer, evt)
static u32 arch_timer_rate;
enum ppi_nr {
@ -39,19 +65,82 @@ static int arch_timer_ppi[MAX_TIMER_PPI];
static struct clock_event_device __percpu *arch_timer_evt;
static bool arch_timer_use_virtual = true;
static bool arch_timer_mem_use_virtual;
/*
* Architected system timer support.
*/
static inline irqreturn_t timer_handler(const int access,
static __always_inline
void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
struct clock_event_device *clk)
{
if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
writel_relaxed(val, timer->base + CNTP_CTL);
break;
case ARCH_TIMER_REG_TVAL:
writel_relaxed(val, timer->base + CNTP_TVAL);
break;
}
} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
writel_relaxed(val, timer->base + CNTV_CTL);
break;
case ARCH_TIMER_REG_TVAL:
writel_relaxed(val, timer->base + CNTV_TVAL);
break;
}
} else {
arch_timer_reg_write_cp15(access, reg, val);
}
}
static __always_inline
u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
struct clock_event_device *clk)
{
u32 val;
if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
val = readl_relaxed(timer->base + CNTP_CTL);
break;
case ARCH_TIMER_REG_TVAL:
val = readl_relaxed(timer->base + CNTP_TVAL);
break;
}
} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
struct arch_timer *timer = to_arch_timer(clk);
switch (reg) {
case ARCH_TIMER_REG_CTRL:
val = readl_relaxed(timer->base + CNTV_CTL);
break;
case ARCH_TIMER_REG_TVAL:
val = readl_relaxed(timer->base + CNTV_TVAL);
break;
}
} else {
val = arch_timer_reg_read_cp15(access, reg);
}
return val;
}
static __always_inline irqreturn_t timer_handler(const int access,
struct clock_event_device *evt)
{
unsigned long ctrl;
ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL);
ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt);
if (ctrl & ARCH_TIMER_CTRL_IT_STAT) {
ctrl |= ARCH_TIMER_CTRL_IT_MASK;
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl);
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt);
evt->event_handler(evt);
return IRQ_HANDLED;
}
@ -73,15 +162,30 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id)
return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt);
}
static inline void timer_set_mode(const int access, int mode)
static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id)
{
struct clock_event_device *evt = dev_id;
return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt);
}
static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id)
{
struct clock_event_device *evt = dev_id;
return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt);
}
static __always_inline void timer_set_mode(const int access, int mode,
struct clock_event_device *clk)
{
unsigned long ctrl;
switch (mode) {
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL);
ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl);
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
break;
default:
break;
@ -91,36 +195,49 @@ static inline void timer_set_mode(const int access, int mode)
static void arch_timer_set_mode_virt(enum clock_event_mode mode,
struct clock_event_device *clk)
{
timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode);
timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode, clk);
}
static void arch_timer_set_mode_phys(enum clock_event_mode mode,
struct clock_event_device *clk)
{
timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode);
timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode, clk);
}
static inline void set_next_event(const int access, unsigned long evt)
static void arch_timer_set_mode_virt_mem(enum clock_event_mode mode,
struct clock_event_device *clk)
{
timer_set_mode(ARCH_TIMER_MEM_VIRT_ACCESS, mode, clk);
}
static void arch_timer_set_mode_phys_mem(enum clock_event_mode mode,
struct clock_event_device *clk)
{
timer_set_mode(ARCH_TIMER_MEM_PHYS_ACCESS, mode, clk);
}
static __always_inline void set_next_event(const int access, unsigned long evt,
struct clock_event_device *clk)
{
unsigned long ctrl;
ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL);
ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
ctrl |= ARCH_TIMER_CTRL_ENABLE;
ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt);
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl);
arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk);
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
}
static int arch_timer_set_next_event_virt(unsigned long evt,
struct clock_event_device *unused)
struct clock_event_device *clk)
{
set_next_event(ARCH_TIMER_VIRT_ACCESS, evt);
set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);
return 0;
}
static int arch_timer_set_next_event_phys(unsigned long evt,
struct clock_event_device *unused)
struct clock_event_device *clk)
{
set_next_event(ARCH_TIMER_PHYS_ACCESS, evt);
set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);
return 0;
}
@ -137,27 +254,62 @@ static void arch_timer_configure_evtstream(void)
arch_timer_evtstrm_enable(min(pos, 15));
}
static int __cpuinit arch_timer_setup(struct clock_event_device *clk)
static int arch_timer_set_next_event_virt_mem(unsigned long evt,
struct clock_event_device *clk)
{
clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP;
clk->name = "arch_sys_timer";
clk->rating = 450;
if (arch_timer_use_virtual) {
clk->irq = arch_timer_ppi[VIRT_PPI];
clk->set_mode = arch_timer_set_mode_virt;
clk->set_next_event = arch_timer_set_next_event_virt;
set_next_event(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk);
return 0;
}
static int arch_timer_set_next_event_phys_mem(unsigned long evt,
struct clock_event_device *clk)
{
set_next_event(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk);
return 0;
}
static void __cpuinit __arch_timer_setup(unsigned type,
struct clock_event_device *clk)
{
clk->features = CLOCK_EVT_FEAT_ONESHOT;
if (type == ARCH_CP15_TIMER) {
clk->features |= CLOCK_EVT_FEAT_C3STOP;
clk->name = "arch_sys_timer";
clk->rating = 450;
clk->cpumask = cpumask_of(smp_processor_id());
if (arch_timer_use_virtual) {
clk->irq = arch_timer_ppi[VIRT_PPI];
clk->set_mode = arch_timer_set_mode_virt;
clk->set_next_event = arch_timer_set_next_event_virt;
} else {
clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
clk->set_mode = arch_timer_set_mode_phys;
clk->set_next_event = arch_timer_set_next_event_phys;
}
} else {
clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
clk->set_mode = arch_timer_set_mode_phys;
clk->set_next_event = arch_timer_set_next_event_phys;
clk->name = "arch_mem_timer";
clk->rating = 400;
clk->cpumask = cpu_all_mask;
if (arch_timer_mem_use_virtual) {
clk->set_mode = arch_timer_set_mode_virt_mem;
clk->set_next_event =
arch_timer_set_next_event_virt_mem;
} else {
clk->set_mode = arch_timer_set_mode_phys_mem;
clk->set_next_event =
arch_timer_set_next_event_phys_mem;
}
}
clk->cpumask = cpumask_of(smp_processor_id());
clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk);
clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, NULL);
clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff);
}
clockevents_config_and_register(clk, arch_timer_rate,
0xf, 0x7fffffff);
static int __cpuinit arch_timer_setup(struct clock_event_device *clk)
{
__arch_timer_setup(ARCH_CP15_TIMER, clk);
if (arch_timer_use_virtual)
enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0);
@ -174,27 +326,41 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk)
return 0;
}
static int arch_timer_available(void)
static void
arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np)
{
u32 freq;
/* Who has more than one independent system counter? */
if (arch_timer_rate)
return;
if (arch_timer_rate == 0) {
freq = arch_timer_get_cntfrq();
/* Check the timer frequency. */
if (freq == 0) {
pr_warn("Architected timer frequency not available\n");
return -EINVAL;
}
arch_timer_rate = freq;
/* Try to determine the frequency from the device tree or CNTFRQ */
if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) {
if (cntbase)
arch_timer_rate = readl_relaxed(cntbase + CNTFRQ);
else
arch_timer_rate = arch_timer_get_cntfrq();
}
pr_info_once("Architected local timer running at %lu.%02luMHz (%s).\n",
/* Check the timer frequency. */
if (arch_timer_rate == 0)
pr_warn("Architected timer frequency not available\n");
}
static void arch_timer_banner(unsigned type)
{
pr_info("Architected %s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n",
type & ARCH_CP15_TIMER ? "cp15" : "",
type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? " and " : "",
type & ARCH_MEM_TIMER ? "mmio" : "",
(unsigned long)arch_timer_rate / 1000000,
(unsigned long)(arch_timer_rate / 10000) % 100,
arch_timer_use_virtual ? "virt" : "phys");
return 0;
type & ARCH_CP15_TIMER ?
arch_timer_use_virtual ? "virt" : "phys" :
"",
type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "",
type & ARCH_MEM_TIMER ?
arch_timer_mem_use_virtual ? "virt" : "phys" :
"");
}
u32 arch_timer_get_rate(void)
@ -202,11 +368,27 @@ u32 arch_timer_get_rate(void)
return arch_timer_rate;
}
u64 arch_timer_read_counter(void)
static u64 arch_counter_get_cntvct_mem(void)
{
return arch_counter_get_cntvct();
u32 vct_lo, vct_hi, tmp_hi;
do {
vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI);
vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO);
tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI);
} while (vct_hi != tmp_hi);
return ((u64) vct_hi << 32) | vct_lo;
}
/*
* Default to cp15 based access because arm64 uses this function for
* sched_clock() before DT is probed and the cp15 method is guaranteed
* to exist on arm64. arm doesn't use this before DT is probed so even
* if we don't have the cp15 accessors we won't have a problem.
*/
u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct;
static cycle_t arch_counter_read(struct clocksource *cs)
{
return arch_counter_get_cntvct();
@ -237,6 +419,23 @@ struct timecounter *arch_timer_get_timecounter(void)
return &timecounter;
}
static void __init arch_counter_register(unsigned type)
{
u64 start_count;
/* Register the CP15 based counter if we have one */
if (type & ARCH_CP15_TIMER)
arch_timer_read_counter = arch_counter_get_cntvct;
else
arch_timer_read_counter = arch_counter_get_cntvct_mem;
start_count = arch_timer_read_counter();
clocksource_register_hz(&clocksource_counter, arch_timer_rate);
cyclecounter.mult = clocksource_counter.mult;
cyclecounter.shift = clocksource_counter.shift;
timecounter_init(&timecounter, &cyclecounter, start_count);
}
static void __cpuinit arch_timer_stop(struct clock_event_device *clk)
{
pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n",
@ -308,22 +507,12 @@ static int __init arch_timer_register(void)
int err;
int ppi;
err = arch_timer_available();
if (err)
goto out;
arch_timer_evt = alloc_percpu(struct clock_event_device);
if (!arch_timer_evt) {
err = -ENOMEM;
goto out;
}
clocksource_register_hz(&clocksource_counter, arch_timer_rate);
cyclecounter.mult = clocksource_counter.mult;
cyclecounter.shift = clocksource_counter.shift;
timecounter_init(&timecounter, &cyclecounter,
arch_counter_get_cntvct());
if (arch_timer_use_virtual) {
ppi = arch_timer_ppi[VIRT_PPI];
err = request_percpu_irq(ppi, arch_timer_handler_virt,
@ -380,24 +569,77 @@ static int __init arch_timer_register(void)
return err;
}
static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq)
{
int ret;
irq_handler_t func;
struct arch_timer *t;
t = kzalloc(sizeof(*t), GFP_KERNEL);
if (!t)
return -ENOMEM;
t->base = base;
t->evt.irq = irq;
__arch_timer_setup(ARCH_MEM_TIMER, &t->evt);
if (arch_timer_mem_use_virtual)
func = arch_timer_handler_virt_mem;
else
func = arch_timer_handler_phys_mem;
ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &t->evt);
if (ret) {
pr_err("arch_timer: Failed to request mem timer irq\n");
kfree(t);
}
return ret;
}
static const struct of_device_id arch_timer_of_match[] __initconst = {
{ .compatible = "arm,armv7-timer", },
{ .compatible = "arm,armv8-timer", },
{},
};
static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
{ .compatible = "arm,armv7-timer-mem", },
{},
};
static void __init arch_timer_common_init(void)
{
unsigned mask = ARCH_CP15_TIMER | ARCH_MEM_TIMER;
/* Wait until both nodes are probed if we have two timers */
if ((arch_timers_present & mask) != mask) {
if (of_find_matching_node(NULL, arch_timer_mem_of_match) &&
!(arch_timers_present & ARCH_MEM_TIMER))
return;
if (of_find_matching_node(NULL, arch_timer_of_match) &&
!(arch_timers_present & ARCH_CP15_TIMER))
return;
}
arch_timer_banner(arch_timers_present);
arch_counter_register(arch_timers_present);
arch_timer_arch_init();
}
static void __init arch_timer_init(struct device_node *np)
{
u32 freq;
int i;
if (arch_timer_get_rate()) {
if (arch_timers_present & ARCH_CP15_TIMER) {
pr_warn("arch_timer: multiple nodes in dt, skipping\n");
return;
}
/* Try to determine the frequency from the device tree or CNTFRQ */
if (!of_property_read_u32(np, "clock-frequency", &freq))
arch_timer_rate = freq;
arch_timers_present |= ARCH_CP15_TIMER;
for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
of_node_put(np);
arch_timer_detect_rate(NULL, np);
/*
* If HYP mode is available, we know that the physical timer
@ -418,7 +660,73 @@ static void __init arch_timer_init(struct device_node *np)
}
arch_timer_register();
arch_timer_arch_init();
arch_timer_common_init();
}
CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_init);
CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_init);
static void __init arch_timer_mem_init(struct device_node *np)
{
struct device_node *frame, *best_frame = NULL;
void __iomem *cntctlbase, *base;
unsigned int irq;
u32 cnttidr;
arch_timers_present |= ARCH_MEM_TIMER;
cntctlbase = of_iomap(np, 0);
if (!cntctlbase) {
pr_err("arch_timer: Can't find CNTCTLBase\n");
return;
}
cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
iounmap(cntctlbase);
/*
* Try to find a virtual capable frame. Otherwise fall back to a
* physical capable frame.
*/
for_each_available_child_of_node(np, frame) {
int n;
if (of_property_read_u32(frame, "frame-number", &n)) {
pr_err("arch_timer: Missing frame-number\n");
of_node_put(best_frame);
of_node_put(frame);
return;
}
if (cnttidr & CNTTIDR_VIRT(n)) {
of_node_put(best_frame);
best_frame = frame;
arch_timer_mem_use_virtual = true;
break;
}
of_node_put(best_frame);
best_frame = of_node_get(frame);
}
base = arch_counter_base = of_iomap(best_frame, 0);
if (!base) {
pr_err("arch_timer: Can't map frame's registers\n");
of_node_put(best_frame);
return;
}
if (arch_timer_mem_use_virtual)
irq = irq_of_parse_and_map(best_frame, 1);
else
irq = irq_of_parse_and_map(best_frame, 0);
of_node_put(best_frame);
if (!irq) {
pr_err("arch_timer: Frame missing %s irq",
arch_timer_mem_use_virtual ? "virt" : "phys");
return;
}
arch_timer_detect_rate(base, np);
arch_timer_mem_register(base, irq);
arch_timer_common_init();
}
CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
arch_timer_mem_init);

View File

@ -19,7 +19,7 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/of.h>
#include <linux/mailbox.h>
#include <linux/pl320-ipc.h>
#include <linux/platform_device.h>
#define HB_CPUFREQ_CHANGE_NOTE 0x80000001

View File

@ -1 +1,5 @@
# Generic MAILBOX API
obj-$(CONFIG_MAILBOX) += mailbox.o
obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o

488
drivers/mailbox/mailbox.c Normal file
View File

@ -0,0 +1,488 @@
/*
* Mailbox: Common code for Mailbox controllers and users
*
* Copyright (C) 2014 Linaro Ltd.
* Author: Jassi Brar <jassisinghbrar@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mailbox_client.h>
#include <linux/mailbox_controller.h>
#define TXDONE_BY_IRQ (1 << 0) /* controller has remote RTR irq */
#define TXDONE_BY_POLL (1 << 1) /* controller can read status of last TX */
#define TXDONE_BY_ACK (1 << 2) /* S/W ACK recevied by Client ticks the TX */
static LIST_HEAD(mbox_cons);
static DEFINE_MUTEX(con_mutex);
static int _add_to_rbuf(struct mbox_chan *chan, void *mssg)
{
int idx;
unsigned long flags;
spin_lock_irqsave(&chan->lock, flags);
/* See if there is any space left */
if (chan->msg_count == MBOX_TX_QUEUE_LEN) {
spin_unlock_irqrestore(&chan->lock, flags);
return -ENOMEM;
}
idx = chan->msg_free;
chan->msg_data[idx] = mssg;
chan->msg_count++;
if (idx == MBOX_TX_QUEUE_LEN - 1)
chan->msg_free = 0;
else
chan->msg_free++;
spin_unlock_irqrestore(&chan->lock, flags);
return idx;
}
static void _msg_submit(struct mbox_chan *chan)
{
unsigned count, idx;
unsigned long flags;
void *data;
int err;
spin_lock_irqsave(&chan->lock, flags);
if (!chan->msg_count || chan->active_req) {
spin_unlock_irqrestore(&chan->lock, flags);
return;
}
count = chan->msg_count;
idx = chan->msg_free;
if (idx >= count)
idx -= count;
else
idx += MBOX_TX_QUEUE_LEN - count;
data = chan->msg_data[idx];
/* Try to submit a message to the MBOX controller */
err = chan->mbox->ops->send_data(chan, data);
if (!err) {
chan->active_req = data;
chan->msg_count--;
}
spin_unlock_irqrestore(&chan->lock, flags);
}
static void tx_tick(struct mbox_chan *chan, int r)
{
unsigned long flags;
void *mssg;
spin_lock_irqsave(&chan->lock, flags);
mssg = chan->active_req;
chan->active_req = NULL;
spin_unlock_irqrestore(&chan->lock, flags);
/* Submit next message */
_msg_submit(chan);
/* Notify the client */
if (chan->cl->tx_block)
complete(&chan->tx_complete);
else if (mssg && chan->cl->tx_done)
chan->cl->tx_done(chan->cl, mssg, r);
}
static void poll_txdone(unsigned long data)
{
struct mbox_controller *mbox = (struct mbox_controller *)data;
bool txdone, resched = false;
int i;
for (i = 0; i < mbox->num_chans; i++) {
struct mbox_chan *chan = &mbox->chans[i];
if (chan->active_req && chan->cl) {
resched = true;
txdone = chan->mbox->ops->last_tx_done(chan);
if (txdone)
tx_tick(chan, 0);
}
}
if (resched)
mod_timer(&mbox->poll,
jiffies + msecs_to_jiffies(mbox->period));
}
/**
* mbox_chan_received_data - A way for controller driver to push data
* received from remote to the upper layer.
* @chan: Pointer to the mailbox channel on which RX happened.
* @data: Client specific message typecasted as void *
*
* After startup and before shutdown any data received on the chan
* is passed on to the API via atomic mbox_chan_received_data().
* The controller should ACK the RX only after this call returns.
*/
void mbox_chan_received_data(struct mbox_chan *chan, void *mssg)
{
/* No buffering the received data */
if (chan->cl->rx_callback)
chan->cl->rx_callback(chan->cl, mssg);
}
EXPORT_SYMBOL_GPL(mbox_chan_received_data);
/**
* mbox_chan_txdone - A way for controller driver to notify the
* framework that the last TX has completed.
* @chan: Pointer to the mailbox chan on which TX happened.
* @r: Status of last TX - OK or ERROR
*
* The controller that has IRQ for TX ACK calls this atomic API
* to tick the TX state machine. It works only if txdone_irq
* is set by the controller.
*/
void mbox_chan_txdone(struct mbox_chan *chan, int r)
{
if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) {
pr_err("Controller can't run the TX ticker\n");
return;
}
tx_tick(chan, r);
}
EXPORT_SYMBOL_GPL(mbox_chan_txdone);
/**
* mbox_client_txdone - The way for a client to run the TX state machine.
* @chan: Mailbox channel assigned to this client.
* @r: Success status of last transmission.
*
* The client/protocol had received some 'ACK' packet and it notifies
* the API that the last packet was sent successfully. This only works
* if the controller can't sense TX-Done.
*/
void mbox_client_txdone(struct mbox_chan *chan, int r)
{
if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) {
pr_err("Client can't run the TX ticker\n");
return;
}
tx_tick(chan, r);
}
EXPORT_SYMBOL_GPL(mbox_client_txdone);
/**
* mbox_client_peek_data - A way for client driver to pull data
* received from remote by the controller.
* @chan: Mailbox channel assigned to this client.
*
* A poke to controller driver for any received data.
* The data is actually passed onto client via the
* mbox_chan_received_data()
* The call can be made from atomic context, so the controller's
* implementation of peek_data() must not sleep.
*
* Return: True, if controller has, and is going to push after this,
* some data.
* False, if controller doesn't have any data to be read.
*/
bool mbox_client_peek_data(struct mbox_chan *chan)
{
if (chan->mbox->ops->peek_data)
return chan->mbox->ops->peek_data(chan);
return false;
}
EXPORT_SYMBOL_GPL(mbox_client_peek_data);
/**
* mbox_send_message - For client to submit a message to be
* sent to the remote.
* @chan: Mailbox channel assigned to this client.
* @mssg: Client specific message typecasted.
*
* For client to submit data to the controller destined for a remote
* processor. If the client had set 'tx_block', the call will return
* either when the remote receives the data or when 'tx_tout' millisecs
* run out.
* In non-blocking mode, the requests are buffered by the API and a
* non-negative token is returned for each queued request. If the request
* is not queued, a negative token is returned. Upon failure or successful
* TX, the API calls 'tx_done' from atomic context, from which the client
* could submit yet another request.
* In blocking mode, 'tx_done' is not called, effectively making the
* queue length 1.
* The pointer to message should be preserved until it is sent
* over the chan, i.e, tx_done() is made.
* This function could be called from atomic context as it simply
* queues the data and returns a token against the request.
*
* Return: Non-negative integer for successful submission (non-blocking mode)
* or transmission over chan (blocking mode).
* Negative value denotes failure.
*/
int mbox_send_message(struct mbox_chan *chan, void *mssg)
{
int t;
if (!chan || !chan->cl)
return -EINVAL;
t = _add_to_rbuf(chan, mssg);
if (t < 0) {
pr_err("Try increasing MBOX_TX_QUEUE_LEN\n");
return t;
}
_msg_submit(chan);
init_completion(&chan->tx_complete);
if (chan->txdone_method == TXDONE_BY_POLL)
poll_txdone((unsigned long)chan->mbox);
if (chan->cl->tx_block && chan->active_req) {
unsigned long wait;
int ret;
if (!chan->cl->tx_tout) /* wait for ever */
wait = msecs_to_jiffies(3600000);
else
wait = msecs_to_jiffies(chan->cl->tx_tout);
ret = wait_for_completion_timeout(&chan->tx_complete, wait);
if (ret == 0) {
t = -EIO;
tx_tick(chan, -EIO);
}
}
return t;
}
EXPORT_SYMBOL_GPL(mbox_send_message);
/**
* mbox_request_channel - Request a mailbox channel.
* @cl: Identity of the client requesting the channel.
*
* The Client specifies its requirements and capabilities while asking for
* a mailbox channel. It can't be called from atomic context.
* The channel is exclusively allocated and can't be used by another
* client before the owner calls mbox_free_channel.
* After assignment, any packet received on this channel will be
* handed over to the client via the 'rx_callback'.
* The framework holds reference to the client, so the mbox_client
* structure shouldn't be modified until the mbox_free_channel returns.
*
* Return: Pointer to the channel assigned to the client if successful.
* ERR_PTR for request failure.
*/
struct mbox_chan *mbox_request_channel(struct mbox_client *cl)
{
struct device *dev = cl->dev;
struct mbox_controller *mbox;
struct of_phandle_args spec;
struct mbox_chan *chan;
unsigned long flags;
int count, i, ret;
if (!dev || !dev->of_node) {
pr_err("%s: No owner device node\n", __func__);
return ERR_PTR(-ENODEV);
}
count = of_property_count_strings(dev->of_node, "mbox-names");
if (count < 0) {
pr_err("%s: mbox-names property of node '%s' missing\n",
__func__, dev->of_node->full_name);
return ERR_PTR(-ENODEV);
}
mutex_lock(&con_mutex);
ret = -ENODEV;
for (i = 0; i < count; i++) {
const char *s;
if (of_property_read_string_index(dev->of_node,
"mbox-names", i, &s))
continue;
if (strcmp(cl->chan_name, s))
continue;
if (of_parse_phandle_with_args(dev->of_node,
"mbox", "#mbox-cells", i, &spec))
continue;
chan = NULL;
list_for_each_entry(mbox, &mbox_cons, node)
if (mbox->dev->of_node == spec.np) {
chan = mbox->of_xlate(mbox, &spec);
break;
}
of_node_put(spec.np);
if (!chan)
continue;
ret = -EBUSY;
if (!chan->cl && try_module_get(mbox->dev->driver->owner))
break;
}
if (i == count) {
mutex_unlock(&con_mutex);
return ERR_PTR(ret);
}
spin_lock_irqsave(&chan->lock, flags);
chan->msg_free = 0;
chan->msg_count = 0;
chan->active_req = NULL;
chan->cl = cl;
init_completion(&chan->tx_complete);
if (chan->txdone_method == TXDONE_BY_POLL
&& cl->knows_txdone)
chan->txdone_method |= TXDONE_BY_ACK;
spin_unlock_irqrestore(&chan->lock, flags);
ret = chan->mbox->ops->startup(chan);
if (ret) {
pr_err("Unable to startup the chan (%d)\n", ret);
mbox_free_channel(chan);
chan = ERR_PTR(ret);
}
mutex_unlock(&con_mutex);
return chan;
}
EXPORT_SYMBOL_GPL(mbox_request_channel);
/**
* mbox_free_channel - The client relinquishes control of a mailbox
* channel by this call.
* @chan: The mailbox channel to be freed.
*/
void mbox_free_channel(struct mbox_chan *chan)
{
unsigned long flags;
if (!chan || !chan->cl)
return;
chan->mbox->ops->shutdown(chan);
/* The queued TX requests are simply aborted, no callbacks are made */
spin_lock_irqsave(&chan->lock, flags);
chan->cl = NULL;
chan->active_req = NULL;
if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
chan->txdone_method = TXDONE_BY_POLL;
module_put(chan->mbox->dev->driver->owner);
spin_unlock_irqrestore(&chan->lock, flags);
}
EXPORT_SYMBOL_GPL(mbox_free_channel);
static struct mbox_chan *
of_mbox_index_xlate(struct mbox_controller *mbox,
const struct of_phandle_args *sp)
{
int ind = sp->args[0];
if (ind >= mbox->num_chans)
return NULL;
return &mbox->chans[ind];
}
/**
* mbox_controller_register - Register the mailbox controller
* @mbox: Pointer to the mailbox controller.
*
* The controller driver registers its communication chans
*/
int mbox_controller_register(struct mbox_controller *mbox)
{
int i, txdone;
/* Sanity check */
if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans)
return -EINVAL;
if (mbox->txdone_irq)
txdone = TXDONE_BY_IRQ;
else if (mbox->txdone_poll)
txdone = TXDONE_BY_POLL;
else /* It has to be ACK then */
txdone = TXDONE_BY_ACK;
if (txdone == TXDONE_BY_POLL) {
mbox->poll.function = &poll_txdone;
mbox->poll.data = (unsigned long)mbox;
init_timer(&mbox->poll);
}
for (i = 0; i < mbox->num_chans; i++) {
struct mbox_chan *chan = &mbox->chans[i];
chan->cl = NULL;
chan->mbox = mbox;
chan->txdone_method = txdone;
spin_lock_init(&chan->lock);
}
if (!mbox->of_xlate)
mbox->of_xlate = of_mbox_index_xlate;
mutex_lock(&con_mutex);
list_add_tail(&mbox->node, &mbox_cons);
mutex_unlock(&con_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(mbox_controller_register);
/**
* mbox_controller_unregister - UnRegister the mailbox controller
* @mbox: Pointer to the mailbox controller.
*/
void mbox_controller_unregister(struct mbox_controller *mbox)
{
int i;
if (!mbox)
return;
mutex_lock(&con_mutex);
list_del(&mbox->node);
for (i = 0; i < mbox->num_chans; i++)
mbox_free_channel(&mbox->chans[i]);
if (mbox->txdone_poll)
del_timer_sync(&mbox->poll);
mutex_unlock(&con_mutex);
}
EXPORT_SYMBOL_GPL(mbox_controller_unregister);

View File

@ -26,7 +26,7 @@
#include <linux/device.h>
#include <linux/amba/bus.h>
#include <linux/mailbox.h>
#include <linux/pl320-ipc.h>
#define IPCMxSOURCE(m) ((m) * 0x40)
#define IPCMxDSET(m) (((m) * 0x40) + 0x004)

View File

@ -35,7 +35,9 @@ int pinconf_check_ops(struct pinctrl_dev *pctldev)
return -EINVAL;
}
/* We have to be able to config the pins in SOME way */
if (!ops->pin_config_set && !ops->pin_config_group_set) {
if (!ops->pin_config_set && !ops->pin_config_group_set
&& !ops->pin_config_set_bulk
&& !ops->pin_config_group_set_bulk) {
dev_err(pctldev->dev,
"pinconf has to be able to set a pins config\n");
return -EINVAL;
@ -171,14 +173,14 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting)
dev_err(pctldev->dev, "missing pin_config_set op\n");
return -EINVAL;
}
if (ops->pin_config_group_set_bulk) {
ret = ops->pin_config_group_set_bulk(pctldev,
if (ops->pin_config_set_bulk) {
ret = ops->pin_config_set_bulk(pctldev,
setting->data.configs.group_or_pin,
setting->data.configs.configs,
setting->data.configs.num_configs);
if (ret < 0) {
dev_err(pctldev->dev,
"pin_config_set op failed for pin %d\n",
"pin_config_set_bulk op failed for pin %d\n",
setting->data.configs.group_or_pin);
return ret;
}
@ -211,7 +213,7 @@ int pinconf_apply_setting(struct pinctrl_setting const *setting)
setting->data.configs.num_configs);
if (ret < 0) {
dev_err(pctldev->dev,
"pin_config_group_set op failed for group %d\n",
"pin_config_group_set_bulk op failed for group %d\n",
setting->data.configs.group_or_pin);
return ret;
}

View File

@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime,
#define ELF_HWCAP COMPAT_ELF_HWCAP
#endif
#ifdef COMPAT_ELF_HWCAP2
#undef ELF_HWCAP2
#define ELF_HWCAP2 COMPAT_ELF_HWCAP2
#endif
#ifdef COMPAT_ARCH_DLINFO
#undef ARCH_DLINFO
#define ARCH_DLINFO COMPAT_ARCH_DLINFO

View File

@ -23,11 +23,15 @@
#define ARCH_TIMER_CTRL_IT_MASK (1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT (1 << 2)
#define ARCH_TIMER_REG_CTRL 0
#define ARCH_TIMER_REG_TVAL 1
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
ARCH_TIMER_REG_TVAL,
};
#define ARCH_TIMER_PHYS_ACCESS 0
#define ARCH_TIMER_VIRT_ACCESS 1
#define ARCH_TIMER_MEM_PHYS_ACCESS 2
#define ARCH_TIMER_MEM_VIRT_ACCESS 3
#define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */
#define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */
@ -42,7 +46,7 @@
#ifdef CONFIG_ARM_ARCH_TIMER
extern u32 arch_timer_get_rate(void);
extern u64 arch_timer_read_counter(void);
extern u64 (*arch_timer_read_counter)(void);
extern struct timecounter *arch_timer_get_timecounter(void);
#else

View File

@ -0,0 +1,60 @@
/*
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __LINUX_CPUFEATURE_H
#define __LINUX_CPUFEATURE_H
#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
#include <linux/mod_devicetable.h>
#include <asm/cpufeature.h>
/*
* Macros imported from <asm/cpufeature.h>:
* - cpu_feature(x) ordinal value of feature called 'x'
* - cpu_have_feature(u32 n) whether feature #n is available
* - MAX_CPU_FEATURES upper bound for feature ordinal values
* Optional:
* - CPU_FEATURE_TYPEFMT format string fragment for printing the cpu type
* - CPU_FEATURE_TYPEVAL set of values matching the format string above
*/
#ifndef CPU_FEATURE_TYPEFMT
#define CPU_FEATURE_TYPEFMT "%s"
#endif
#ifndef CPU_FEATURE_TYPEVAL
#define CPU_FEATURE_TYPEVAL ELF_PLATFORM
#endif
/*
* Use module_cpu_feature_match(feature, module_init_function) to
* declare that
* a) the module shall be probed upon discovery of CPU feature 'feature'
* (typically at boot time using udev)
* b) the module must not be loaded if CPU feature 'feature' is not present
* (not even by manual insmod).
*
* For a list of legal values for 'feature', please consult the file
* 'asm/cpufeature.h' of your favorite architecture.
*/
#define module_cpu_feature_match(x, __init) \
static struct cpu_feature const cpu_feature_match_ ## x[] = \
{ { .feature = cpu_feature(x) }, { } }; \
MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \
\
static int cpu_feature_match_ ## x ## _init(void) \
{ \
if (!cpu_have_feature(cpu_feature(x))) \
return -ENODEV; \
return __init(); \
} \
module_init(cpu_feature_match_ ## x ## _init)
#endif
#endif

View File

@ -0,0 +1,46 @@
/*
* Copyright (C) 2014 Linaro Ltd.
* Author: Jassi Brar <jassisinghbrar@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __MAILBOX_CLIENT_H
#define __MAILBOX_CLIENT_H
#include <linux/of.h>
struct mbox_chan;
/**
* struct mbox_client - User of a mailbox
* @dev: The client device
* @chan_name: The "controller:channel" this client wants
* @rx_callback: Atomic callback to provide client the data received
* @tx_done: Atomic callback to tell client of data transmission
* @tx_block: If the mbox_send_message should block until data is
* transmitted.
* @tx_tout: Max block period in ms before TX is assumed failure
* @knows_txdone: if the client could run the TX state machine. Usually
* if the client receives some ACK packet for transmission.
* Unused if the controller already has TX_Done/RTR IRQ.
*/
struct mbox_client {
struct device *dev;
const char *chan_name;
void (*rx_callback)(struct mbox_client *cl, void *mssg);
void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
bool tx_block;
unsigned long tx_tout;
bool knows_txdone;
};
struct mbox_chan *mbox_request_channel(struct mbox_client *cl);
int mbox_send_message(struct mbox_chan *chan, void *mssg);
void mbox_client_txdone(struct mbox_chan *chan, int r);
bool mbox_client_peek_data(struct mbox_chan *chan);
void mbox_free_channel(struct mbox_chan *chan);
#endif /* __MAILBOX_CLIENT_H */

View File

@ -0,0 +1,121 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __MAILBOX_CONTROLLER_H
#define __MAILBOX_CONTROLLER_H
#include <linux/of.h>
struct mbox_chan;
/**
* struct mbox_chan_ops - s/w representation of a communication chan
* @send_data: The API asks the MBOX controller driver, in atomic
* context try to transmit a message on the bus. Returns 0 if
* data is accepted for transmission, -EBUSY while rejecting
* if the remote hasn't yet read the last data sent. Actual
* transmission of data is reported by the controller via
* mbox_chan_txdone (if it has some TX ACK irq). It must not
* block.
* @startup: Called when a client requests the chan. The controller
* could ask clients for additional parameters of communication
* to be provided via client's chan_data. This call may
* block. After this call the Controller must forward any
* data received on the chan by calling mbox_chan_received_data.
* @shutdown: Called when a client relinquishes control of a chan.
* This call may block too. The controller must not forwared
* any received data anymore.
* @last_tx_done: If the controller sets 'txdone_poll', the API calls
* this to poll status of last TX. The controller must
* give priority to IRQ method over polling and never
* set both txdone_poll and txdone_irq. Only in polling
* mode 'send_data' is expected to return -EBUSY.
* Used only if txdone_poll:=true && txdone_irq:=false
* @peek_data: Atomic check for any received data. Return true if controller
* has some data to push to the client. False otherwise.
*/
struct mbox_chan_ops {
int (*send_data)(struct mbox_chan *chan, void *data);
int (*startup)(struct mbox_chan *chan);
void (*shutdown)(struct mbox_chan *chan);
bool (*last_tx_done)(struct mbox_chan *chan);
bool (*peek_data)(struct mbox_chan *chan);
};
/**
* struct mbox_controller - Controller of a class of communication chans
* @dev: Device backing this controller
* @controller_name: Literal name of the controller.
* @ops: Operators that work on each communication chan
* @chans: Null terminated array of chans.
* @txdone_irq: Indicates if the controller can report to API when
* the last transmitted data was read by the remote.
* Eg, if it has some TX ACK irq.
* @txdone_poll: If the controller can read but not report the TX
* done. Ex, some register shows the TX status but
* no interrupt rises. Ignored if 'txdone_irq' is set.
* @txpoll_period: If 'txdone_poll' is in effect, the API polls for
* last TX's status after these many millisecs
*/
struct mbox_controller {
struct device *dev;
struct mbox_chan_ops *ops;
struct mbox_chan *chans;
int num_chans;
bool txdone_irq;
bool txdone_poll;
unsigned txpoll_period;
struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
const struct of_phandle_args *sp);
/*
* If the controller supports only TXDONE_BY_POLL,
* this timer polls all the links for txdone.
*/
struct timer_list poll;
unsigned period;
/* Hook to add to the global controller list */
struct list_head node;
};
/*
* The length of circular buffer for queuing messages from a client.
* 'msg_count' tracks the number of buffered messages while 'msg_free'
* is the index where the next message would be buffered.
* We shouldn't need it too big because every transferr is interrupt
* triggered and if we have lots of data to transfer, the interrupt
* latencies are going to be the bottleneck, not the buffer length.
* Besides, mbox_send_message could be called from atomic context and
* the client could also queue another message from the notifier 'tx_done'
* of the last transfer done.
* REVIST: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN"
* print, it needs to be taken from config option or somesuch.
*/
#define MBOX_TX_QUEUE_LEN 20
struct mbox_chan {
struct mbox_controller *mbox; /* Parent Controller */
unsigned txdone_method;
/* client */
struct mbox_client *cl;
struct completion tx_complete;
void *active_req;
unsigned msg_count, msg_free;
void *msg_data[MBOX_TX_QUEUE_LEN];
/* Access to the channel */
spinlock_t lock;
/* Private data for controller */
void *con_priv;
};
int mbox_controller_register(struct mbox_controller *mbox);
void mbox_chan_received_data(struct mbox_chan *chan, void *data);
void mbox_chan_txdone(struct mbox_chan *chan, int r);
void mbox_controller_unregister(struct mbox_controller *mbox);
#endif /* __MAILBOX_CONTROLLER_H */

View File

@ -563,6 +563,15 @@ struct x86_cpu_id {
#define X86_MODEL_ANY 0
#define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */
/*
* Generic table type for matching CPU features.
* @feature: the bit number of the feature (0 - 65535)
*/
struct cpu_feature {
__u16 feature;
};
#define IPACK_ANY_FORMAT 0xff
#define IPACK_ANY_ID (~0)
struct ipack_device_id {

View File

@ -11,6 +11,7 @@
#ifndef __LINUX_SND_SOC_DPCM_H
#define __LINUX_SND_SOC_DPCM_H
#include <linux/slab.h>
#include <linux/list.h>
#include <sound/pcm.h>
@ -135,4 +136,25 @@ int soc_dpcm_be_digital_mute(struct snd_soc_pcm_runtime *fe, int mute);
int soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd);
int soc_dpcm_runtime_update(struct snd_soc_dapm_widget *);
int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
int stream, struct snd_soc_dapm_widget_list **list_);
int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
int stream, struct snd_soc_dapm_widget_list **list, int new);
int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream);
int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream);
void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream);
void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream);
int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream);
int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int tream);
int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, int cmd);
int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream);
int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir,
int event);
static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list)
{
kfree(*list);
}
#endif

View File

@ -1063,6 +1063,7 @@ struct snd_soc_pcm_runtime {
/* Dynamic PCM BE runtime data */
struct snd_soc_dpcm_runtime dpcm[2];
int fe_compr;
long pmdown_time;
unsigned char pop_wait:1;

View File

@ -102,3 +102,4 @@ CONFIG_FUNCTION_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_STACK_TRACER=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_MAILBOX=y

View File

@ -174,6 +174,9 @@ int main(void)
DEVID_FIELD(x86_cpu_id, model);
DEVID_FIELD(x86_cpu_id, vendor);
DEVID(cpu_feature);
DEVID_FIELD(cpu_feature, feature);
DEVID(mei_cl_device_id);
DEVID_FIELD(mei_cl_device_id, name);

View File

@ -1133,6 +1133,16 @@ static int do_x86cpu_entry(const char *filename, void *symval,
}
ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry);
/* LOOKS like cpu:type:*:feature:*FEAT* */
static int do_cpu_entry(const char *filename, void *symval, char *alias)
{
DEF_FIELD(symval, cpu_feature, feature);
sprintf(alias, "cpu:type:*:feature:*%04X*", feature);
return 1;
}
ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry);
/* Looks like: mei:S */
static int do_mei_entry(const char *filename, void *symval,
char *alias)

View File

@ -24,6 +24,7 @@
#include <sound/compress_driver.h>
#include <sound/soc.h>
#include <sound/initval.h>
#include <sound/soc-dpcm.h>
static int soc_compr_open(struct snd_compr_stream *cstream)
{
@ -75,6 +76,98 @@ static int soc_compr_open(struct snd_compr_stream *cstream)
return ret;
}
static int soc_compr_open_fe(struct snd_compr_stream *cstream)
{
struct snd_soc_pcm_runtime *fe = cstream->private_data;
struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream;
struct snd_soc_platform *platform = fe->platform;
struct snd_soc_dai *cpu_dai = fe->cpu_dai;
struct snd_soc_dai *codec_dai = fe->codec_dai;
struct snd_soc_dpcm *dpcm;
struct snd_soc_dapm_widget_list *list;
int stream;
int ret = 0;
if (cstream->direction == SND_COMPRESS_PLAYBACK)
stream = SNDRV_PCM_STREAM_PLAYBACK;
else
stream = SNDRV_PCM_STREAM_CAPTURE;
mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
if (platform->driver->compr_ops && platform->driver->compr_ops->open) {
ret = platform->driver->compr_ops->open(cstream);
if (ret < 0) {
pr_err("compress asoc: can't open platform %s\n", platform->name);
goto out;
}
}
if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->startup) {
ret = fe->dai_link->compr_ops->startup(cstream);
if (ret < 0) {
pr_err("compress asoc: %s startup failed\n", fe->dai_link->name);
goto machine_err;
}
}
fe->dpcm[stream].runtime = fe_substream->runtime;
if (dpcm_path_get(fe, stream, &list) <= 0) {
dev_dbg(fe->dev, "ASoC: %s no valid %s route\n",
fe->dai_link->name, stream ? "capture" : "playback");
}
/* calculate valid and active FE <-> BE dpcms */
dpcm_process_paths(fe, stream, &list, 1);
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
ret = dpcm_be_dai_startup(fe, stream);
if (ret < 0) {
/* clean up all links */
list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be)
dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
dpcm_be_disconnect(fe, stream);
fe->dpcm[stream].runtime = NULL;
goto fe_err;
}
dpcm_clear_pending_state(fe, stream);
dpcm_path_put(&list);
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_OPEN;
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
if (cstream->direction == SND_COMPRESS_PLAYBACK) {
cpu_dai->playback_active++;
codec_dai->playback_active++;
} else {
cpu_dai->capture_active++;
codec_dai->capture_active++;
}
cpu_dai->active++;
codec_dai->active++;
fe->codec->active++;
mutex_unlock(&fe->card->mutex);
return 0;
fe_err:
if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
fe->dai_link->compr_ops->shutdown(cstream);
machine_err:
if (platform->driver->compr_ops && platform->driver->compr_ops->free)
platform->driver->compr_ops->free(cstream);
out:
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
mutex_unlock(&fe->card->mutex);
return ret;
}
/*
* Power down the audio subsystem pmdown_time msecs after close is called.
* This is to ensure there are no pops or clicks in between any music tracks
@ -164,6 +257,65 @@ static int soc_compr_free(struct snd_compr_stream *cstream)
return 0;
}
static int soc_compr_free_fe(struct snd_compr_stream *cstream)
{
struct snd_soc_pcm_runtime *fe = cstream->private_data;
struct snd_soc_platform *platform = fe->platform;
struct snd_soc_dai *cpu_dai = fe->cpu_dai;
struct snd_soc_dai *codec_dai = fe->codec_dai;
struct snd_soc_dpcm *dpcm;
int stream, ret;
mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
if (cstream->direction == SND_COMPRESS_PLAYBACK) {
stream = SNDRV_PCM_STREAM_PLAYBACK;
cpu_dai->playback_active--;
codec_dai->playback_active--;
} else {
stream = SNDRV_PCM_STREAM_CAPTURE;
cpu_dai->capture_active--;
codec_dai->capture_active--;
}
cpu_dai->active--;
codec_dai->active--;
fe->codec->active--;
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
ret = dpcm_be_dai_hw_free(fe, stream);
if (ret < 0)
dev_err(fe->dev, "compressed hw_free failed %d\n", ret);
ret = dpcm_be_dai_shutdown(fe, stream);
/* mark FE's links ready to prune */
list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be)
dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
if (stream == SNDRV_PCM_STREAM_PLAYBACK)
dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP);
else
dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP);
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE;
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
dpcm_be_disconnect(fe, stream);
fe->dpcm[stream].runtime = NULL;
if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
fe->dai_link->compr_ops->shutdown(cstream);
if (platform->driver->compr_ops && platform->driver->compr_ops->free)
platform->driver->compr_ops->free(cstream);
mutex_unlock(&fe->card->mutex);
return 0;
}
static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd)
{
@ -194,6 +346,59 @@ static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd)
return ret;
}
static int soc_compr_trigger_fe(struct snd_compr_stream *cstream, int cmd)
{
struct snd_soc_pcm_runtime *fe = cstream->private_data;
struct snd_soc_platform *platform = fe->platform;
int ret = 0, stream;
if (cmd == SND_COMPR_TRIGGER_PARTIAL_DRAIN ||
cmd == SND_COMPR_TRIGGER_DRAIN) {
if (platform->driver->compr_ops &&
platform->driver->compr_ops->trigger)
return platform->driver->compr_ops->trigger(cstream, cmd);
}
if (cstream->direction == SND_COMPRESS_PLAYBACK)
stream = SNDRV_PCM_STREAM_PLAYBACK;
else
stream = SNDRV_PCM_STREAM_CAPTURE;
mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
if (platform->driver->compr_ops && platform->driver->compr_ops->trigger) {
ret = platform->driver->compr_ops->trigger(cstream, cmd);
if (ret < 0)
goto out;
}
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
ret = dpcm_be_dai_trigger(fe, stream, cmd);
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_START;
break;
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_SUSPEND:
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP;
break;
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED;
break;
}
out:
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
mutex_unlock(&fe->card->mutex);
return ret;
}
static int soc_compr_set_params(struct snd_compr_stream *cstream,
struct snd_compr_params *params)
{
@ -241,6 +446,64 @@ static int soc_compr_set_params(struct snd_compr_stream *cstream,
return ret;
}
static int soc_compr_set_params_fe(struct snd_compr_stream *cstream,
struct snd_compr_params *params)
{
struct snd_soc_pcm_runtime *fe = cstream->private_data;
struct snd_pcm_substream *fe_substream = fe->pcm->streams[0].substream;
struct snd_soc_platform *platform = fe->platform;
int ret = 0, stream;
if (cstream->direction == SND_COMPRESS_PLAYBACK)
stream = SNDRV_PCM_STREAM_PLAYBACK;
else
stream = SNDRV_PCM_STREAM_CAPTURE;
mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
if (platform->driver->compr_ops && platform->driver->compr_ops->set_params) {
ret = platform->driver->compr_ops->set_params(cstream, params);
if (ret < 0)
goto out;
}
if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->set_params) {
ret = fe->dai_link->compr_ops->set_params(cstream);
if (ret < 0)
goto out;
}
/*
* Create an empty hw_params for the BE as the machine driver must
* fix this up to match DSP decoder and ASRC configuration.
* I.e. machine driver fixup for compressed BE is mandatory.
*/
memset(&fe->dpcm[fe_substream->stream].hw_params, 0,
sizeof(struct snd_pcm_hw_params));
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
ret = dpcm_be_dai_hw_params(fe, stream);
if (ret < 0)
goto out;
ret = dpcm_be_dai_prepare(fe, stream);
if (ret < 0)
goto out;
if (stream == SNDRV_PCM_STREAM_PLAYBACK)
dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START);
else
dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START);
fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE;
out:
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
mutex_unlock(&fe->card->mutex);
return ret;
}
static int soc_compr_get_params(struct snd_compr_stream *cstream,
struct snd_codec *params)
{
@ -335,7 +598,7 @@ static int soc_compr_copy(struct snd_compr_stream *cstream,
return ret;
}
static int sst_compr_set_metadata(struct snd_compr_stream *cstream,
static int soc_compr_set_metadata(struct snd_compr_stream *cstream,
struct snd_compr_metadata *metadata)
{
struct snd_soc_pcm_runtime *rtd = cstream->private_data;
@ -348,7 +611,7 @@ static int sst_compr_set_metadata(struct snd_compr_stream *cstream,
return ret;
}
static int sst_compr_get_metadata(struct snd_compr_stream *cstream,
static int soc_compr_get_metadata(struct snd_compr_stream *cstream,
struct snd_compr_metadata *metadata)
{
struct snd_soc_pcm_runtime *rtd = cstream->private_data;
@ -360,13 +623,14 @@ static int sst_compr_get_metadata(struct snd_compr_stream *cstream,
return ret;
}
/* ASoC Compress operations */
static struct snd_compr_ops soc_compr_ops = {
.open = soc_compr_open,
.free = soc_compr_free,
.set_params = soc_compr_set_params,
.set_metadata = sst_compr_set_metadata,
.get_metadata = sst_compr_get_metadata,
.set_metadata = soc_compr_set_metadata,
.get_metadata = soc_compr_get_metadata,
.get_params = soc_compr_get_params,
.trigger = soc_compr_trigger,
.pointer = soc_compr_pointer,
@ -375,6 +639,21 @@ static struct snd_compr_ops soc_compr_ops = {
.get_codec_caps = soc_compr_get_codec_caps
};
/* ASoC Dynamic Compress operations */
static struct snd_compr_ops soc_compr_dyn_ops = {
.open = soc_compr_open_fe,
.free = soc_compr_free_fe,
.set_params = soc_compr_set_params_fe,
.get_params = soc_compr_get_params,
.set_metadata = soc_compr_set_metadata,
.get_metadata = soc_compr_get_metadata,
.trigger = soc_compr_trigger_fe,
.pointer = soc_compr_pointer,
.ack = soc_compr_ack,
.get_caps = soc_compr_get_caps,
.get_codec_caps = soc_compr_get_codec_caps
};
/* create a new compress */
int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
{
@ -383,6 +662,7 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
struct snd_soc_dai *codec_dai = rtd->codec_dai;
struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
struct snd_compr *compr;
struct snd_pcm *be_pcm;
char new_name[64];
int ret = 0, direction = 0;
@ -410,7 +690,26 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
ret = -ENOMEM;
goto compr_err;
}
memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops));
if (rtd->dai_link->dynamic) {
snprintf(new_name, sizeof(new_name), "(%s)",
rtd->dai_link->stream_name);
ret = snd_pcm_new_internal(rtd->card->snd_card, new_name, num,
1, 0, &be_pcm);
if (ret < 0) {
dev_err(rtd->card->dev, "ASoC: can't create compressed for %s\n",
rtd->dai_link->name);
goto compr_err;
}
rtd->pcm = be_pcm;
rtd->fe_compr = 1;
be_pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream->private_data = rtd;
be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream->private_data = rtd;
memcpy(compr->ops, &soc_compr_dyn_ops, sizeof(soc_compr_dyn_ops));
} else
memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops));
/* Add copy callback for not memory mapped DSPs */
if (platform->driver->compr_ops && platform->driver->compr_ops->copy)

View File

@ -34,7 +34,7 @@
#define DPCM_MAX_BE_USERS 8
/* DPCM stream event, send event to FE and all active BEs. */
static int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir,
int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir,
int event)
{
struct snd_soc_dpcm *dpcm;
@ -758,7 +758,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
}
/* disconnect a BE and FE */
static void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm, *d;
@ -854,7 +854,7 @@ static int widget_in_list(struct snd_soc_dapm_widget_list *list,
return 0;
}
static int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
int stream, struct snd_soc_dapm_widget_list **list_)
{
struct snd_soc_dai *cpu_dai = fe->cpu_dai;
@ -876,11 +876,6 @@ static int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
return paths;
}
static inline void dpcm_path_put(struct snd_soc_dapm_widget_list **list)
{
kfree(*list);
}
static int dpcm_prune_paths(struct snd_soc_pcm_runtime *fe, int stream,
struct snd_soc_dapm_widget_list **list_)
{
@ -950,7 +945,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream,
continue;
/* don't connect if FE is not running */
if (!fe->dpcm[stream].runtime)
if (!fe->dpcm[stream].runtime && !fe->fe_compr)
continue;
/* newly connected FE and BE */
@ -975,7 +970,7 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream,
* Find the corresponding BE DAIs that source or sink audio to this
* FE substream.
*/
static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
int stream, struct snd_soc_dapm_widget_list **list, int new)
{
if (new)
@ -984,7 +979,7 @@ static int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
return dpcm_prune_paths(fe, stream, list);
}
static void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream)
void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
@ -1022,7 +1017,7 @@ static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe,
}
}
static int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream)
int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
int err, count = 0;
@ -1164,7 +1159,7 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream)
return ret;
}
static int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream)
int dpcm_be_dai_shutdown(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
@ -1225,7 +1220,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream)
return 0;
}
static int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream)
int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
@ -1290,7 +1285,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream)
return 0;
}
static int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream)
int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
int ret;
@ -1420,7 +1415,7 @@ static int dpcm_do_trigger(struct snd_soc_dpcm *dpcm,
return ret;
}
static int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
int cmd)
{
struct snd_soc_dpcm *dpcm;
@ -1588,7 +1583,7 @@ static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd)
return ret;
}
static int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
{
struct snd_soc_dpcm *dpcm;
int ret = 0;