lib/crc: arm64: Assume a little-endian kernel

Since support for big-endian arm64 kernels was removed, the CPU_LE()
macro now unconditionally emits the code it is passed, and the CPU_BE()
macro now unconditionally discards the code it is passed.

Simplify the assembly code in lib/crc/arm64/ accordingly.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260401004431.151432-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
Eric Biggers 2026-03-31 17:44:31 -07:00
parent 63432fd625
commit 5276ea17a2
2 changed files with 30 additions and 35 deletions

View File

@ -181,13 +181,13 @@ SYM_FUNC_END(__pmull_p8_16x64)
pmull16x64_\p fold_consts, \reg1, v8
CPU_LE( rev64 v11.16b, v11.16b )
CPU_LE( rev64 v12.16b, v12.16b )
rev64 v11.16b, v11.16b
rev64 v12.16b, v12.16b
pmull16x64_\p fold_consts, \reg2, v9
CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
ext v11.16b, v11.16b, v11.16b, #8
ext v12.16b, v12.16b, v12.16b, #8
eor \reg1\().16b, \reg1\().16b, v8.16b
eor \reg2\().16b, \reg2\().16b, v9.16b
@ -220,22 +220,22 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
ldp q4, q5, [buf, #0x40]
ldp q6, q7, [buf, #0x60]
add buf, buf, #0x80
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( rev64 v1.16b, v1.16b )
CPU_LE( rev64 v2.16b, v2.16b )
CPU_LE( rev64 v3.16b, v3.16b )
CPU_LE( rev64 v4.16b, v4.16b )
CPU_LE( rev64 v5.16b, v5.16b )
CPU_LE( rev64 v6.16b, v6.16b )
CPU_LE( rev64 v7.16b, v7.16b )
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 )
CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 )
CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
rev64 v0.16b, v0.16b
rev64 v1.16b, v1.16b
rev64 v2.16b, v2.16b
rev64 v3.16b, v3.16b
rev64 v4.16b, v4.16b
rev64 v5.16b, v5.16b
rev64 v6.16b, v6.16b
rev64 v7.16b, v7.16b
ext v0.16b, v0.16b, v0.16b, #8
ext v1.16b, v1.16b, v1.16b, #8
ext v2.16b, v2.16b, v2.16b, #8
ext v3.16b, v3.16b, v3.16b, #8
ext v4.16b, v4.16b, v4.16b, #8
ext v5.16b, v5.16b, v5.16b, #8
ext v6.16b, v6.16b, v6.16b, #8
ext v7.16b, v7.16b, v7.16b, #8
// XOR the first 16 data *bits* with the initial CRC value.
movi v8.16b, #0
@ -288,8 +288,8 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
pmull16x64_\p fold_consts, v7, v8
eor v7.16b, v7.16b, v8.16b
ldr q0, [buf], #16
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
rev64 v0.16b, v0.16b
ext v0.16b, v0.16b, v0.16b, #8
eor v7.16b, v7.16b, v0.16b
subs len, len, #16
b.ge .Lfold_16_bytes_loop_\@
@ -310,8 +310,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
// v0 = last 16 original data bytes
add buf, buf, len
ldr q0, [buf, #-16]
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
rev64 v0.16b, v0.16b
ext v0.16b, v0.16b, v0.16b, #8
// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
adr_l x4, .Lbyteshift_table + 16
@ -344,8 +344,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
// Load the first 16 data bytes.
ldr q7, [buf], #0x10
CPU_LE( rev64 v7.16b, v7.16b )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
rev64 v7.16b, v7.16b
ext v7.16b, v7.16b, v7.16b, #8
// XOR the first 16 data *bits* with the initial CRC value.
movi v0.16b, #0
@ -382,8 +382,8 @@ SYM_FUNC_START(crc_t10dif_pmull_p8)
crc_t10dif_pmull p8
CPU_LE( rev64 v7.16b, v7.16b )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
rev64 v7.16b, v7.16b
ext v7.16b, v7.16b, v7.16b, #8
str q7, [x3]
frame_pop

View File

@ -29,24 +29,19 @@
.endm
.macro hwordle, reg
CPU_BE( rev16 \reg, \reg )
.endm
.macro hwordbe, reg
CPU_LE( rev \reg, \reg )
rev \reg, \reg
rbit \reg, \reg
CPU_BE( lsr \reg, \reg, #16 )
.endm
.macro le, regs:vararg
.irp r, \regs
CPU_BE( rev \r, \r )
.endr
.endm
.macro be, regs:vararg
.irp r, \regs
CPU_LE( rev \r, \r )
rev \r, \r
.endr
.irp r, \regs
rbit \r, \r