mirror of
https://github.com/torvalds/linux.git
synced 2026-05-26 08:02:27 +02:00
Merge patch series "riscv: Add Zicbop & prefetchw support"
Alexandre Ghiti <alexghiti@rivosinc.com> says:

I found this lost series developed by Guo so here is a respin with the
comments on v2 applied.

This patch series adds Zicbop support and then enables the Linux
prefetch features.

* patches from https://lore.kernel.org/r/20250421142441.395849-1-alexghiti@rivosinc.com:
  riscv: xchg: Prefetch the destination word for sc.w
  riscv: Add ARCH_HAS_PREFETCH[W] support with Zicbop
  riscv: Add support for Zicbop
  riscv: Introduce Zicbop instructions

Link: https://lore.kernel.org/r/20250421142441.395849-1-alexghiti@rivosinc.com
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
This commit is contained in:
commit
847689d2a0
|
|
@ -847,6 +847,21 @@ config RISCV_ISA_ZICBOZ
|
|||
|
||||
If you don't know what to do here, say Y.
|
||||
|
||||
config RISCV_ISA_ZICBOP
|
||||
bool "Zicbop extension support for cache block prefetch"
|
||||
depends on MMU
|
||||
depends on RISCV_ALTERNATIVE
|
||||
default y
|
||||
help
|
||||
Adds support to dynamically detect the presence of the ZICBOP
|
||||
extension (Cache Block Prefetch Operations) and enable its
|
||||
usage.
|
||||
|
||||
The Zicbop extension can be used to prefetch cache blocks for
|
||||
instruction fetch, data read, or data write accesses.
|
||||
|
||||
If you don't know what to do here, say Y.
|
||||
|
||||
config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
|
||||
def_bool y
|
||||
# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
|
||||
|
|
|
|||
|
|
@ -14,11 +14,6 @@
|
|||
#include <asm/cmpxchg.h>
|
||||
#include <asm/fence.h>
|
||||
|
||||
#define nop() __asm__ __volatile__ ("nop")
|
||||
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
|
||||
#define nops(n) __asm__ __volatile__ (__nops(n))
|
||||
|
||||
|
||||
/* These barriers need to enforce ordering on both devices or memory. */
|
||||
#define __mb() RISCV_FENCE(iorw, iorw)
|
||||
#define __rmb() RISCV_FENCE(ir, ir)
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
|
|||
|
||||
extern unsigned int riscv_cbom_block_size;
|
||||
extern unsigned int riscv_cboz_block_size;
|
||||
extern unsigned int riscv_cbop_block_size;
|
||||
void riscv_init_cbo_blocksizes(void);
|
||||
|
||||
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include <asm/hwcap.h>
|
||||
#include <asm/insn-def.h>
|
||||
#include <asm/cpufeature-macros.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
|
||||
swap_append, r, p, n) \
|
||||
|
|
@ -37,6 +38,7 @@
|
|||
\
|
||||
__asm__ __volatile__ ( \
|
||||
prepend \
|
||||
PREFETCHW_ASM(%5) \
|
||||
"0: lr.w %0, %2\n" \
|
||||
" and %1, %0, %z4\n" \
|
||||
" or %1, %1, %z3\n" \
|
||||
|
|
@ -44,7 +46,7 @@
|
|||
" bnez %1, 0b\n" \
|
||||
sc_append \
|
||||
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
|
||||
: "rJ" (__newx), "rJ" (~__mask) \
|
||||
: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
|
||||
: "memory"); \
|
||||
\
|
||||
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@
|
|||
#define RISCV_ISA_EXT_ZVFBFWMA 96
|
||||
#define RISCV_ISA_EXT_ZAAMO 97
|
||||
#define RISCV_ISA_EXT_ZALRSC 98
|
||||
#define RISCV_ISA_EXT_ZICBOP 99
|
||||
|
||||
#define RISCV_ISA_EXT_XLINUXENVCFG 127
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,13 @@
|
|||
#define INSN_I_RD_SHIFT 7
|
||||
#define INSN_I_OPCODE_SHIFT 0
|
||||
|
||||
/*
 * Bit offsets of the S-type instruction fields used by the insn_s
 * encoders in this header. The 12-bit immediate is stored in two
 * pieces: its low five bits at INSN_S_SIMM5_SHIFT and its upper seven
 * bits at INSN_S_SIMM7_SHIFT.
 */
#define INSN_S_SIMM7_SHIFT 25
|
||||
#define INSN_S_RS2_SHIFT 20
|
||||
#define INSN_S_RS1_SHIFT 15
|
||||
#define INSN_S_FUNC3_SHIFT 12
|
||||
#define INSN_S_SIMM5_SHIFT 7
|
||||
#define INSN_S_OPCODE_SHIFT 0
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
#ifdef CONFIG_AS_HAS_INSN
|
||||
|
|
@ -30,6 +37,10 @@
|
|||
.insn i \opcode, \func3, \rd, \rs1, \simm12
|
||||
.endm
|
||||
|
||||
/*
 * Assembler macro emitting an S-type instruction through the
 * assembler's own ".insn s" directive, with the operands passed through
 * unchanged in "rs2, simm12(rs1)" form.
 * NOTE(review): appears to be the CONFIG_AS_HAS_INSN branch; the
 * matching #ifdef is in a different hunk - confirm.
 */
.macro insn_s, opcode, func3, rs2, simm12, rs1
|
||||
.insn s \opcode, \func3, \rs2, \simm12(\rs1)
|
||||
.endm
|
||||
|
||||
#else
|
||||
|
||||
#include <asm/gpr-num.h>
|
||||
|
|
@ -51,10 +62,20 @@
|
|||
(\simm12 << INSN_I_SIMM12_SHIFT))
|
||||
.endm
|
||||
|
||||
/*
 * Assembler macro that hand-assembles an S-type instruction as a raw
 * 32-bit word with .4byte. Register names are turned into numbers via
 * the .L__gpr_num_<reg> symbols, and simm12 is split into its low five
 * bits (mask 0x1f) and the next seven bits (shifted right by 5, mask
 * 0x7f) before being placed at the INSN_S_* field offsets.
 */
.macro insn_s, opcode, func3, rs2, simm12, rs1
|
||||
.4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \
|
||||
(\func3 << INSN_S_FUNC3_SHIFT) | \
|
||||
(.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \
|
||||
(.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \
|
||||
((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \
|
||||
(((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT))
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
#define __INSN_R(...) insn_r __VA_ARGS__
|
||||
#define __INSN_I(...) insn_i __VA_ARGS__
|
||||
#define __INSN_S(...) insn_s __VA_ARGS__
|
||||
|
||||
#else /* ! __ASSEMBLY__ */
|
||||
|
||||
|
|
@ -66,6 +87,9 @@
|
|||
#define __INSN_I(opcode, func3, rd, rs1, simm12) \
|
||||
".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n"
|
||||
|
||||
/*
 * C (non-assembly) form: builds the text of a ".insn s" directive by
 * string-literal concatenation, so every argument must itself expand to
 * a string literal. The result ends with a newline.
 */
#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
|
||||
".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n"
|
||||
|
||||
#else
|
||||
|
||||
#include <linux/stringify.h>
|
||||
|
|
@ -92,12 +116,26 @@
|
|||
" (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \
|
||||
" .endm\n"
|
||||
|
||||
/*
 * String fragment that defines the insn_s assembler macro inside an
 * inline-asm block. It is prefixed with __DEFINE_ASM_GPR_NUMS and then
 * emits the instruction as a .4byte word, using __stringify() to paste
 * the INSN_S_* shift values into the assembler text. The immediate is
 * split into low five bits (0x1f) and the following seven bits (0x7f).
 */
#define DEFINE_INSN_S \
|
||||
__DEFINE_ASM_GPR_NUMS \
|
||||
" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \
|
||||
" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \
|
||||
" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \
|
||||
" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \
|
||||
" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \
|
||||
" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \
|
||||
" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \
|
||||
" .endm\n"
|
||||
|
||||
#define UNDEFINE_INSN_R \
|
||||
" .purgem insn_r\n"
|
||||
|
||||
#define UNDEFINE_INSN_I \
|
||||
" .purgem insn_i\n"
|
||||
|
||||
/* String fragment that removes the insn_s assembler macro again (.purgem). */
#define UNDEFINE_INSN_S \
|
||||
" .purgem insn_s\n"
|
||||
|
||||
#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \
|
||||
DEFINE_INSN_R \
|
||||
"insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
|
||||
|
|
@ -108,6 +146,11 @@
|
|||
"insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \
|
||||
UNDEFINE_INSN_I
|
||||
|
||||
/*
 * C (non-assembly) fallback form: defines the insn_s assembler macro,
 * invokes it once with the given string-literal arguments, then purges
 * the macro again via UNDEFINE_INSN_S.
 */
#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
|
||||
DEFINE_INSN_S \
|
||||
"insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \
|
||||
UNDEFINE_INSN_S
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* ! __ASSEMBLY__ */
|
||||
|
|
@ -120,6 +163,10 @@
|
|||
__INSN_I(RV_##opcode, RV_##func3, RV_##rd, \
|
||||
RV_##rs1, RV_##simm12)
|
||||
|
||||
/*
 * Front end for S-type instructions. Each argument gets an RV_ prefix
 * by token pasting before being handed to __INSN_S, so callers write
 * operands such as OPCODE_OP_IMM, FUNC3(6), __RS2(1), SIMM12(...) and
 * RS1(base), which resolve to the RV_* helper macros.
 */
#define INSN_S(opcode, func3, rs2, simm12, rs1) \
|
||||
__INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \
|
||||
RV_##simm12, RV_##rs1)
|
||||
|
||||
#define RV_OPCODE(v) __ASM_STR(v)
|
||||
#define RV_FUNC3(v) __ASM_STR(v)
|
||||
#define RV_FUNC7(v) __ASM_STR(v)
|
||||
|
|
@ -133,6 +180,7 @@
|
|||
#define RV___RS2(v) __RV_REG(v)
|
||||
|
||||
#define RV_OPCODE_MISC_MEM RV_OPCODE(15)
|
||||
#define RV_OPCODE_OP_IMM RV_OPCODE(19)
|
||||
#define RV_OPCODE_SYSTEM RV_OPCODE(115)
|
||||
|
||||
#define HFENCE_VVMA(vaddr, asid) \
|
||||
|
|
@ -196,6 +244,18 @@
|
|||
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
|
||||
RS1(base), SIMM12(4))
|
||||
|
||||
/*
 * Prefetch encoded as an S-type instruction: opcode OP_IMM, func3 6,
 * with the rs2 field set to 0 to select this variant. The offset is
 * masked with 0xfe0, which clears its low five bits and keeps only
 * bits 5-11.
 * NOTE(review): presumably the instruction-fetch variant (prefetch.i);
 * confirm against the Zicbop specification.
 */
#define PREFETCH_I(base, offset) \
|
||||
INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \
|
||||
SIMM12((offset) & 0xfe0), RS1(base))
|
||||
|
||||
/*
 * Prefetch encoded as an S-type instruction: opcode OP_IMM, func3 6,
 * with the rs2 field set to 1 to select this variant. The offset is
 * masked with 0xfe0, which clears its low five bits and keeps only
 * bits 5-11.
 * NOTE(review): presumably the data-read variant (prefetch.r);
 * confirm against the Zicbop specification.
 */
#define PREFETCH_R(base, offset) \
|
||||
INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \
|
||||
SIMM12((offset) & 0xfe0), RS1(base))
|
||||
|
||||
/*
 * Prefetch encoded as an S-type instruction: opcode OP_IMM, func3 6,
 * with the rs2 field set to 3 to select this variant. The offset is
 * masked with 0xfe0, which clears its low five bits and keeps only
 * bits 5-11.
 * NOTE(review): presumably the data-write variant (prefetch.w);
 * confirm against the Zicbop specification.
 */
#define PREFETCH_W(base, offset) \
|
||||
INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \
|
||||
SIMM12((offset) & 0xfe0), RS1(base))
|
||||
|
||||
#define RISCV_PAUSE ".4byte 0x100000f"
|
||||
#define ZAWRS_WRS_NTO ".4byte 0x00d00073"
|
||||
#define ZAWRS_WRS_STO ".4byte 0x01d00073"
|
||||
|
|
@ -203,4 +263,10 @@
|
|||
|
||||
#define RISCV_INSN_NOP4 _AC(0x00000013, U)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
/*
 * nop()     - emit a single "nop" instruction.
 * __nops(n) - assembler text that repeats "nop" n times using .rept;
 *             usable as a string inside other inline-asm templates.
 * nops(n)   - emit n "nop" instructions.
 */
#define nop() __asm__ __volatile__ ("nop")
|
||||
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
|
||||
#define nops(n) __asm__ __volatile__ (__nops(n))
|
||||
#endif
|
||||
|
||||
#endif /* __ASM_INSN_DEF_H */
|
||||
|
|
|
|||
|
|
@ -13,6 +13,9 @@
|
|||
#include <vdso/processor.h>
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/insn-def.h>
|
||||
#include <asm/alternative-macros.h>
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
#define arch_get_mmap_end(addr, len, flags) \
|
||||
({ \
|
||||
|
|
@ -52,7 +55,6 @@
|
|||
#endif
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
struct task_struct;
|
||||
struct pt_regs;
|
||||
|
|
@ -141,6 +143,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
|
|||
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc)
|
||||
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
|
||||
|
||||
/*
 * Inline-asm template for a prefetch of the address in operand x.
 * Pairs a one-instruction nop sequence with PREFETCH_R(x, 0) inside
 * ALTERNATIVE(), keyed on RISCV_ISA_EXT_ZICBOP and
 * CONFIG_RISCV_ISA_ZICBOP.
 * NOTE(review): ALTERNATIVE() is defined elsewhere; presumably the nop
 * is what runs unless the extension is detected - confirm.
 */
#define PREFETCH_ASM(x) \
|
||||
ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0, \
|
||||
RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
|
||||
|
||||
/*
 * Inline-asm template for a prefetch of the address in operand x.
 * Pairs a one-instruction nop sequence with PREFETCH_W(x, 0) inside
 * ALTERNATIVE(), keyed on RISCV_ISA_EXT_ZICBOP and
 * CONFIG_RISCV_ISA_ZICBOP.
 * NOTE(review): ALTERNATIVE() is defined elsewhere; presumably the nop
 * is what runs unless the extension is detected - confirm.
 */
#define PREFETCHW_ASM(x) \
|
||||
ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0, \
|
||||
RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
|
||||
|
||||
#ifdef CONFIG_RISCV_ISA_ZICBOP
|
||||
#define ARCH_HAS_PREFETCH
|
||||
/*
 * prefetch() - issue the PREFETCH_ASM sequence for the address @x.
 * @x: address to prefetch; passed to the asm as a register operand.
 *
 * The asm statement is volatile and declares a "memory" clobber. It has
 * no output operands.
 */
static inline void prefetch(const void *x)
|
||||
{
|
||||
__asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory");
|
||||
}
|
||||
|
||||
#define ARCH_HAS_PREFETCHW
|
||||
/*
 * prefetchw() - issue the PREFETCHW_ASM sequence for the address @x.
 * @x: address to prefetch; passed to the asm as a register operand.
 *
 * The asm statement is volatile and declares a "memory" clobber. It has
 * no output operands.
 */
static inline void prefetchw(const void *x)
|
||||
{
|
||||
__asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
|
||||
}
|
||||
#endif /* CONFIG_RISCV_ISA_ZICBOP */
|
||||
|
||||
/* Do necessary setup to start up a newly executed thread. */
|
||||
extern void start_thread(struct pt_regs *regs,
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
|
||||
|
||||
static bool any_cpu_has_zicboz;
|
||||
static bool any_cpu_has_zicbop;
|
||||
static bool any_cpu_has_zicbom;
|
||||
|
||||
unsigned long elf_hwcap __read_mostly;
|
||||
|
|
@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * riscv_ext_zicbop_validate() - validation callback for the zicbop
 * entry of riscv_isa_ext[].
 * @data: extension descriptor (not used by this function).
 * @isa_bitmap: ISA bitmap (not used by this function).
 *
 * Rejects the extension when riscv_cbop_block_size is zero or is not a
 * power of two, logging the reason with pr_err(). On success it records
 * that a CPU has Zicbop by setting any_cpu_has_zicbop.
 *
 * Return: 0 on success, -EINVAL if the block size is missing or invalid.
 */
static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data,
|
||||
const unsigned long *isa_bitmap)
|
||||
{
|
||||
if (!riscv_cbop_block_size) {
|
||||
pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!is_power_of_2(riscv_cbop_block_size)) {
|
||||
pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
any_cpu_has_zicbop = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data,
|
||||
const unsigned long *isa_bitmap)
|
||||
{
|
||||
|
|
@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
|
|||
__RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate),
|
||||
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
|
||||
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate),
|
||||
__RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate),
|
||||
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate),
|
||||
__RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
|
||||
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
|
||||
|
|
@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void)
|
|||
current->thread.envcfg |= ENVCFG_CBCFE;
|
||||
else if (any_cpu_has_zicbom)
|
||||
pr_warn("Zicbom disabled as it is unavailable on some harts\n");
|
||||
|
||||
if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) &&
|
||||
any_cpu_has_zicbop)
|
||||
pr_warn("Zicbop disabled as it is unavailable on some harts\n");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RISCV_ALTERNATIVE
|
||||
|
|
|
|||
|
|
@ -114,6 +114,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
|
|||
unsigned int riscv_cboz_block_size;
|
||||
EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
|
||||
|
||||
unsigned int riscv_cbop_block_size;
|
||||
EXPORT_SYMBOL_GPL(riscv_cbop_block_size);
|
||||
|
||||
static void __init cbo_get_block_size(struct device_node *node,
|
||||
const char *name, u32 *block_size,
|
||||
unsigned long *first_hartid)
|
||||
|
|
@ -138,8 +141,8 @@ static void __init cbo_get_block_size(struct device_node *node,
|
|||
|
||||
void __init riscv_init_cbo_blocksizes(void)
|
||||
{
|
||||
unsigned long cbom_hartid, cboz_hartid;
|
||||
u32 cbom_block_size = 0, cboz_block_size = 0;
|
||||
unsigned long cbom_hartid, cboz_hartid, cbop_hartid;
|
||||
u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0;
|
||||
struct device_node *node;
|
||||
struct acpi_table_header *rhct;
|
||||
acpi_status status;
|
||||
|
|
@ -151,13 +154,15 @@ void __init riscv_init_cbo_blocksizes(void)
|
|||
&cbom_block_size, &cbom_hartid);
|
||||
cbo_get_block_size(node, "riscv,cboz-block-size",
|
||||
&cboz_block_size, &cboz_hartid);
|
||||
cbo_get_block_size(node, "riscv,cbop-block-size",
|
||||
&cbop_block_size, &cbop_hartid);
|
||||
}
|
||||
} else {
|
||||
status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
|
||||
if (ACPI_FAILURE(status))
|
||||
return;
|
||||
|
||||
acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL);
|
||||
acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size);
|
||||
acpi_put_table((struct acpi_table_header *)rhct);
|
||||
}
|
||||
|
||||
|
|
@ -166,6 +171,9 @@ void __init riscv_init_cbo_blocksizes(void)
|
|||
|
||||
if (cboz_block_size)
|
||||
riscv_cboz_block_size = cboz_block_size;
|
||||
|
||||
if (cbop_block_size)
|
||||
riscv_cbop_block_size = cbop_block_size;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user