Merge patch series "riscv: Add Zicbop & prefetchw support"

Alexandre Ghiti <alexghiti@rivosinc.com> says:

I found this lost series developed by Guo so here is a respin with the
comments on v2 applied.

This patch series adds Zicbop support and then enables the Linux
prefetch features.

* patches from https://lore.kernel.org/r/20250421142441.395849-1-alexghiti@rivosinc.com:
  riscv: xchg: Prefetch the destination word for sc.w
  riscv: Add ARCH_HAS_PREFETCH[W] support with Zicbop
  riscv: Add support for Zicbop
  riscv: Introduce Zicbop instructions

Link: https://lore.kernel.org/r/20250421142441.395849-1-alexghiti@rivosinc.com
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
This commit is contained in:
Alexandre Ghiti 2025-05-06 08:19:57 +00:00 committed by Palmer Dabbelt
commit 847689d2a0
No known key found for this signature in database
GPG Key ID: 2E1319F35FBB1889
9 changed files with 142 additions and 10 deletions

View File

@ -847,6 +847,21 @@ config RISCV_ISA_ZICBOZ
If you don't know what to do here, say Y.
config RISCV_ISA_ZICBOP
bool "Zicbop extension support for cache block prefetch"
depends on MMU
depends on RISCV_ALTERNATIVE
default y
help
Adds support to dynamically detect the presence of the ZICBOP
extension (Cache Block Prefetch Operations) and enable its
usage.
The Zicbop extension can be used to prefetch cache blocks for
instruction fetch, data read, or data write accesses.
If you don't know what to do here, say Y.
config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
def_bool y
# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc

View File

@ -14,11 +14,6 @@
#include <asm/cmpxchg.h>
#include <asm/fence.h>
#define nop() __asm__ __volatile__ ("nop")
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
#define nops(n) __asm__ __volatile__ (__nops(n))
/* These barriers need to enforce ordering on both devices or memory. */
#define __mb() RISCV_FENCE(iorw, iorw)
#define __rmb() RISCV_FENCE(ir, ir)

View File

@ -80,6 +80,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
extern unsigned int riscv_cbom_block_size;
extern unsigned int riscv_cboz_block_size;
extern unsigned int riscv_cbop_block_size;
void riscv_init_cbo_blocksizes(void);
#ifdef CONFIG_RISCV_DMA_NONCOHERENT

View File

@ -13,6 +13,7 @@
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
swap_append, r, p, n) \
@ -37,6 +38,7 @@
\
__asm__ __volatile__ ( \
prepend \
PREFETCHW_ASM(%5) \
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
@ -44,7 +46,7 @@
" bnez %1, 0b\n" \
sc_append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
: "rJ" (__newx), "rJ" (~__mask) \
: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \

View File

@ -105,6 +105,7 @@
#define RISCV_ISA_EXT_ZVFBFWMA 96
#define RISCV_ISA_EXT_ZAAMO 97
#define RISCV_ISA_EXT_ZALRSC 98
#define RISCV_ISA_EXT_ZICBOP 99
#define RISCV_ISA_EXT_XLINUXENVCFG 127

View File

@ -18,6 +18,13 @@
#define INSN_I_RD_SHIFT 7
#define INSN_I_OPCODE_SHIFT 0
/*
 * Bit positions of the fields of an S-type instruction word. The 12-bit
 * immediate is stored in two pieces: a 7-bit part at bit 25 and a 5-bit
 * part at bit 7 (see the insn_s fallback encoders that use these shifts).
 */
#define INSN_S_SIMM7_SHIFT 25
#define INSN_S_RS2_SHIFT 20
#define INSN_S_RS1_SHIFT 15
#define INSN_S_FUNC3_SHIFT 12
#define INSN_S_SIMM5_SHIFT 7
#define INSN_S_OPCODE_SHIFT 0
#ifdef __ASSEMBLY__
#ifdef CONFIG_AS_HAS_INSN
@ -30,6 +37,10 @@
.insn i \opcode, \func3, \rd, \rs1, \simm12
.endm
/*
 * insn_s: emit an S-type instruction by forwarding the operands to the
 * assembler's ".insn s" directive, with the immediate written as
 * simm12(rs1).
 */
.macro insn_s, opcode, func3, rs2, simm12, rs1
.insn s \opcode, \func3, \rs2, \simm12(\rs1)
.endm
#else
#include <asm/gpr-num.h>
@ -51,10 +62,20 @@
(\simm12 << INSN_I_SIMM12_SHIFT))
.endm
/*
 * insn_s (fallback): hand-assemble the S-type instruction word and emit it
 * with .4byte instead of using ".insn s".
 * NOTE(review): this appears to be the branch taken when CONFIG_AS_HAS_INSN
 * is not set -- confirm against the full file.
 *
 * Register operands are turned into numbers through the .L__gpr_num_<reg>
 * symbols. The immediate is split in two: bits 4:0 are placed at
 * INSN_S_SIMM5_SHIFT and bits 11:5 at INSN_S_SIMM7_SHIFT.
 */
.macro insn_s, opcode, func3, rs2, simm12, rs1
.4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \
(\func3 << INSN_S_FUNC3_SHIFT) | \
(.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \
(.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \
((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \
(((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT))
.endm
#endif
#define __INSN_R(...) insn_r __VA_ARGS__
#define __INSN_I(...) insn_i __VA_ARGS__
#define __INSN_S(...) insn_s __VA_ARGS__
#else /* ! __ASSEMBLY__ */
@ -66,6 +87,9 @@
#define __INSN_I(opcode, func3, rd, rs1, simm12) \
".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n"
/*
 * __INSN_S (C, string form): build the text of a ".insn s" directive from
 * string operands by literal concatenation. All arguments must already be
 * string literals; the result ends with a newline.
 */
#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n"
#else
#include <linux/stringify.h>
@ -92,12 +116,26 @@
" (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \
" .endm\n"
/*
 * DEFINE_INSN_S: assembler text, as a C string, that defines an insn_s
 * assembler macro for use inside inline asm. It starts with
 * __DEFINE_ASM_GPR_NUMS (defined outside this view; presumably it provides
 * the .L__gpr_num_<reg> symbols referenced below) and then encodes the
 * S-type word with .4byte: immediate bits 4:0 at INSN_S_SIMM5_SHIFT and
 * bits 11:5 at INSN_S_SIMM7_SHIFT. Backslashes are doubled because the
 * macro body lives inside a C string literal.
 */
#define DEFINE_INSN_S \
__DEFINE_ASM_GPR_NUMS \
" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \
" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \
" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \
" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \
" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \
" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \
" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \
" .endm\n"
#define UNDEFINE_INSN_R \
" .purgem insn_r\n"
#define UNDEFINE_INSN_I \
" .purgem insn_i\n"
/* Assembler text that removes the insn_s macro again via .purgem. */
#define UNDEFINE_INSN_S \
" .purgem insn_s\n"
#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \
DEFINE_INSN_R \
"insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
@ -108,6 +146,11 @@
"insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \
UNDEFINE_INSN_I
/*
 * __INSN_S (C, fallback form): define the insn_s assembler macro, invoke it
 * once with the given string operands, then purge it, so each use is
 * self-contained within its own inline-asm string.
 */
#define __INSN_S(opcode, func3, rs2, simm12, rs1) \
DEFINE_INSN_S \
"insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \
UNDEFINE_INSN_S
#endif
#endif /* ! __ASSEMBLY__ */
@ -120,6 +163,10 @@
__INSN_I(RV_##opcode, RV_##func3, RV_##rd, \
RV_##rs1, RV_##simm12)
/*
 * INSN_S: front end for S-type instructions. Each argument is prefixed with
 * RV_ by token pasting (e.g. FUNC3(6) becomes RV_FUNC3(6)) before being
 * handed to __INSN_S.
 */
#define INSN_S(opcode, func3, rs2, simm12, rs1) \
__INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \
RV_##simm12, RV_##rs1)
#define RV_OPCODE(v) __ASM_STR(v)
#define RV_FUNC3(v) __ASM_STR(v)
#define RV_FUNC7(v) __ASM_STR(v)
@ -133,6 +180,7 @@
#define RV___RS2(v) __RV_REG(v)
#define RV_OPCODE_MISC_MEM RV_OPCODE(15)
#define RV_OPCODE_OP_IMM RV_OPCODE(19)
#define RV_OPCODE_SYSTEM RV_OPCODE(115)
#define HFENCE_VVMA(vaddr, asid) \
@ -196,6 +244,18 @@
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(4))
/*
 * Prefetch instruction encodings, emitted through INSN_S with
 * OPCODE_OP_IMM and FUNC3(6). The three variants differ only in the value
 * placed in the rs2 field: 0 for PREFETCH_I, 1 for PREFETCH_R and 3 for
 * PREFETCH_W.
 *
 * base is the register holding the address. offset is masked with 0xfe0,
 * so its low five bits are discarded and only bits 11:5 reach the
 * immediate.
 */
#define PREFETCH_I(base, offset) \
INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \
SIMM12((offset) & 0xfe0), RS1(base))
#define PREFETCH_R(base, offset) \
INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \
SIMM12((offset) & 0xfe0), RS1(base))
#define PREFETCH_W(base, offset) \
INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \
SIMM12((offset) & 0xfe0), RS1(base))
#define RISCV_PAUSE ".4byte 0x100000f"
#define ZAWRS_WRS_NTO ".4byte 0x00d00073"
#define ZAWRS_WRS_STO ".4byte 0x01d00073"
@ -203,4 +263,10 @@
#define RISCV_INSN_NOP4 _AC(0x00000013, U)
#ifndef __ASSEMBLY__
#define nop() __asm__ __volatile__ ("nop")
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
#define nops(n) __asm__ __volatile__ (__nops(n))
#endif
#endif /* __ASM_INSN_DEF_H */

View File

@ -13,6 +13,9 @@
#include <vdso/processor.h>
#include <asm/ptrace.h>
#include <asm/insn-def.h>
#include <asm/alternative-macros.h>
#include <asm/hwcap.h>
#define arch_get_mmap_end(addr, len, flags) \
({ \
@ -52,7 +55,6 @@
#endif
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
struct task_struct;
struct pt_regs;
@ -141,6 +143,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
/*
 * Inline-asm fragments that prefetch the address held in register operand x
 * at offset 0. Each is an ALTERNATIVE() whose first variant is a single nop
 * (__nops(1)) and whose second variant is PREFETCH_R (PREFETCH_ASM) or
 * PREFETCH_W (PREFETCHW_ASM), keyed on RISCV_ISA_EXT_ZICBOP and
 * CONFIG_RISCV_ISA_ZICBOP.
 * NOTE(review): ALTERNATIVE() is defined outside this view; presumably the
 * nop is replaced by the prefetch only when Zicbop is detected -- confirm.
 */
#define PREFETCH_ASM(x) \
ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0, \
RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
#define PREFETCHW_ASM(x) \
ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0, \
RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
/*
 * The arch-specific prefetch()/prefetchw() helpers below are only provided
 * when CONFIG_RISCV_ISA_ZICBOP is enabled; ARCH_HAS_PREFETCH and
 * ARCH_HAS_PREFETCHW are defined alongside them.
 */
#ifdef CONFIG_RISCV_ISA_ZICBOP
#define ARCH_HAS_PREFETCH
/*
 * prefetch() - issue PREFETCH_ASM for the address x.
 * @x: address to prefetch, passed to the asm in a general register ("r").
 *
 * The asm statement is volatile and lists a "memory" clobber.
 */
static inline void prefetch(const void *x)
{
__asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory");
}
#define ARCH_HAS_PREFETCHW
/*
 * prefetchw() - issue PREFETCHW_ASM for the address x.
 * @x: address to prefetch, passed to the asm in a general register ("r").
 *
 * Same shape as prefetch(), but uses the PREFETCH_W variant.
 */
static inline void prefetchw(const void *x)
{
__asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
}
#endif /* CONFIG_RISCV_ISA_ZICBOP */
/* Do necessary setup to start up a newly executed thread. */
extern void start_thread(struct pt_regs *regs,

View File

@ -32,6 +32,7 @@
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
static bool any_cpu_has_zicboz;
static bool any_cpu_has_zicbop;
static bool any_cpu_has_zicbom;
unsigned long elf_hwcap __read_mostly;
@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
return 0;
}
/*
 * riscv_ext_zicbop_validate() - validation callback for the zicbop entry.
 * @data:	extension table entry being validated (unused here).
 * @isa_bitmap:	ISA bitmap being built (unused here).
 *
 * Zicbop is only accepted when riscv_cbop_block_size is non-zero and a
 * power of two. On success, any_cpu_has_zicbop is set.
 *
 * Return: 0 if the extension is accepted, -EINVAL otherwise (an error is
 * logged describing which block-size check failed).
 */
static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data,
				     const unsigned long *isa_bitmap)
{
	int ret = -EINVAL;

	if (!riscv_cbop_block_size) {
		pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n");
	} else if (!is_power_of_2(riscv_cbop_block_size)) {
		pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n");
	} else {
		/* Block size is usable: remember that a hart validated Zicbop. */
		any_cpu_has_zicbop = true;
		ret = 0;
	}

	return ret;
}
static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data,
const unsigned long *isa_bitmap)
{
@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate),
__RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate),
__RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void)
current->thread.envcfg |= ENVCFG_CBCFE;
else if (any_cpu_has_zicbom)
pr_warn("Zicbom disabled as it is unavailable on some harts\n");
if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) &&
any_cpu_has_zicbop)
pr_warn("Zicbop disabled as it is unavailable on some harts\n");
}
#ifdef CONFIG_RISCV_ALTERNATIVE

View File

@ -114,6 +114,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
unsigned int riscv_cboz_block_size;
EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
unsigned int riscv_cbop_block_size;
EXPORT_SYMBOL_GPL(riscv_cbop_block_size);
static void __init cbo_get_block_size(struct device_node *node,
const char *name, u32 *block_size,
unsigned long *first_hartid)
@ -138,8 +141,8 @@ static void __init cbo_get_block_size(struct device_node *node,
void __init riscv_init_cbo_blocksizes(void)
{
unsigned long cbom_hartid, cboz_hartid;
u32 cbom_block_size = 0, cboz_block_size = 0;
unsigned long cbom_hartid, cboz_hartid, cbop_hartid;
u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0;
struct device_node *node;
struct acpi_table_header *rhct;
acpi_status status;
@ -151,13 +154,15 @@ void __init riscv_init_cbo_blocksizes(void)
&cbom_block_size, &cbom_hartid);
cbo_get_block_size(node, "riscv,cboz-block-size",
&cboz_block_size, &cboz_hartid);
cbo_get_block_size(node, "riscv,cbop-block-size",
&cbop_block_size, &cbop_hartid);
}
} else {
status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
if (ACPI_FAILURE(status))
return;
acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL);
acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size);
acpi_put_table((struct acpi_table_header *)rhct);
}
@ -166,6 +171,9 @@ void __init riscv_init_cbo_blocksizes(void)
if (cboz_block_size)
riscv_cboz_block_size = cboz_block_size;
if (cbop_block_size)
riscv_cbop_block_size = cbop_block_size;
}
#ifdef CONFIG_SMP