riscv: add floating point insn support to misaligned access emulation

This support is partially based of openSBI misaligned emulation floating
point instruction support. It provides support for the existing
floating point instructions (both for 32/64 bits as well as compressed
ones). Since floating point registers are not part of the pt_regs
struct, we need to modify them directly using some assembly. We also
dirty the pt_regs status in case we modify them to be sure context
switch will save FP state. With this support, Linux is on par with
openSBI support.

Signed-off-by: Clément Léger <cleger@rivosinc.com>
Link: https://lore.kernel.org/r/20231004151405.521596-5-cleger@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
Clément Léger 2023-10-04 17:14:01 +02:00 committed by Palmer Dabbelt
parent 89c12fecdc
commit 7c586a555a
No known key found for this signature in database
GPG Key ID: 2E1319F35FBB1889
2 changed files with 269 additions and 4 deletions

View File

@ -104,3 +104,124 @@ ENTRY(__fstate_restore)
csrc CSR_STATUS, t1
ret
ENDPROC(__fstate_restore)
#define get_f32(which) fmv.x.s a0, which; j 2f
#define put_f32(which) fmv.s.x which, a1; j 2f
#if __riscv_xlen == 64
# define get_f64(which) fmv.x.d a0, which; j 2f
# define put_f64(which) fmv.d.x which, a1; j 2f
#else
# define get_f64(which) fsd which, 0(a1); j 2f
# define put_f64(which) fld which, 0(a1); j 2f
#endif
.macro fp_access_prologue
/*
* Compute jump offset to store the correct FP register since we don't
* have indirect FP register access
*/
sll t0, a0, 3
la t2, 1f
add t0, t0, t2
li t1, SR_FS
csrs CSR_STATUS, t1
jr t0
1:
.endm
.macro fp_access_epilogue
2:
csrc CSR_STATUS, t1
ret
.endm
#define fp_access_body(__access_func) \
__access_func(f0); \
__access_func(f1); \
__access_func(f2); \
__access_func(f3); \
__access_func(f4); \
__access_func(f5); \
__access_func(f6); \
__access_func(f7); \
__access_func(f8); \
__access_func(f9); \
__access_func(f10); \
__access_func(f11); \
__access_func(f12); \
__access_func(f13); \
__access_func(f14); \
__access_func(f15); \
__access_func(f16); \
__access_func(f17); \
__access_func(f18); \
__access_func(f19); \
__access_func(f20); \
__access_func(f21); \
__access_func(f22); \
__access_func(f23); \
__access_func(f24); \
__access_func(f25); \
__access_func(f26); \
__access_func(f27); \
__access_func(f28); \
__access_func(f29); \
__access_func(f30); \
__access_func(f31)
#ifdef CONFIG_RISCV_MISALIGNED
/*
* Disable compressed instructions set to keep a constant offset between FP
* load/store/move instructions
*/
.option norvc
/*
* put_f32_reg - Set a FP register from a register containing the value
* a0 = FP register index to be set
* a1 = value to be loaded in the FP register
*/
SYM_FUNC_START(put_f32_reg)
fp_access_prologue
fp_access_body(put_f32)
fp_access_epilogue
SYM_FUNC_END(put_f32_reg)
/*
* get_f32_reg - Get a FP register value and return it
* a0 = FP register index to be retrieved
*/
SYM_FUNC_START(get_f32_reg)
fp_access_prologue
fp_access_body(get_f32)
fp_access_epilogue
SYM_FUNC_END(get_f32_reg)
/*
* put_f64_reg - Set a 64 bits FP register from a value or a pointer.
* a0 = FP register index to be set
* a1 = value/pointer to be loaded in the FP register (when xlen == 32 bits, we
* load the value to a pointer).
*/
SYM_FUNC_START(put_f64_reg)
fp_access_prologue
fp_access_body(put_f64)
fp_access_epilogue
SYM_FUNC_END(put_f64_reg)
/*
* put_f64_reg - Get a 64 bits FP register value and returned it or store it to
* a pointer.
* a0 = FP register index to be retrieved
* a1 = If xlen == 32, pointer which should be loaded with the FP register value
* or unused if xlen == 64. In which case the FP register value is returned
* through a0
*/
SYM_FUNC_START(get_f64_reg)
fp_access_prologue
fp_access_body(get_f64)
fp_access_epilogue
SYM_FUNC_END(get_f64_reg)
#endif /* CONFIG_RISCV_MISALIGNED */

View File

@ -153,6 +153,115 @@
#define PRECISION_S 0
#define PRECISION_D 1
#ifdef CONFIG_FPU
#define FP_GET_RD(insn) (insn >> 7 & 0x1F)
extern void put_f32_reg(unsigned long fp_reg, unsigned long value);
static int set_f32_rd(unsigned long insn, struct pt_regs *regs,
unsigned long val)
{
unsigned long fp_reg = FP_GET_RD(insn);
put_f32_reg(fp_reg, val);
regs->status |= SR_FS_DIRTY;
return 0;
}
extern void put_f64_reg(unsigned long fp_reg, unsigned long value);
static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val)
{
unsigned long fp_reg = FP_GET_RD(insn);
unsigned long value;
#if __riscv_xlen == 32
value = (unsigned long) &val;
#else
value = val;
#endif
put_f64_reg(fp_reg, value);
regs->status |= SR_FS_DIRTY;
return 0;
}
#if __riscv_xlen == 32
extern void get_f64_reg(unsigned long fp_reg, u64 *value);
static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
u64 val;
get_f64_reg(fp_reg, &val);
regs->status |= SR_FS_DIRTY;
return val;
}
#else
extern unsigned long get_f64_reg(unsigned long fp_reg);
static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
unsigned long val;
val = get_f64_reg(fp_reg);
regs->status |= SR_FS_DIRTY;
return val;
}
#endif
extern unsigned long get_f32_reg(unsigned long fp_reg);
static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
unsigned long val;
val = get_f32_reg(fp_reg);
regs->status |= SR_FS_DIRTY;
return val;
}
#else /* CONFIG_FPU */
static void set_f32_rd(unsigned long insn, struct pt_regs *regs,
unsigned long val) {}
static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {}
static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
return 0;
}
static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
return 0;
}
#endif
#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs))
#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs))
#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs))
#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs))
#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs))
#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs))
#ifdef CONFIG_RISCV_M_MODE
static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
{
@ -362,15 +471,21 @@ int handle_misaligned_load(struct pt_regs *regs)
return -1;
}
if (!IS_ENABLED(CONFIG_FPU) && fp)
return -EOPNOTSUPP;
val.data_u64 = 0;
for (i = 0; i < len; i++) {
if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i]))
return -1;
}
if (fp)
return -1;
SET_RD(insn, regs, val.data_ulong << shift >> shift);
if (!fp)
SET_RD(insn, regs, val.data_ulong << shift >> shift);
else if (len == 8)
set_f64_rd(insn, regs, val.data_u64);
else
set_f32_rd(insn, regs, val.data_ulong);
regs->epc = epc + INSN_LEN(insn);
@ -383,7 +498,7 @@ int handle_misaligned_store(struct pt_regs *regs)
unsigned long epc = regs->epc;
unsigned long insn;
unsigned long addr = regs->badaddr;
int i, len = 0;
int i, len = 0, fp = 0;
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
@ -400,6 +515,14 @@ int handle_misaligned_store(struct pt_regs *regs)
} else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
len = 8;
#endif
} else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) {
fp = 1;
len = 8;
val.data_u64 = GET_F64_RS2(insn, regs);
} else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) {
fp = 1;
len = 4;
val.data_ulong = GET_F32_RS2(insn, regs);
} else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
len = 2;
#if defined(CONFIG_64BIT)
@ -418,11 +541,32 @@ int handle_misaligned_store(struct pt_regs *regs)
((insn >> SH_RD) & 0x1f)) {
len = 4;
val.data_ulong = GET_RS2C(insn, regs);
} else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
fp = 1;
len = 8;
val.data_u64 = GET_F64_RS2S(insn, regs);
} else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) {
fp = 1;
len = 8;
val.data_u64 = GET_F64_RS2C(insn, regs);
#if !defined(CONFIG_64BIT)
} else if ((insn & INSN_MASK_C_FSW) == INSN_MATCH_C_FSW) {
fp = 1;
len = 4;
val.data_ulong = GET_F32_RS2S(insn, regs);
} else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) {
fp = 1;
len = 4;
val.data_ulong = GET_F32_RS2C(insn, regs);
#endif
} else {
regs->epc = epc;
return -1;
}
if (!IS_ENABLED(CONFIG_FPU) && fp)
return -EOPNOTSUPP;
for (i = 0; i < len; i++) {
if (store_u8(regs, (void *)(addr + i), val.data_bytes[i]))
return -1;