linux/arch/riscv/kernel/sys_riscv.c
Deepak Gupta 6c7559f22b riscv/mm: ensure PROT_WRITE leads to VM_READ | VM_WRITE
'arch_calc_vm_prot_bits' is implemented on risc-v to return VM_READ |
VM_WRITE if PROT_WRITE is specified. Similarly 'riscv_sys_mmap' is
updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
This is to make sure that any existing apps using PROT_WRITE still work.

Earlier 'protection_map[VM_WRITE]' used to pick read-write PTE encodings.
Now 'protection_map[VM_WRITE]' will always pick PAGE_SHADOWSTACK PTE
encodings for shadow stack. The above changes ensure that existing apps
continue to work because underneath, the kernel will be picking
'protection_map[VM_WRITE|VM_READ]' PTE encodings.

Reviewed-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Deepak Gupta <debug@rivosinc.com>
Tested-by: Andreas Korb <andreas.korb@aisec.fraunhofer.de> # QEMU, custom CVA6
Tested-by: Valentin Haudiquet <valentin.haudiquet@canonical.com>
Link: https://patch.msgid.link/20251112-v5_user_cfi_series-v23-6-b55691eacf4f@rivosinc.com
Signed-off-by: Paul Walmsley <pjw@kernel.org>
2026-01-25 21:09:53 -07:00

86 lines
2.8 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2014 Darius Rad <darius@bluespec.com>
* Copyright (C) 2017 SiFive
*/
#include <linux/syscalls.h>
#include <asm/cacheflush.h>
#include <asm-generic/mman-common.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long offset,
unsigned long page_shift_offset)
{
if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
return -EINVAL;
/*
* If PROT_WRITE is specified then extend that to PROT_READ
* protection_map[VM_WRITE] is now going to select shadow stack encodings.
* So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
* If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
*/
if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
prot |= PROT_READ;
return ksys_mmap_pgoff(addr, len, prot, flags, fd,
offset >> (PAGE_SHIFT - page_shift_offset));
}
#ifdef CONFIG_64BIT
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, offset)
{
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
}
#endif
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, offset)
{
/*
* Note that the shift for mmap2 is constant (12),
* regardless of PAGE_SIZE
*/
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
}
#endif
/*
* Allows the instruction cache to be flushed from userspace. Despite RISC-V
* having a direct 'fence.i' instruction available to userspace (which we
* can't trap!), that's not actually viable when running on Linux because the
* kernel might schedule a process on another hart. There is no way for
* userspace to handle this without invoking the kernel (as it doesn't know the
* thread->hart mappings), so we've defined a RISC-V specific system call to
* flush the instruction cache.
*
* sys_riscv_flush_icache() is defined to flush the instruction cache over an
* address range, with the flush applying to either all threads or just the
* caller. We don't currently do anything with the address range, that's just
* in there for forwards compatibility.
*/
SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
uintptr_t, flags)
{
/* Check the reserved flags. */
if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
return -EINVAL;
flush_icache_mm(current->mm, flags & SYS_RISCV_FLUSH_ICACHE_LOCAL);
return 0;
}
/* Not defined using SYSCALL_DEFINE0 to avoid error injection */
asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *__unused)
{
return -ENOSYS;
}