mirror of
https://github.com/torvalds/linux.git
synced 2026-06-10 07:32:29 +02:00
Merge branch 'upstream/android-3.10' into 'linaro-fixes/android-3.10'
This commit is contained in:
commit
51b6770b8d
|
|
@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER
|
|||
- secure_computing is called from a ptrace_event()-safe context
|
||||
- secure_computing return value is checked and a return value of -1
|
||||
results in the system call being skipped immediately.
|
||||
- seccomp syscall wired up
|
||||
|
||||
config SECCOMP_FILTER
|
||||
def_bool y
|
||||
|
|
|
|||
|
|
@ -59,6 +59,21 @@
|
|||
#define smp_wmb() dmb()
|
||||
#endif
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#define read_barrier_depends() do { } while(0)
|
||||
#define smp_read_barrier_depends() do { } while(0)
|
||||
|
||||
|
|
|
|||
|
|
@ -97,8 +97,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
|
|||
memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
|
||||
}
|
||||
|
||||
static inline int syscall_get_arch(struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
static inline int syscall_get_arch(void)
|
||||
{
|
||||
/* ARM tasks don't change audit architectures on the fly. */
|
||||
return AUDIT_ARCH_ARM;
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ config ARM64
|
|||
select GENERIC_TIME_VSYSCALL
|
||||
select HARDIRQS_SW_RESEND
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_SECCOMP_FILTER
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_DEBUG_BUGVERBOSE
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
|
|
@ -232,6 +233,20 @@ config ARMV7_COMPAT_CPUINFO
|
|||
|
||||
source "mm/Kconfig"
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
---help---
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
|
||||
endmenu
|
||||
|
||||
menu "Boot options"
|
||||
|
|
|
|||
|
|
@ -35,10 +35,60 @@
|
|||
#define smp_mb() barrier()
|
||||
#define smp_rmb() barrier()
|
||||
#define smp_wmb() barrier()
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#else
|
||||
|
||||
#define smp_mb() asm volatile("dmb ish" : : : "memory")
|
||||
#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
|
||||
#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
switch (sizeof(*p)) { \
|
||||
case 4: \
|
||||
asm volatile ("stlr %w1, %0" \
|
||||
: "=Q" (*p) : "r" (v) : "memory"); \
|
||||
break; \
|
||||
case 8: \
|
||||
asm volatile ("stlr %1, %0" \
|
||||
: "=Q" (*p) : "r" (v) : "memory"); \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1; \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
switch (sizeof(*p)) { \
|
||||
case 4: \
|
||||
asm volatile ("ldar %w0, %1" \
|
||||
: "=r" (___p1) : "Q" (*p) : "memory"); \
|
||||
break; \
|
||||
case 8: \
|
||||
asm volatile ("ldar %0, %1" \
|
||||
: "=r" (___p1) : "Q" (*p) : "memory"); \
|
||||
break; \
|
||||
} \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif
|
||||
|
||||
#define read_barrier_depends() do { } while(0)
|
||||
|
|
|
|||
|
|
@ -191,6 +191,13 @@ typedef struct compat_siginfo {
|
|||
compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */
|
||||
int _fd;
|
||||
} _sigpoll;
|
||||
|
||||
/* SIGSYS */
|
||||
struct {
|
||||
compat_uptr_t _call_addr; /* calling user insn */
|
||||
int _syscall; /* triggering system call number */
|
||||
unsigned int _arch; /* AUDIT_ARCH_* of syscall */
|
||||
} _sigsys;
|
||||
} _sifields;
|
||||
} compat_siginfo_t;
|
||||
|
||||
|
|
|
|||
|
|
@ -60,6 +60,15 @@
|
|||
#define COMPAT_PT_TEXT_ADDR 0x10000
|
||||
#define COMPAT_PT_DATA_ADDR 0x10004
|
||||
#define COMPAT_PT_TEXT_END_ADDR 0x10008
|
||||
|
||||
/*
|
||||
* used to skip a system call when tracer changes its number to -1
|
||||
* with ptrace(PTRACE_SET_SYSCALL)
|
||||
*/
|
||||
#define RET_SKIP_SYSCALL -1
|
||||
#define RET_SKIP_SYSCALL_TRACE -2
|
||||
/*
 * True when the low 32 bits of @no, interpreted as a signed int, equal -1.
 * The argument is parenthesized so that expressions containing operators of
 * lower precedence than '&' are masked as a whole.
 */
#define IS_SKIP_SYSCALL(no) ((int)((no) & 0xffffffff) == -1)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
/* sizeof(struct user) for AArch32 */
|
||||
|
|
|
|||
25
arch/arm64/include/asm/seccomp.h
Normal file
25
arch/arm64/include/asm/seccomp.h
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* arch/arm64/include/asm/seccomp.h
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Limited
|
||||
* Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef _ASM_SECCOMP_H
|
||||
#define _ASM_SECCOMP_H
|
||||
|
||||
#include <asm/unistd.h>
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#define __NR_seccomp_read_32 __NR_compat_read
|
||||
#define __NR_seccomp_write_32 __NR_compat_write
|
||||
#define __NR_seccomp_exit_32 __NR_compat_exit
|
||||
#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn
|
||||
#endif /* CONFIG_COMPAT */
|
||||
|
||||
#include <asm-generic/seccomp.h>
|
||||
|
||||
#endif /* _ASM_SECCOMP_H */
|
||||
|
|
@ -30,6 +30,9 @@
|
|||
* Compat syscall numbers used by the AArch64 kernel.
|
||||
*/
|
||||
#define __NR_compat_restart_syscall 0
|
||||
#define __NR_compat_exit 1
|
||||
#define __NR_compat_read 3
|
||||
#define __NR_compat_write 4
|
||||
#define __NR_compat_sigreturn 119
|
||||
#define __NR_compat_rt_sigreturn 173
|
||||
|
||||
|
|
@ -40,7 +43,7 @@
|
|||
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
|
||||
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
|
||||
|
||||
#define __NR_compat_syscalls 378
|
||||
#define __NR_compat_syscalls 384
|
||||
#endif
|
||||
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
|
|
|
|||
|
|
@ -781,3 +781,11 @@ __SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
|
|||
__SYSCALL(__NR_kcmp, sys_kcmp)
|
||||
#define __NR_finit_module 379
|
||||
__SYSCALL(__NR_finit_module, sys_finit_module)
|
||||
/* #define __NR_sched_setattr 380 */
|
||||
__SYSCALL(380, sys_ni_syscall)
|
||||
/* #define __NR_sched_getattr 381 */
|
||||
__SYSCALL(381, sys_ni_syscall)
|
||||
/* #define __NR_renameat2 382 */
|
||||
__SYSCALL(382, sys_ni_syscall)
|
||||
#define __NR_seccomp 383
|
||||
__SYSCALL(__NR_seccomp, sys_seccomp)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
#define PTRACE_SET_SYSCALL 23
|
||||
|
||||
/*
|
||||
* PSR bits
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#include <asm/asm-offsets.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/esr.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
|
|
@ -662,6 +663,10 @@ __sys_trace:
|
|||
mov x0, sp
|
||||
bl syscall_trace_enter
|
||||
adr lr, __sys_trace_return // return address
|
||||
cmp w0, #RET_SKIP_SYSCALL_TRACE // skip syscall and tracing?
|
||||
b.eq ret_to_user
|
||||
cmp w0, #RET_SKIP_SYSCALL // skip syscall?
|
||||
b.eq __sys_trace_return_skipped
|
||||
uxtw scno, w0 // syscall number (possibly new)
|
||||
mov x1, sp // pointer to regs
|
||||
cmp scno, sc_nr // check upper syscall limit
|
||||
|
|
@ -675,6 +680,7 @@ __sys_trace:
|
|||
|
||||
__sys_trace_return:
|
||||
str x0, [sp] // save returned x0
|
||||
__sys_trace_return_skipped: // x0 already in regs[0]
|
||||
mov x0, sp
|
||||
bl syscall_trace_exit
|
||||
b ret_to_user
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include <linux/smp.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/user.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/signal.h>
|
||||
|
|
@ -1064,7 +1065,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
|
|||
long arch_ptrace(struct task_struct *child, long request,
|
||||
unsigned long addr, unsigned long data)
|
||||
{
|
||||
return ptrace_request(child, request, addr, data);
|
||||
int ret;
|
||||
|
||||
switch (request) {
|
||||
case PTRACE_SET_SYSCALL:
|
||||
task_pt_regs(child)->syscallno = data;
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
ret = ptrace_request(child, request, addr, data);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
enum ptrace_syscall_dir {
|
||||
|
|
@ -1096,9 +1109,33 @@ static void tracehook_report_syscall(struct pt_regs *regs,
|
|||
|
||||
asmlinkage int syscall_trace_enter(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int saved_syscallno = regs->syscallno;
|
||||
|
||||
/* Do the secure computing check first; failures should be fast. */
|
||||
if (secure_computing(regs->syscallno) == -1)
|
||||
return RET_SKIP_SYSCALL_TRACE;
|
||||
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACE))
|
||||
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
|
||||
|
||||
if (IS_SKIP_SYSCALL(regs->syscallno)) {
|
||||
/*
|
||||
* RESTRICTION: we can't modify a return value of user
|
||||
* issued syscall(-1) here. In order to ease this flavor,
|
||||
* we need to treat whatever value in x0 as a return value,
|
||||
* but this might result in a bogus value being returned.
|
||||
*/
|
||||
/*
|
||||
* NOTE: syscallno may also be set to -1 if fatal signal is
|
||||
* detected in tracehook_report_syscall_entry(), but since
|
||||
* a value set to x0 here is not used in this case, we may
|
||||
* neglect the case.
|
||||
*/
|
||||
if (!test_thread_flag(TIF_SYSCALL_TRACE) ||
|
||||
(IS_SKIP_SYSCALL(saved_syscallno)))
|
||||
regs->regs[0] = -ENOSYS;
|
||||
}
|
||||
|
||||
audit_syscall_entry(syscall_get_arch(), regs->syscallno,
|
||||
regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]);
|
||||
|
||||
|
|
|
|||
|
|
@ -211,6 +211,14 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
|
|||
err |= __put_user(from->si_uid, &to->si_uid);
|
||||
err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr);
|
||||
break;
|
||||
#ifdef __ARCH_SIGSYS
|
||||
case __SI_SYS:
|
||||
err |= __put_user((compat_uptr_t)(unsigned long)
|
||||
from->si_call_addr, &to->si_call_addr);
|
||||
err |= __put_user(from->si_syscall, &to->si_syscall);
|
||||
err |= __put_user(from->si_arch, &to->si_arch);
|
||||
break;
|
||||
#endif
|
||||
default: /* this is just in case for now ... */
|
||||
err |= __put_user(from->si_pid, &to->si_pid);
|
||||
err |= __put_user(from->si_uid, &to->si_uid);
|
||||
|
|
|
|||
|
|
@ -45,13 +45,36 @@
|
|||
# define smp_rmb() rmb()
|
||||
# define smp_wmb() wmb()
|
||||
# define smp_read_barrier_depends() read_barrier_depends()
|
||||
|
||||
#else
|
||||
|
||||
# define smp_mb() barrier()
|
||||
# define smp_rmb() barrier()
|
||||
# define smp_wmb() barrier()
|
||||
# define smp_read_barrier_depends() do { } while(0)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
|
||||
* need for asm trickery!
|
||||
*/
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
/*
|
||||
* XXX check on this ---I suspect what Linus really wants here is
|
||||
* acquire vs release semantics but we can't discuss this stuff with
|
||||
|
|
|
|||
|
|
@ -82,4 +82,19 @@ static inline void fence(void)
|
|||
#define smp_read_barrier_depends() do { } while (0)
|
||||
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif /* _ASM_METAG_BARRIER_H */
|
||||
|
|
|
|||
|
|
@ -180,4 +180,19 @@
|
|||
#define nudge_writes() mb()
|
||||
#endif
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif /* __ASM_BARRIER_H */
|
||||
|
|
|
|||
|
|
@ -45,11 +45,15 @@
|
|||
# define SMPWMB eieio
|
||||
#endif
|
||||
|
||||
#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
|
||||
|
||||
#define smp_mb() mb()
|
||||
#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
|
||||
#define smp_rmb() __lwsync()
|
||||
#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
|
||||
#define smp_read_barrier_depends() read_barrier_depends()
|
||||
#else
|
||||
#define __lwsync() barrier()
|
||||
|
||||
#define smp_mb() barrier()
|
||||
#define smp_rmb() barrier()
|
||||
#define smp_wmb() barrier()
|
||||
|
|
@ -65,4 +69,19 @@
|
|||
#define data_barrier(x) \
|
||||
asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
__lwsync(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
__lwsync(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif /* _ASM_POWERPC_BARRIER_H */
|
||||
|
|
|
|||
|
|
@ -32,4 +32,19 @@
|
|||
|
||||
#define set_mb(var, value) do { var = value; mb(); } while (0)
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif /* __ASM_BARRIER_H */
|
||||
|
|
|
|||
|
|
@ -89,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
|
|||
regs->orig_gpr2 = args[0];
|
||||
}
|
||||
|
||||
static inline int syscall_get_arch(struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
static inline int syscall_get_arch(void)
|
||||
{
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (test_tsk_thread_flag(task, TIF_31BIT))
|
||||
if (test_tsk_thread_flag(current, TIF_31BIT))
|
||||
return AUDIT_ARCH_S390;
|
||||
#endif
|
||||
return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390;
|
||||
|
|
|
|||
|
|
@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
|
|||
|
||||
#define smp_read_barrier_depends() do { } while(0)
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif /* !(__SPARC64_BARRIER_H) */
|
||||
|
|
|
|||
|
|
@ -92,12 +92,53 @@
|
|||
#endif
|
||||
#define smp_read_barrier_depends() read_barrier_depends()
|
||||
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
||||
#else
|
||||
#else /* !SMP */
|
||||
#define smp_mb() barrier()
|
||||
#define smp_rmb() barrier()
|
||||
#define smp_wmb() barrier()
|
||||
#define smp_read_barrier_depends() do { } while (0)
|
||||
#define set_mb(var, value) do { var = value; barrier(); } while (0)
|
||||
#endif /* SMP */
|
||||
|
||||
#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
|
||||
|
||||
/*
|
||||
* For either of these options x86 doesn't have a strong TSO memory
|
||||
* model and we should fall back to full barriers.
|
||||
*/
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#else /* regular x86 TSO memory ordering */
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
barrier(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -90,8 +90,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
|
|||
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
|
||||
}
|
||||
|
||||
static inline int syscall_get_arch(struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
static inline int syscall_get_arch(void)
|
||||
{
|
||||
return AUDIT_ARCH_I386;
|
||||
}
|
||||
|
|
@ -220,8 +219,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
|
|||
}
|
||||
}
|
||||
|
||||
static inline int syscall_get_arch(struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
static inline int syscall_get_arch(void)
|
||||
{
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
/*
|
||||
|
|
@ -233,7 +231,7 @@ static inline int syscall_get_arch(struct task_struct *task,
|
|||
*
|
||||
* x32 tasks should be considered AUDIT_ARCH_X86_64.
|
||||
*/
|
||||
if (task_thread_info(task)->status & TS_COMPAT)
|
||||
if (task_thread_info(current)->status & TS_COMPAT)
|
||||
return AUDIT_ARCH_I386;
|
||||
#endif
|
||||
/* Both x32 and x86_64 are considered "64-bit". */
|
||||
|
|
|
|||
|
|
@ -357,3 +357,7 @@
|
|||
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
|
||||
349 i386 kcmp sys_kcmp
|
||||
350 i386 finit_module sys_finit_module
|
||||
# 351 i386 sched_setattr sys_sched_setattr
|
||||
# 352 i386 sched_getattr sys_sched_getattr
|
||||
# 353 i386 renameat2 sys_renameat2
|
||||
354 i386 seccomp sys_seccomp
|
||||
|
|
|
|||
|
|
@ -320,6 +320,10 @@
|
|||
311 64 process_vm_writev sys_process_vm_writev
|
||||
312 common kcmp sys_kcmp
|
||||
313 common finit_module sys_finit_module
|
||||
# 314 common sched_setattr sys_sched_setattr
|
||||
# 315 common sched_getattr sys_sched_getattr
|
||||
# 316 common renameat2 sys_renameat2
|
||||
317 common seccomp sys_seccomp
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
|
|
|
|||
|
|
@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds);
|
|||
/*
|
||||
* determine how safe it is to execute the proposed program
|
||||
* - the caller must hold ->cred_guard_mutex to protect against
|
||||
* PTRACE_ATTACH
|
||||
* PTRACE_ATTACH or seccomp thread-sync
|
||||
*/
|
||||
static int check_unsafe_exec(struct linux_binprm *bprm)
|
||||
{
|
||||
|
|
@ -1239,7 +1239,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
|
|||
* This isn't strictly necessary, but it makes it harder for LSMs to
|
||||
* mess up.
|
||||
*/
|
||||
if (current->no_new_privs)
|
||||
if (task_no_new_privs(current))
|
||||
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
|
||||
|
||||
n_fs = 1;
|
||||
|
|
@ -1286,7 +1286,7 @@ int prepare_binprm(struct linux_binprm *bprm)
|
|||
bprm->cred->egid = current_egid();
|
||||
|
||||
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
|
||||
!current->no_new_privs &&
|
||||
!task_no_new_privs(current) &&
|
||||
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
|
||||
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
|
||||
/* Set-uid? */
|
||||
|
|
|
|||
|
|
@ -46,5 +46,20 @@
|
|||
#define read_barrier_depends() do {} while (0)
|
||||
#define smp_read_barrier_depends() do {} while (0)
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
ACCESS_ONCE(*p) = (v); \
|
||||
} while (0)
|
||||
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
smp_mb(); \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif /* __ASM_GENERIC_BARRIER_H */
|
||||
|
|
|
|||
29
include/asm-generic/seccomp.h
Normal file
29
include/asm-generic/seccomp.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* include/asm-generic/seccomp.h
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Limited
|
||||
* Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef _ASM_GENERIC_SECCOMP_H
|
||||
#define _ASM_GENERIC_SECCOMP_H
|
||||
|
||||
#include <asm-generic/unistd.h>
|
||||
|
||||
#if defined(CONFIG_COMPAT) && !defined(__NR_seccomp_read_32)
|
||||
#define __NR_seccomp_read_32 __NR_read
|
||||
#define __NR_seccomp_write_32 __NR_write
|
||||
#define __NR_seccomp_exit_32 __NR_exit
|
||||
#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn
|
||||
#endif /* CONFIG_COMPAT && ! already defined */
|
||||
|
||||
#define __NR_seccomp_read __NR_read
|
||||
#define __NR_seccomp_write __NR_write
|
||||
#define __NR_seccomp_exit __NR_exit
|
||||
#define __NR_seccomp_sigreturn __NR_rt_sigreturn
|
||||
|
||||
#endif /* _ASM_GENERIC_SECCOMP_H */
|
||||
|
||||
|
|
@ -144,8 +144,6 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
|
|||
|
||||
/**
|
||||
* syscall_get_arch - return the AUDIT_ARCH for the current system call
|
||||
* @task: task of interest, must be in system call entry tracing
|
||||
* @regs: task_pt_regs() of @task
|
||||
*
|
||||
* Returns the AUDIT_ARCH_* based on the system call convention in use.
|
||||
*
|
||||
|
|
@ -155,5 +153,5 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
|
|||
* Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must
|
||||
* provide an implementation of this.
|
||||
*/
|
||||
int syscall_get_arch(struct task_struct *task, struct pt_regs *regs);
|
||||
int syscall_get_arch(void);
|
||||
#endif /* _ASM_SYSCALL_H */
|
||||
|
|
|
|||
|
|
@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
|
|||
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
|
||||
#endif
|
||||
|
||||
/* Is this type a native word size -- useful for atomic operations */
|
||||
#ifndef __native_word
|
||||
# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
|
||||
#endif
|
||||
|
||||
/* Compile time object size, -1 for unknown */
|
||||
#ifndef __compiletime_object_size
|
||||
# define __compiletime_object_size(obj) -1
|
||||
|
|
@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
|
|||
#define compiletime_assert(condition, msg) \
|
||||
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
|
||||
|
||||
#define compiletime_assert_atomic_type(t) \
|
||||
compiletime_assert(__native_word(t), \
|
||||
"Need native word sized stores/loads for atomicity.")
|
||||
|
||||
/*
|
||||
* Prevent the compiler from merging or refetching accesses. The compiler
|
||||
* is also forbidden from reordering successive instances of ACCESS_ONCE(),
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ extern struct fs_struct init_fs;
|
|||
|
||||
#define INIT_SIGNALS(sig) { \
|
||||
.nr_threads = 1, \
|
||||
.thread_head = LIST_HEAD_INIT(init_task.thread_node), \
|
||||
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
|
||||
.shared_pending = { \
|
||||
.list = LIST_HEAD_INIT(sig.shared_pending.list), \
|
||||
|
|
@ -213,6 +214,7 @@ extern struct task_group root_task_group;
|
|||
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
|
||||
}, \
|
||||
.thread_group = LIST_HEAD_INIT(tsk.thread_group), \
|
||||
.thread_node = LIST_HEAD_INIT(init_signals.thread_head), \
|
||||
INIT_IDS \
|
||||
INIT_PERF_EVENTS(tsk) \
|
||||
INIT_TRACE_IRQFLAGS \
|
||||
|
|
|
|||
|
|
@ -476,6 +476,7 @@ struct signal_struct {
|
|||
atomic_t sigcnt;
|
||||
atomic_t live;
|
||||
int nr_threads;
|
||||
struct list_head thread_head;
|
||||
|
||||
wait_queue_head_t wait_chldexit; /* for wait4() */
|
||||
|
||||
|
|
@ -1117,13 +1118,12 @@ struct task_struct {
|
|||
* execve */
|
||||
unsigned in_iowait:1;
|
||||
|
||||
/* task may not gain privileges */
|
||||
unsigned no_new_privs:1;
|
||||
|
||||
/* Revert to default priority/policy when forking */
|
||||
unsigned sched_reset_on_fork:1;
|
||||
unsigned sched_contributes_to_load:1;
|
||||
|
||||
unsigned long atomic_flags; /* Flags needing atomic access. */
|
||||
|
||||
pid_t pid;
|
||||
pid_t tgid;
|
||||
|
||||
|
|
@ -1156,6 +1156,7 @@ struct task_struct {
|
|||
/* PID/PID hash table linkage. */
|
||||
struct pid_link pids[PIDTYPE_MAX];
|
||||
struct list_head thread_group;
|
||||
struct list_head thread_node;
|
||||
|
||||
struct completion *vfork_done; /* for vfork() */
|
||||
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
|
||||
|
|
@ -1687,6 +1688,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
|
|||
current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
|
||||
}
|
||||
|
||||
/* Per-process atomic flags. */
|
||||
#define PFA_NO_NEW_PRIVS 0x00000001 /* Bit number (not a mask) handed to test_bit()/set_bit() on atomic_flags: task may not gain new privileges. */
|
||||
|
||||
static inline bool task_no_new_privs(struct task_struct *p)
|
||||
{
|
||||
return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
|
||||
}
|
||||
|
||||
static inline void task_set_no_new_privs(struct task_struct *p)
|
||||
{
|
||||
set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* task->jobctl flags
|
||||
*/
|
||||
|
|
@ -2166,6 +2180,16 @@ extern bool current_is_single_threaded(void);
|
|||
#define while_each_thread(g, t) \
|
||||
while ((t = next_thread(t)) != g)
|
||||
|
||||
#define __for_each_thread(signal, t) \
|
||||
list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
|
||||
|
||||
#define for_each_thread(p, t) \
|
||||
__for_each_thread((p)->signal, t)
|
||||
|
||||
/* Careful: this is a double loop, 'break' won't work as expected. */
|
||||
#define for_each_process_thread(p, t) \
|
||||
for_each_process(p) for_each_thread(p, t)
|
||||
|
||||
static inline int get_nr_threads(struct task_struct *tsk)
|
||||
{
|
||||
return tsk->signal->nr_threads;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include <uapi/linux/seccomp.h>
|
||||
|
||||
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
|
||||
#include <linux/thread_info.h>
|
||||
|
|
@ -14,11 +16,11 @@ struct seccomp_filter;
|
|||
*
|
||||
* @mode: indicates one of the valid values above for controlled
|
||||
* system calls available to a process.
|
||||
* @filter: The metadata and ruleset for determining what system calls
|
||||
* are allowed for a task.
|
||||
* @filter: must always point to a valid seccomp-filter or NULL as it is
|
||||
* accessed without locking during system call entry.
|
||||
*
|
||||
* @filter must only be accessed from the context of current as there
|
||||
* is no locking.
|
||||
* is no read locking.
|
||||
*/
|
||||
struct seccomp {
|
||||
int mode;
|
||||
|
|
|
|||
|
|
@ -841,4 +841,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
|
|||
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
|
||||
unsigned long idx1, unsigned long idx2);
|
||||
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
|
||||
asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
|
||||
const char __user *uargs);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
|
|||
__SYSCALL(__NR_kcmp, sys_kcmp)
|
||||
#define __NR_finit_module 273
|
||||
__SYSCALL(__NR_finit_module, sys_finit_module)
|
||||
/* Backporting seccomp, skip a few ...
|
||||
* #define __NR_sched_setattr 274
|
||||
__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
|
||||
* #define __NR_sched_getattr 275
|
||||
__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
|
||||
* #define __NR_renameat2 276
|
||||
__SYSCALL(__NR_renameat2, sys_renameat2)
|
||||
*/
|
||||
#define __NR_seccomp 277
|
||||
__SYSCALL(__NR_seccomp, sys_seccomp)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 274
|
||||
#define __NR_syscalls 278
|
||||
|
||||
/*
|
||||
* All syscalls below here should go away really,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,13 @@
|
|||
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
|
||||
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
|
||||
|
||||
/* Valid operations for seccomp syscall. */
|
||||
#define SECCOMP_SET_MODE_STRICT 0
|
||||
#define SECCOMP_SET_MODE_FILTER 1
|
||||
|
||||
/* Valid flags for SECCOMP_SET_MODE_FILTER */
|
||||
#define SECCOMP_FILTER_FLAG_TSYNC 1
|
||||
|
||||
/*
|
||||
* All BPF programs must return a 32-bit value.
|
||||
* The bottom 16-bits are for optional return data.
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
|
|||
__this_cpu_dec(process_counts);
|
||||
}
|
||||
list_del_rcu(&p->thread_group);
|
||||
list_del_rcu(&p->thread_node);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
|
|||
goto free_ti;
|
||||
|
||||
tsk->stack = ti;
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* We must handle setting up seccomp filters once we're under
|
||||
* the sighand lock in case orig has changed between now and
|
||||
* then. Until then, filter must be NULL to avoid messing up
|
||||
* the usage counts on the error path calling free_task.
|
||||
*/
|
||||
tsk->seccomp.filter = NULL;
|
||||
#endif
|
||||
|
||||
setup_thread_stack(tsk, orig);
|
||||
clear_user_return_notifier(tsk);
|
||||
|
|
@ -1061,6 +1070,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
|||
sig->nr_threads = 1;
|
||||
atomic_set(&sig->live, 1);
|
||||
atomic_set(&sig->sigcnt, 1);
|
||||
|
||||
/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
|
||||
sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
|
||||
tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
|
||||
|
||||
init_waitqueue_head(&sig->wait_chldexit);
|
||||
sig->curr_target = tsk;
|
||||
init_sigpending(&sig->shared_pending);
|
||||
|
|
@ -1102,6 +1116,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
|
|||
p->flags = new_flags;
|
||||
}
|
||||
|
||||
/*
 * copy_seccomp - hand the forking task's seccomp state to the new task.
 * @p: the task being created by current.
 *
 * Copies current->seccomp into @p after taking a reference on current's
 * filter, mirrors no_new_privs, and sets TIF_SECCOMP on @p when the copied
 * mode is not SECCOMP_MODE_DISABLED. The body is compiled out entirely
 * when CONFIG_SECCOMP is not set.
 */
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
	/*
	 * Must be called with sighand->siglock held, which is common to
	 * all threads in the group. Holding cred_guard_mutex is not
	 * needed because this new task is not yet running and cannot
	 * be racing exec.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Ref-count the new filter user, and assign it. */
	get_seccomp_filter(current);
	p->seccomp = current->seccomp;

	/*
	 * Explicitly enable no_new_privs here in case it got set
	 * between the task_struct being duplicated and holding the
	 * sighand lock. The seccomp state and nnp must be in sync.
	 */
	if (task_no_new_privs(current))
		task_set_no_new_privs(p);

	/*
	 * If the parent gained a seccomp mode after the thread flags
	 * were copied but before we took the sighand lock, we have to
	 * manually enable the seccomp thread flag here.
	 */
	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
		set_tsk_thread_flag(p, TIF_SECCOMP);
#endif
}
|
||||
|
||||
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
|
||||
{
|
||||
current->clear_child_tid = tidptr;
|
||||
|
|
@ -1205,7 +1252,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
|||
goto fork_out;
|
||||
|
||||
ftrace_graph_init_task(p);
|
||||
get_seccomp_filter(p);
|
||||
|
||||
rt_mutex_init_task(p);
|
||||
|
||||
|
|
@ -1447,6 +1493,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
|||
|
||||
spin_lock(¤t->sighand->siglock);
|
||||
|
||||
/*
|
||||
* Copy seccomp details explicitly here, in case they were changed
|
||||
* before holding sighand lock.
|
||||
*/
|
||||
copy_seccomp(p);
|
||||
|
||||
/*
|
||||
* Process group and session signals need to be delivered to just the
|
||||
* parent before the fork or both the parent and the child after the
|
||||
|
|
@ -1487,6 +1539,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
|||
list_add_tail(&p->sibling, &p->real_parent->children);
|
||||
list_add_tail_rcu(&p->tasks, &init_task.tasks);
|
||||
__this_cpu_inc(process_counts);
|
||||
} else {
|
||||
list_add_tail_rcu(&p->thread_node,
|
||||
&p->signal->thread_head);
|
||||
}
|
||||
attach_pid(p, PIDTYPE_PID, pid);
|
||||
nr_threads++;
|
||||
|
|
|
|||
411
kernel/seccomp.c
411
kernel/seccomp.c
|
|
@ -18,15 +18,17 @@
|
|||
#include <linux/compat.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
/* #define SECCOMP_DEBUG 1 */
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
#include <asm/syscall.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/pid.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
|
|
@ -95,7 +97,7 @@ u32 seccomp_bpf_load(int off)
|
|||
if (off == BPF_DATA(nr))
|
||||
return syscall_get_nr(current, regs);
|
||||
if (off == BPF_DATA(arch))
|
||||
return syscall_get_arch(current, regs);
|
||||
return syscall_get_arch();
|
||||
if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
|
||||
unsigned long value;
|
||||
int arg = (off - BPF_DATA(args[0])) / sizeof(u64);
|
||||
|
|
@ -201,32 +203,170 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
|
|||
*/
|
||||
static u32 seccomp_run_filters(int syscall)
|
||||
{
|
||||
struct seccomp_filter *f;
|
||||
struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
|
||||
u32 ret = SECCOMP_RET_ALLOW;
|
||||
|
||||
/* Ensure unexpected behavior doesn't result in failing open. */
|
||||
if (WARN_ON(current->seccomp.filter == NULL))
|
||||
if (unlikely(WARN_ON(f == NULL)))
|
||||
return SECCOMP_RET_KILL;
|
||||
|
||||
/* Make sure cross-thread synced filter points somewhere sane. */
|
||||
smp_read_barrier_depends();
|
||||
|
||||
/*
|
||||
* All filters in the list are evaluated and the lowest BPF return
|
||||
* value always takes priority (ignoring the DATA).
|
||||
*/
|
||||
for (f = current->seccomp.filter; f; f = f->prev) {
|
||||
for (; f; f = f->prev) {
|
||||
u32 cur_ret = sk_run_filter(NULL, f->insns);
|
||||
|
||||
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
|
||||
ret = cur_ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_SECCOMP_FILTER */
|
||||
|
||||
static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
|
||||
{
|
||||
assert_spin_locked(¤t->sighand->siglock);
|
||||
|
||||
if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void seccomp_assign_mode(struct task_struct *task,
|
||||
unsigned long seccomp_mode)
|
||||
{
|
||||
assert_spin_locked(&task->sighand->siglock);
|
||||
|
||||
task->seccomp.mode = seccomp_mode;
|
||||
/*
|
||||
* Make sure TIF_SECCOMP cannot be set before the mode (and
|
||||
* filter) is set.
|
||||
*/
|
||||
smp_mb();
|
||||
set_tsk_thread_flag(task, TIF_SECCOMP);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
/* Returns 1 if the parent is an ancestor of the child. */
|
||||
static int is_ancestor(struct seccomp_filter *parent,
|
||||
struct seccomp_filter *child)
|
||||
{
|
||||
/* NULL is the root ancestor. */
|
||||
if (parent == NULL)
|
||||
return 1;
|
||||
for (; child; child = child->prev)
|
||||
if (child == parent)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_attach_filter: Attaches a seccomp filter to current.
|
||||
* seccomp_can_sync_threads: checks if all threads can be synchronized
|
||||
*
|
||||
* Expects sighand and cred_guard_mutex locks to be held.
|
||||
*
|
||||
* Returns 0 on success, -ve on error, or the pid of a thread which was
|
||||
* either not in the correct seccomp mode or it did not have an ancestral
|
||||
* seccomp filter.
|
||||
*/
|
||||
static inline pid_t seccomp_can_sync_threads(void)
|
||||
{
|
||||
struct task_struct *thread, *caller;
|
||||
|
||||
BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
|
||||
assert_spin_locked(¤t->sighand->siglock);
|
||||
|
||||
/* Validate all threads being eligible for synchronization. */
|
||||
caller = current;
|
||||
for_each_thread(caller, thread) {
|
||||
pid_t failed;
|
||||
|
||||
/* Skip current, since it is initiating the sync. */
|
||||
if (thread == caller)
|
||||
continue;
|
||||
|
||||
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
|
||||
(thread->seccomp.mode == SECCOMP_MODE_FILTER &&
|
||||
is_ancestor(thread->seccomp.filter,
|
||||
caller->seccomp.filter)))
|
||||
continue;
|
||||
|
||||
/* Return the first thread that cannot be synchronized. */
|
||||
failed = task_pid_vnr(thread);
|
||||
/* If the pid cannot be resolved, then return -ESRCH */
|
||||
if (unlikely(WARN_ON(failed == 0)))
|
||||
failed = -ESRCH;
|
||||
return failed;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_sync_threads: sets all threads to use current's filter
|
||||
*
|
||||
* Expects sighand and cred_guard_mutex locks to be held, and for
|
||||
* seccomp_can_sync_threads() to have returned success already
|
||||
* without dropping the locks.
|
||||
*
|
||||
*/
|
||||
static inline void seccomp_sync_threads(void)
|
||||
{
|
||||
struct task_struct *thread, *caller;
|
||||
|
||||
BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
|
||||
assert_spin_locked(¤t->sighand->siglock);
|
||||
|
||||
/* Synchronize all threads. */
|
||||
caller = current;
|
||||
for_each_thread(caller, thread) {
|
||||
/* Skip current, since it needs no changes. */
|
||||
if (thread == caller)
|
||||
continue;
|
||||
|
||||
/* Get a task reference for the new leaf node. */
|
||||
get_seccomp_filter(caller);
|
||||
/*
|
||||
* Drop the task reference to the shared ancestor since
|
||||
* current's path will hold a reference. (This also
|
||||
* allows a put before the assignment.)
|
||||
*/
|
||||
put_seccomp_filter(thread);
|
||||
smp_store_release(&thread->seccomp.filter,
|
||||
caller->seccomp.filter);
|
||||
/*
|
||||
* Opt the other thread into seccomp if needed.
|
||||
* As threads are considered to be trust-realm
|
||||
* equivalent (see ptrace_may_access), it is safe to
|
||||
* allow one thread to transition the other.
|
||||
*/
|
||||
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
|
||||
/*
|
||||
* Don't let an unprivileged task work around
|
||||
* the no_new_privs restriction by creating
|
||||
* a thread that sets it up, enters seccomp,
|
||||
* then dies.
|
||||
*/
|
||||
if (task_no_new_privs(caller))
|
||||
task_set_no_new_privs(thread);
|
||||
|
||||
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_prepare_filter: Prepares a seccomp filter for use.
|
||||
* @fprog: BPF program to install
|
||||
*
|
||||
* Returns 0 on success or an errno on failure.
|
||||
* Returns filter on success or an ERR_PTR on failure.
|
||||
*/
|
||||
static long seccomp_attach_filter(struct sock_fprog *fprog)
|
||||
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
|
||||
{
|
||||
struct seccomp_filter *filter;
|
||||
unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
|
||||
|
|
@ -234,12 +374,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
|||
long ret;
|
||||
|
||||
if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
|
||||
return -EINVAL;
|
||||
return ERR_PTR(-EINVAL);
|
||||
BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
|
||||
|
||||
for (filter = current->seccomp.filter; filter; filter = filter->prev)
|
||||
total_insns += filter->len + 4; /* include a 4 instr penalty */
|
||||
if (total_insns > MAX_INSNS_PER_PATH)
|
||||
return -ENOMEM;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/*
|
||||
* Installing a seccomp filter requires that the task have
|
||||
|
|
@ -247,16 +388,16 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
|||
* This avoids scenarios where unprivileged tasks can affect the
|
||||
* behavior of privileged children.
|
||||
*/
|
||||
if (!current->no_new_privs &&
|
||||
if (!task_no_new_privs(current) &&
|
||||
security_capable_noaudit(current_cred(), current_user_ns(),
|
||||
CAP_SYS_ADMIN) != 0)
|
||||
return -EACCES;
|
||||
return ERR_PTR(-EACCES);
|
||||
|
||||
/* Allocate a new seccomp_filter */
|
||||
filter = kzalloc(sizeof(struct seccomp_filter) + fp_size,
|
||||
GFP_KERNEL|__GFP_NOWARN);
|
||||
if (!filter)
|
||||
return -ENOMEM;
|
||||
return ERR_PTR(-ENOMEM);;
|
||||
atomic_set(&filter->usage, 1);
|
||||
filter->len = fprog->len;
|
||||
|
||||
|
|
@ -275,28 +416,24 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
|||
if (ret)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* If there is an existing filter, make it the prev and don't drop its
|
||||
* task reference.
|
||||
*/
|
||||
filter->prev = current->seccomp.filter;
|
||||
current->seccomp.filter = filter;
|
||||
return 0;
|
||||
return filter;
|
||||
|
||||
fail:
|
||||
kfree(filter);
|
||||
return ret;
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_attach_user_filter - attaches a user-supplied sock_fprog
|
||||
* seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
|
||||
* @user_filter: pointer to the user data containing a sock_fprog.
|
||||
*
|
||||
* Returns the prepared filter on success or an ERR_PTR on failure.
|
||||
*/
|
||||
long seccomp_attach_user_filter(char __user *user_filter)
|
||||
static struct seccomp_filter *
|
||||
seccomp_prepare_user_filter(const char __user *user_filter)
|
||||
{
|
||||
struct sock_fprog fprog;
|
||||
long ret = -EFAULT;
|
||||
struct seccomp_filter *filter = ERR_PTR(-EFAULT);
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (is_compat_task()) {
|
||||
|
|
@ -309,9 +446,56 @@ long seccomp_attach_user_filter(char __user *user_filter)
|
|||
#endif
|
||||
if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
|
||||
goto out;
|
||||
ret = seccomp_attach_filter(&fprog);
|
||||
filter = seccomp_prepare_filter(&fprog);
|
||||
out:
|
||||
return ret;
|
||||
return filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_attach_filter: validate and attach filter
|
||||
* @flags: flags to change filter behavior
|
||||
* @filter: seccomp filter to add to the current process
|
||||
*
|
||||
* Caller must be holding current->sighand->siglock lock.
|
||||
*
|
||||
* Returns 0 on success, -ve on error.
|
||||
*/
|
||||
static long seccomp_attach_filter(unsigned int flags,
|
||||
struct seccomp_filter *filter)
|
||||
{
|
||||
unsigned long total_insns;
|
||||
struct seccomp_filter *walker;
|
||||
|
||||
assert_spin_locked(¤t->sighand->siglock);
|
||||
|
||||
/* Validate resulting filter length. */
|
||||
total_insns = filter->len;
|
||||
for (walker = current->seccomp.filter; walker; walker = walker->prev)
|
||||
total_insns += walker->len + 4; /* 4 instr penalty */
|
||||
if (total_insns > MAX_INSNS_PER_PATH)
|
||||
return -ENOMEM;
|
||||
|
||||
/* If thread sync has been requested, check that it is possible. */
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
|
||||
int ret;
|
||||
|
||||
ret = seccomp_can_sync_threads();
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is an existing filter, make it the prev and don't drop its
|
||||
* task reference.
|
||||
*/
|
||||
filter->prev = current->seccomp.filter;
|
||||
current->seccomp.filter = filter;
|
||||
|
||||
/* Now that the new filter is in place, synchronize to all threads. */
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
|
||||
seccomp_sync_threads();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
|
||||
|
|
@ -324,6 +508,13 @@ void get_seccomp_filter(struct task_struct *tsk)
|
|||
atomic_inc(&orig->usage);
|
||||
}
|
||||
|
||||
/*
 * seccomp_filter_free - release the memory of a single filter.
 * @filter: filter to free; NULL is accepted and does nothing.
 */
static inline void seccomp_filter_free(struct seccomp_filter *filter)
{
	if (!filter)
		return;

	kfree(filter);
}
|
||||
|
||||
/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
|
||||
void put_seccomp_filter(struct task_struct *tsk)
|
||||
{
|
||||
|
|
@ -332,7 +523,7 @@ void put_seccomp_filter(struct task_struct *tsk)
|
|||
while (orig && atomic_dec_and_test(&orig->usage)) {
|
||||
struct seccomp_filter *freeme = orig;
|
||||
orig = orig->prev;
|
||||
kfree(freeme);
|
||||
seccomp_filter_free(freeme);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -351,7 +542,7 @@ static void seccomp_send_sigsys(int syscall, int reason)
|
|||
info.si_code = SYS_SECCOMP;
|
||||
info.si_call_addr = (void __user *)KSTK_EIP(current);
|
||||
info.si_errno = reason;
|
||||
info.si_arch = syscall_get_arch(current, task_pt_regs(current));
|
||||
info.si_arch = syscall_get_arch();
|
||||
info.si_syscall = syscall;
|
||||
force_sig_info(SIGSYS, &info, current);
|
||||
}
|
||||
|
|
@ -376,12 +567,17 @@ static int mode1_syscalls_32[] = {
|
|||
|
||||
int __secure_computing(int this_syscall)
|
||||
{
|
||||
int mode = current->seccomp.mode;
|
||||
int exit_sig = 0;
|
||||
int *syscall;
|
||||
u32 ret;
|
||||
|
||||
switch (mode) {
|
||||
/*
|
||||
* Make sure that any changes to mode from another thread have
|
||||
* been seen after TIF_SECCOMP was seen.
|
||||
*/
|
||||
rmb();
|
||||
|
||||
switch (current->seccomp.mode) {
|
||||
case SECCOMP_MODE_STRICT:
|
||||
syscall = mode1_syscalls;
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
|
@ -467,47 +663,152 @@ long prctl_get_seccomp(void)
|
|||
}
|
||||
|
||||
/**
|
||||
* prctl_set_seccomp: configures current->seccomp.mode
|
||||
* @seccomp_mode: requested mode to use
|
||||
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
|
||||
*
|
||||
* This function may be called repeatedly with a @seccomp_mode of
|
||||
* SECCOMP_MODE_FILTER to install additional filters. Every filter
|
||||
* successfully installed will be evaluated (in reverse order) for each system
|
||||
* call the task makes.
|
||||
* seccomp_set_mode_strict: internal function for setting strict seccomp
|
||||
*
|
||||
* Once current->seccomp.mode is non-zero, it may not be changed.
|
||||
*
|
||||
* Returns 0 on success or -EINVAL on failure.
|
||||
*/
|
||||
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
|
||||
static long seccomp_set_mode_strict(void)
|
||||
{
|
||||
const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
|
||||
long ret = -EINVAL;
|
||||
|
||||
if (current->seccomp.mode &&
|
||||
current->seccomp.mode != seccomp_mode)
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
|
||||
if (!seccomp_may_assign_mode(seccomp_mode))
|
||||
goto out;
|
||||
|
||||
#ifdef TIF_NOTSC
|
||||
disable_TSC();
|
||||
#endif
|
||||
seccomp_assign_mode(current, seccomp_mode);
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
/**
|
||||
* seccomp_set_mode_filter: internal function for setting seccomp filter
|
||||
* @flags: flags to change filter behavior
|
||||
* @filter: struct sock_fprog containing filter
|
||||
*
|
||||
* This function may be called repeatedly to install additional filters.
|
||||
* Every filter successfully installed will be evaluated (in reverse order)
|
||||
* for each system call the task makes.
|
||||
*
|
||||
* Once current->seccomp.mode is non-zero, it may not be changed.
|
||||
*
|
||||
* Returns 0 on success or -EINVAL on failure.
|
||||
*/
|
||||
static long seccomp_set_mode_filter(unsigned int flags,
|
||||
const char __user *filter)
|
||||
{
|
||||
const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
|
||||
struct seccomp_filter *prepared = NULL;
|
||||
long ret = -EINVAL;
|
||||
|
||||
/* Validate flags. */
|
||||
if (flags & ~SECCOMP_FILTER_FLAG_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
/* Prepare the new filter before holding any locks. */
|
||||
prepared = seccomp_prepare_user_filter(filter);
|
||||
if (IS_ERR(prepared))
|
||||
return PTR_ERR(prepared);
|
||||
|
||||
/*
|
||||
* Make sure we cannot change seccomp or nnp state via TSYNC
|
||||
* while another thread is in the middle of calling exec.
|
||||
*/
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
|
||||
mutex_lock_killable(¤t->signal->cred_guard_mutex))
|
||||
goto out_free;
|
||||
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
|
||||
if (!seccomp_may_assign_mode(seccomp_mode))
|
||||
goto out;
|
||||
|
||||
ret = seccomp_attach_filter(flags, prepared);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* Do not free the successfully attached filter. */
|
||||
prepared = NULL;
|
||||
|
||||
seccomp_assign_mode(current, seccomp_mode);
|
||||
out:
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
|
||||
mutex_unlock(¤t->signal->cred_guard_mutex);
|
||||
out_free:
|
||||
seccomp_filter_free(prepared);
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static inline long seccomp_set_mode_filter(unsigned int flags,
|
||||
const char __user *filter)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Common entry point for both prctl and syscall. */
|
||||
static long do_seccomp(unsigned int op, unsigned int flags,
|
||||
const char __user *uargs)
|
||||
{
|
||||
switch (op) {
|
||||
case SECCOMP_SET_MODE_STRICT:
|
||||
if (flags != 0 || uargs != NULL)
|
||||
return -EINVAL;
|
||||
return seccomp_set_mode_strict();
|
||||
case SECCOMP_SET_MODE_FILTER:
|
||||
return seccomp_set_mode_filter(flags, uargs);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/* seccomp syscall: passes its three arguments straight to do_seccomp(). */
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
		const char __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}
|
||||
|
||||
/**
|
||||
* prctl_set_seccomp: configures current->seccomp.mode
|
||||
* @seccomp_mode: requested mode to use
|
||||
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
|
||||
*
|
||||
* Returns 0 on success or -EINVAL on failure.
|
||||
*/
|
||||
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
|
||||
{
|
||||
unsigned int op;
|
||||
char __user *uargs;
|
||||
|
||||
switch (seccomp_mode) {
|
||||
case SECCOMP_MODE_STRICT:
|
||||
ret = 0;
|
||||
#ifdef TIF_NOTSC
|
||||
disable_TSC();
|
||||
#endif
|
||||
op = SECCOMP_SET_MODE_STRICT;
|
||||
/*
|
||||
* Setting strict mode through prctl always ignored filter,
|
||||
* so make sure it is always NULL here to pass the internal
|
||||
* check in do_seccomp().
|
||||
*/
|
||||
uargs = NULL;
|
||||
break;
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
case SECCOMP_MODE_FILTER:
|
||||
ret = seccomp_attach_user_filter(filter);
|
||||
if (ret)
|
||||
goto out;
|
||||
op = SECCOMP_SET_MODE_FILTER;
|
||||
uargs = filter;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
current->seccomp.mode = seccomp_mode;
|
||||
set_thread_flag(TIF_SECCOMP);
|
||||
out:
|
||||
return ret;
|
||||
/* prctl interface doesn't have flags, so they are always zero. */
|
||||
return do_seccomp(op, 0, uargs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2427,12 +2427,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
|
|||
if (arg2 != 1 || arg3 || arg4 || arg5)
|
||||
return -EINVAL;
|
||||
|
||||
current->no_new_privs = 1;
|
||||
task_set_no_new_privs(current);
|
||||
break;
|
||||
case PR_GET_NO_NEW_PRIVS:
|
||||
if (arg2 || arg3 || arg4 || arg5)
|
||||
return -EINVAL;
|
||||
return current->no_new_privs ? 1 : 0;
|
||||
return task_no_new_privs(current) ? 1 : 0;
|
||||
case PR_SET_VMA:
|
||||
error = prctl_set_vma(arg2, arg3, arg4, arg5);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at);
|
|||
|
||||
/* compare kernel pointers */
|
||||
cond_syscall(sys_kcmp);
|
||||
|
||||
/* operate on Secure Computing state */
|
||||
cond_syscall(sys_seccomp);
|
||||
|
|
|
|||
|
|
@ -629,7 +629,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
|
|||
* There is no exception for unconfined as change_hat is not
|
||||
* available.
|
||||
*/
|
||||
if (current->no_new_privs)
|
||||
if (task_no_new_privs(current))
|
||||
return -EPERM;
|
||||
|
||||
/* released below */
|
||||
|
|
@ -780,7 +780,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
|
|||
* no_new_privs is set because this aways results in a reduction
|
||||
* of permissions.
|
||||
*/
|
||||
if (current->no_new_privs && !unconfined(profile)) {
|
||||
if (task_no_new_privs(current) && !unconfined(profile)) {
|
||||
put_cred(cred);
|
||||
return -EPERM;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user