Merge branch 'upstream/android-3.10' into 'linaro-fixes/android-3.10'

This commit is contained in:
Amit Pundir 2014-10-13 09:43:42 +05:30
commit 51b6770b8d
42 changed files with 877 additions and 87 deletions

View File

@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER
- secure_computing is called from a ptrace_event()-safe context
- secure_computing return value is checked and a return value of -1
results in the system call being skipped immediately.
- seccomp syscall wired up
config SECCOMP_FILTER
def_bool y

View File

@ -59,6 +59,21 @@
#define smp_wmb() dmb()
#endif
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)

View File

@ -97,8 +97,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
}
static inline int syscall_get_arch(struct task_struct *task,
struct pt_regs *regs)
static inline int syscall_get_arch(void)
{
/* ARM tasks don't change audit architectures on the fly. */
return AUDIT_ARCH_ARM;

View File

@ -20,6 +20,7 @@ config ARM64
select GENERIC_TIME_VSYSCALL
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
@ -232,6 +233,20 @@ config ARMV7_COMPAT_CPUINFO
source "mm/Kconfig"
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
endmenu
menu "Boot options"

View File

@ -35,10 +35,60 @@
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#else
#define smp_mb() asm volatile("dmb ish" : : : "memory")
#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
switch (sizeof(*p)) { \
case 4: \
asm volatile ("stlr %w1, %0" \
: "=Q" (*p) : "r" (v) : "memory"); \
break; \
case 8: \
asm volatile ("stlr %1, %0" \
: "=Q" (*p) : "r" (v) : "memory"); \
break; \
} \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1; \
compiletime_assert_atomic_type(*p); \
switch (sizeof(*p)) { \
case 4: \
asm volatile ("ldar %w0, %1" \
: "=r" (___p1) : "Q" (*p) : "memory"); \
break; \
case 8: \
asm volatile ("ldar %0, %1" \
: "=r" (___p1) : "Q" (*p) : "memory"); \
break; \
} \
___p1; \
})
#endif
#define read_barrier_depends() do { } while(0)

View File

@ -191,6 +191,13 @@ typedef struct compat_siginfo {
compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */
int _fd;
} _sigpoll;
/* SIGSYS */
struct {
compat_uptr_t _call_addr; /* calling user insn */
int _syscall; /* triggering system call number */
unsigned int _arch; /* AUDIT_ARCH_* of syscall */
} _sigsys;
} _sifields;
} compat_siginfo_t;

View File

@ -60,6 +60,15 @@
#define COMPAT_PT_TEXT_ADDR 0x10000
#define COMPAT_PT_DATA_ADDR 0x10004
#define COMPAT_PT_TEXT_END_ADDR 0x10008
/*
* used to skip a system call when tracer changes its number to -1
* with ptrace(PTRACE_SET_SYSCALL)
*/
#define RET_SKIP_SYSCALL -1
#define RET_SKIP_SYSCALL_TRACE -2
#define IS_SKIP_SYSCALL(no) ((int)(no & 0xffffffff) == -1)
#ifndef __ASSEMBLY__
/* sizeof(struct user) for AArch32 */

View File

@ -0,0 +1,25 @@
/*
* arch/arm64/include/asm/seccomp.h
*
* Copyright (C) 2014 Linaro Limited
* Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef _ASM_SECCOMP_H
#define _ASM_SECCOMP_H
#include <asm/unistd.h>
#ifdef CONFIG_COMPAT
#define __NR_seccomp_read_32 __NR_compat_read
#define __NR_seccomp_write_32 __NR_compat_write
#define __NR_seccomp_exit_32 __NR_compat_exit
#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn
#endif /* CONFIG_COMPAT */
#include <asm-generic/seccomp.h>
#endif /* _ASM_SECCOMP_H */

View File

@ -30,6 +30,9 @@
* Compat syscall numbers used by the AArch64 kernel.
*/
#define __NR_compat_restart_syscall 0
#define __NR_compat_exit 1
#define __NR_compat_read 3
#define __NR_compat_write 4
#define __NR_compat_sigreturn 119
#define __NR_compat_rt_sigreturn 173
@ -40,7 +43,7 @@
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
#define __NR_compat_syscalls 378
#define __NR_compat_syscalls 384
#endif
#define __ARCH_WANT_SYS_CLONE

View File

@ -781,3 +781,11 @@ __SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 379
__SYSCALL(__NR_finit_module, sys_finit_module)
/* #define __NR_sched_setattr 380 */
__SYSCALL(380, sys_ni_syscall)
/* #define __NR_sched_getattr 381 */
__SYSCALL(381, sys_ni_syscall)
/* #define __NR_renameat2 382 */
__SYSCALL(382, sys_ni_syscall)
#define __NR_seccomp 383
__SYSCALL(__NR_seccomp, sys_seccomp)

View File

@ -23,6 +23,7 @@
#include <asm/hwcap.h>
#define PTRACE_SET_SYSCALL 23
/*
* PSR bits

View File

@ -25,6 +25,7 @@
#include <asm/asm-offsets.h>
#include <asm/errno.h>
#include <asm/esr.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
@ -662,6 +663,10 @@ __sys_trace:
mov x0, sp
bl syscall_trace_enter
adr lr, __sys_trace_return // return address
cmp w0, #RET_SKIP_SYSCALL_TRACE // skip syscall and tracing?
b.eq ret_to_user
cmp w0, #RET_SKIP_SYSCALL // skip syscall?
b.eq __sys_trace_return_skipped
uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
cmp scno, sc_nr // check upper syscall limit
@ -675,6 +680,7 @@ __sys_trace:
__sys_trace_return:
str x0, [sp] // save returned x0
__sys_trace_return_skipped: // x0 already in regs[0]
mov x0, sp
bl syscall_trace_exit
b ret_to_user

View File

@ -27,6 +27,7 @@
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/seccomp.h>
#include <linux/security.h>
#include <linux/init.h>
#include <linux/signal.h>
@ -1064,7 +1065,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
return ptrace_request(child, request, addr, data);
int ret;
switch (request) {
case PTRACE_SET_SYSCALL:
task_pt_regs(child)->syscallno = data;
ret = 0;
break;
default:
ret = ptrace_request(child, request, addr, data);
break;
}
return ret;
}
enum ptrace_syscall_dir {
@ -1096,9 +1109,33 @@ static void tracehook_report_syscall(struct pt_regs *regs,
asmlinkage int syscall_trace_enter(struct pt_regs *regs)
{
unsigned int saved_syscallno = regs->syscallno;
/* Do the secure computing check first; failures should be fast. */
if (secure_computing(regs->syscallno) == -1)
return RET_SKIP_SYSCALL_TRACE;
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
if (IS_SKIP_SYSCALL(regs->syscallno)) {
/*
* RESTRICTION: we can't modify a return value of user
* issued syscall(-1) here. In order to ease this flavor,
* we need to treat whatever value in x0 as a return value,
* but this might result in a bogus value being returned.
*/
/*
* NOTE: syscallno may also be set to -1 if fatal signal is
* detected in tracehook_report_syscall_entry(), but since
* a value set to x0 here is not used in this case, we may
* neglect the case.
*/
if (!test_thread_flag(TIF_SYSCALL_TRACE) ||
(IS_SKIP_SYSCALL(saved_syscallno)))
regs->regs[0] = -ENOSYS;
}
audit_syscall_entry(syscall_get_arch(), regs->syscallno,
regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]);

View File

@ -211,6 +211,14 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr);
break;
#ifdef __ARCH_SIGSYS
case __SI_SYS:
err |= __put_user((compat_uptr_t)(unsigned long)
from->si_call_addr, &to->si_call_addr);
err |= __put_user(from->si_syscall, &to->si_syscall);
err |= __put_user(from->si_arch, &to->si_arch);
break;
#endif
default: /* this is just in case for now ... */
err |= __put_user(from->si_pid, &to->si_pid);
err |= __put_user(from->si_uid, &to->si_uid);

View File

@ -45,13 +45,36 @@
# define smp_rmb() rmb()
# define smp_wmb() wmb()
# define smp_read_barrier_depends() read_barrier_depends()
#else
# define smp_mb() barrier()
# define smp_rmb() barrier()
# define smp_wmb() barrier()
# define smp_read_barrier_depends() do { } while(0)
#endif
/*
* IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
* need for asm trickery!
*/
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
/*
* XXX check on this ---I suspect what Linus really wants here is
* acquire vs release semantics but we can't discuss this stuff with

View File

@ -82,4 +82,19 @@ static inline void fence(void)
#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#endif /* _ASM_METAG_BARRIER_H */

View File

@ -180,4 +180,19 @@
#define nudge_writes() mb()
#endif
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#endif /* __ASM_BARRIER_H */

View File

@ -45,11 +45,15 @@
# define SMPWMB eieio
#endif
#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
#define smp_mb() mb()
#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
#define smp_rmb() __lwsync()
#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
#define smp_read_barrier_depends() read_barrier_depends()
#else
#define __lwsync() barrier()
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
@ -65,4 +69,19 @@
#define data_barrier(x) \
asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
__lwsync(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
__lwsync(); \
___p1; \
})
#endif /* _ASM_POWERPC_BARRIER_H */

View File

@ -32,4 +32,19 @@
#define set_mb(var, value) do { var = value; mb(); } while (0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
#endif /* __ASM_BARRIER_H */

View File

@ -89,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr2 = args[0];
}
static inline int syscall_get_arch(struct task_struct *task,
struct pt_regs *regs)
static inline int syscall_get_arch(void)
{
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_31BIT))
if (test_tsk_thread_flag(current, TIF_31BIT))
return AUDIT_ARCH_S390;
#endif
return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390;

View File

@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define smp_read_barrier_depends() do { } while(0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
#endif /* !(__SPARC64_BARRIER_H) */

View File

@ -92,12 +92,53 @@
#endif
#define smp_read_barrier_depends() read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */
#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
/*
* For either of these options x86 doesn't have a strong TSO memory
* model and we should fall back to full barriers.
*/
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#else /* regular x86 TSO memory ordering */
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
#endif
/*

View File

@ -90,8 +90,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
static inline int syscall_get_arch(struct task_struct *task,
struct pt_regs *regs)
static inline int syscall_get_arch(void)
{
return AUDIT_ARCH_I386;
}
@ -220,8 +219,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
}
}
static inline int syscall_get_arch(struct task_struct *task,
struct pt_regs *regs)
static inline int syscall_get_arch(void)
{
#ifdef CONFIG_IA32_EMULATION
/*
@ -233,7 +231,7 @@ static inline int syscall_get_arch(struct task_struct *task,
*
* x32 tasks should be considered AUDIT_ARCH_X86_64.
*/
if (task_thread_info(task)->status & TS_COMPAT)
if (task_thread_info(current)->status & TS_COMPAT)
return AUDIT_ARCH_I386;
#endif
/* Both x32 and x86_64 are considered "64-bit". */

View File

@ -357,3 +357,7 @@
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
349 i386 kcmp sys_kcmp
350 i386 finit_module sys_finit_module
# 351 i386 sched_setattr sys_sched_setattr
# 352 i386 sched_getattr sys_sched_getattr
# 353 i386 renameat2 sys_renameat2
354 i386 seccomp sys_seccomp

View File

@ -320,6 +320,10 @@
311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp
313 common finit_module sys_finit_module
# 314 common sched_setattr sys_sched_setattr
# 315 common sched_getattr sys_sched_getattr
# 316 common renameat2 sys_renameat2
317 common seccomp sys_seccomp
#
# x32-specific system call numbers start at 512 to avoid cache impact

View File

@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds);
/*
* determine how safe it is to execute the proposed program
* - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH
* PTRACE_ATTACH or seccomp thread-sync
*/
static int check_unsafe_exec(struct linux_binprm *bprm)
{
@ -1239,7 +1239,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
* This isn't strictly necessary, but it makes it harder for LSMs to
* mess up.
*/
if (current->no_new_privs)
if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
n_fs = 1;
@ -1286,7 +1286,7 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
!current->no_new_privs &&
!task_no_new_privs(current) &&
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */

View File

@ -46,5 +46,20 @@
#define read_barrier_depends() do {} while (0)
#define smp_read_barrier_depends() do {} while (0)
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
ACCESS_ONCE(*p) = (v); \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
})
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_GENERIC_BARRIER_H */

View File

@ -0,0 +1,29 @@
/*
* include/asm-generic/seccomp.h
*
* Copyright (C) 2014 Linaro Limited
* Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef _ASM_GENERIC_SECCOMP_H
#define _ASM_GENERIC_SECCOMP_H
#include <asm-generic/unistd.h>
#if defined(CONFIG_COMPAT) && !defined(__NR_seccomp_read_32)
#define __NR_seccomp_read_32 __NR_read
#define __NR_seccomp_write_32 __NR_write
#define __NR_seccomp_exit_32 __NR_exit
#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn
#endif /* CONFIG_COMPAT && ! already defined */
#define __NR_seccomp_read __NR_read
#define __NR_seccomp_write __NR_write
#define __NR_seccomp_exit __NR_exit
#define __NR_seccomp_sigreturn __NR_rt_sigreturn
#endif /* _ASM_GENERIC_SECCOMP_H */

View File

@ -144,8 +144,6 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
/**
* syscall_get_arch - return the AUDIT_ARCH for the current system call
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
*
* Returns the AUDIT_ARCH_* based on the system call convention in use.
*
@ -155,5 +153,5 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
* Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must
* provide an implementation of this.
*/
int syscall_get_arch(struct task_struct *task, struct pt_regs *regs);
int syscall_get_arch(void);
#endif /* _ASM_SYSCALL_H */

View File

@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#endif
/* Is this type a native word size -- useful for atomic operations */
#ifndef __native_word
# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
#endif
/* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
#define compiletime_assert(condition, msg) \
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
#define compiletime_assert_atomic_type(t) \
compiletime_assert(__native_word(t), \
"Need native word sized stores/loads for atomicity.")
/*
* Prevent the compiler from merging or refetching accesses. The compiler
* is also forbidden from reordering successive instances of ACCESS_ONCE(),

View File

@ -40,6 +40,7 @@ extern struct fs_struct init_fs;
#define INIT_SIGNALS(sig) { \
.nr_threads = 1, \
.thread_head = LIST_HEAD_INIT(init_task.thread_node), \
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
.shared_pending = { \
.list = LIST_HEAD_INIT(sig.shared_pending.list), \
@ -213,6 +214,7 @@ extern struct task_group root_task_group;
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
}, \
.thread_group = LIST_HEAD_INIT(tsk.thread_group), \
.thread_node = LIST_HEAD_INIT(init_signals.thread_head), \
INIT_IDS \
INIT_PERF_EVENTS(tsk) \
INIT_TRACE_IRQFLAGS \

View File

@ -476,6 +476,7 @@ struct signal_struct {
atomic_t sigcnt;
atomic_t live;
int nr_threads;
struct list_head thread_head;
wait_queue_head_t wait_chldexit; /* for wait4() */
@ -1117,13 +1118,12 @@ struct task_struct {
* execve */
unsigned in_iowait:1;
/* task may not gain privileges */
unsigned no_new_privs:1;
/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned long atomic_flags; /* Flags needing atomic access. */
pid_t pid;
pid_t tgid;
@ -1156,6 +1156,7 @@ struct task_struct {
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group;
struct list_head thread_node;
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
@ -1687,6 +1688,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */
static inline bool task_no_new_privs(struct task_struct *p)
{
return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}
static inline void task_set_no_new_privs(struct task_struct *p)
{
set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}
/*
* task->jobctl flags
*/
@ -2166,6 +2180,16 @@ extern bool current_is_single_threaded(void);
#define while_each_thread(g, t) \
while ((t = next_thread(t)) != g)
#define __for_each_thread(signal, t) \
list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
#define for_each_thread(p, t) \
__for_each_thread((p)->signal, t)
/* Careful: this is a double loop, 'break' won't work as expected. */
#define for_each_process_thread(p, t) \
for_each_process(p) for_each_thread(p, t)
static inline int get_nr_threads(struct task_struct *tsk)
{
return tsk->signal->nr_threads;

View File

@ -3,6 +3,8 @@
#include <uapi/linux/seccomp.h>
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
#ifdef CONFIG_SECCOMP
#include <linux/thread_info.h>
@ -14,11 +16,11 @@ struct seccomp_filter;
*
* @mode: indicates one of the valid values above for controlled
* system calls available to a process.
* @filter: The metadata and ruleset for determining what system calls
* are allowed for a task.
* @filter: must always point to a valid seccomp-filter or NULL as it is
* accessed without locking during system call entry.
*
* @filter must only be accessed from the context of current as there
* is no locking.
* is no read locking.
*/
struct seccomp {
int mode;

View File

@ -841,4 +841,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2);
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs);
#endif

View File

@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
__SYSCALL(__NR_finit_module, sys_finit_module)
/* Backporting seccomp, skip a few ...
* #define __NR_sched_setattr 274
__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
* #define __NR_sched_getattr 275
__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
* #define __NR_renameat2 276
__SYSCALL(__NR_renameat2, sys_renameat2)
*/
#define __NR_seccomp 277
__SYSCALL(__NR_seccomp, sys_seccomp)
#undef __NR_syscalls
#define __NR_syscalls 274
#define __NR_syscalls 278
/*
* All syscalls below here should go away really,

View File

@ -10,6 +10,13 @@
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
/* Valid operations for seccomp syscall. */
#define SECCOMP_SET_MODE_STRICT 0
#define SECCOMP_SET_MODE_FILTER 1
/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC 1
/*
* All BPF programs must return a 32-bit value.
* The bottom 16-bits are for optional return data.

View File

@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
__this_cpu_dec(process_counts);
}
list_del_rcu(&p->thread_group);
list_del_rcu(&p->thread_node);
}
/*

View File

@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto free_ti;
tsk->stack = ti;
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
* the sighand lock in case orig has changed between now and
* then. Until then, filter must be NULL to avoid messing up
* the usage counts on the error path calling free_task.
*/
tsk->seccomp.filter = NULL;
#endif
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
@ -1061,6 +1070,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->nr_threads = 1;
atomic_set(&sig->live, 1);
atomic_set(&sig->sigcnt, 1);
/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
init_waitqueue_head(&sig->wait_chldexit);
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
@ -1102,6 +1116,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
p->flags = new_flags;
}
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
/*
* Must be called with sighand->lock held, which is common to
* all threads in the group. Holding cred_guard_mutex is not
* needed because this new task is not yet running and cannot
* be racing exec.
*/
assert_spin_locked(&current->sighand->siglock);
/* Ref-count the new filter user, and assign it. */
get_seccomp_filter(current);
p->seccomp = current->seccomp;
/*
* Explicitly enable no_new_privs here in case it got set
* between the task_struct being duplicated and holding the
* sighand lock. The seccomp state and nnp must be in sync.
*/
if (task_no_new_privs(current))
task_set_no_new_privs(p);
/*
* If the parent gained a seccomp mode after copying thread
* flags and between before we held the sighand lock, we have
* to manually enable the seccomp thread flag here.
*/
if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
set_tsk_thread_flag(p, TIF_SECCOMP);
#endif
}
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{
current->clear_child_tid = tidptr;
@ -1205,7 +1252,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto fork_out;
ftrace_graph_init_task(p);
get_seccomp_filter(p);
rt_mutex_init_task(p);
@ -1447,6 +1493,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_lock(&current->sighand->siglock);
/*
* Copy seccomp details explicitly here, in case they were changed
* before holding sighand lock.
*/
copy_seccomp(p);
/*
* Process group and session signals need to be delivered to just the
* parent before the fork or both the parent and the child after the
@ -1487,6 +1539,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
__this_cpu_inc(process_counts);
} else {
list_add_tail_rcu(&p->thread_node,
&p->signal->thread_head);
}
attach_pid(p, PIDTYPE_PID, pid);
nr_threads++;

View File

@ -18,15 +18,17 @@
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
/* #define SECCOMP_DEBUG 1 */
#ifdef CONFIG_SECCOMP_FILTER
#include <asm/syscall.h>
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/tracehook.h>
#include <linux/uaccess.h>
@ -95,7 +97,7 @@ u32 seccomp_bpf_load(int off)
if (off == BPF_DATA(nr))
return syscall_get_nr(current, regs);
if (off == BPF_DATA(arch))
return syscall_get_arch(current, regs);
return syscall_get_arch();
if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
unsigned long value;
int arg = (off - BPF_DATA(args[0])) / sizeof(u64);
@ -201,32 +203,170 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
*/
static u32 seccomp_run_filters(int syscall)
{
struct seccomp_filter *f;
struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
u32 ret = SECCOMP_RET_ALLOW;
/* Ensure unexpected behavior doesn't result in failing open. */
if (WARN_ON(current->seccomp.filter == NULL))
if (unlikely(WARN_ON(f == NULL)))
return SECCOMP_RET_KILL;
/* Make sure cross-thread synced filter points somewhere sane. */
smp_read_barrier_depends();
/*
* All filters in the list are evaluated and the lowest BPF return
* value always takes priority (ignoring the DATA).
*/
for (f = current->seccomp.filter; f; f = f->prev) {
for (; f; f = f->prev) {
u32 cur_ret = sk_run_filter(NULL, f->insns);
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
ret = cur_ret;
}
return ret;
}
#endif /* CONFIG_SECCOMP_FILTER */
static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{
assert_spin_locked(&current->sighand->siglock);
if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
return false;
return true;
}
static inline void seccomp_assign_mode(struct task_struct *task,
unsigned long seccomp_mode)
{
assert_spin_locked(&task->sighand->siglock);
task->seccomp.mode = seccomp_mode;
/*
* Make sure TIF_SECCOMP cannot be set before the mode (and
* filter) is set.
*/
smp_mb();
set_tsk_thread_flag(task, TIF_SECCOMP);
}
#ifdef CONFIG_SECCOMP_FILTER
/* Returns 1 if the parent is an ancestor of the child. */
static int is_ancestor(struct seccomp_filter *parent,
struct seccomp_filter *child)
{
/* NULL is the root ancestor. */
if (parent == NULL)
return 1;
for (; child; child = child->prev)
if (child == parent)
return 1;
return 0;
}
/**
* seccomp_attach_filter: Attaches a seccomp filter to current.
* seccomp_can_sync_threads: checks if all threads can be synchronized
*
* Expects sighand and cred_guard_mutex locks to be held.
*
* Returns 0 on success, -ve on error, or the pid of a thread which was
* either not in the correct seccomp mode or it did not have an ancestral
* seccomp filter.
*/
static inline pid_t seccomp_can_sync_threads(void)
{
struct task_struct *thread, *caller;
BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
assert_spin_locked(&current->sighand->siglock);
/* Validate all threads being eligible for synchronization. */
caller = current;
for_each_thread(caller, thread) {
pid_t failed;
/* Skip current, since it is initiating the sync. */
if (thread == caller)
continue;
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
(thread->seccomp.mode == SECCOMP_MODE_FILTER &&
is_ancestor(thread->seccomp.filter,
caller->seccomp.filter)))
continue;
/* Return the first thread that cannot be synchronized. */
failed = task_pid_vnr(thread);
/* If the pid cannot be resolved, then return -ESRCH */
if (unlikely(WARN_ON(failed == 0)))
failed = -ESRCH;
return failed;
}
return 0;
}
/**
* seccomp_sync_threads: sets all threads to use current's filter
*
* Expects sighand and cred_guard_mutex locks to be held, and for
* seccomp_can_sync_threads() to have returned success already
* without dropping the locks.
*
*/
static inline void seccomp_sync_threads(void)
{
struct task_struct *thread, *caller;
BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
assert_spin_locked(&current->sighand->siglock);
/* Synchronize all threads. */
caller = current;
for_each_thread(caller, thread) {
/* Skip current, since it needs no changes. */
if (thread == caller)
continue;
/* Get a task reference for the new leaf node. */
get_seccomp_filter(caller);
/*
* Drop the task reference to the shared ancestor since
* current's path will hold a reference. (This also
* allows a put before the assignment.)
*/
put_seccomp_filter(thread);
smp_store_release(&thread->seccomp.filter,
caller->seccomp.filter);
/*
* Opt the other thread into seccomp if needed.
* As threads are considered to be trust-realm
* equivalent (see ptrace_may_access), it is safe to
* allow one thread to transition the other.
*/
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
/*
* Don't let an unprivileged task work around
* the no_new_privs restriction by creating
* a thread that sets it up, enters seccomp,
* then dies.
*/
if (task_no_new_privs(caller))
task_set_no_new_privs(thread);
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
}
}
}
/**
* seccomp_prepare_filter: Prepares a seccomp filter for use.
* @fprog: BPF program to install
*
* Returns 0 on success or an errno on failure.
* Returns filter on success or an ERR_PTR on failure.
*/
static long seccomp_attach_filter(struct sock_fprog *fprog)
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
struct seccomp_filter *filter;
unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
@ -234,12 +374,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
long ret;
if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
return -EINVAL;
return ERR_PTR(-EINVAL);
BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
for (filter = current->seccomp.filter; filter; filter = filter->prev)
total_insns += filter->len + 4; /* include a 4 instr penalty */
if (total_insns > MAX_INSNS_PER_PATH)
return -ENOMEM;
return ERR_PTR(-ENOMEM);
/*
* Installing a seccomp filter requires that the task have
@ -247,16 +388,16 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
* This avoids scenarios where unprivileged tasks can affect the
* behavior of privileged children.
*/
if (!current->no_new_privs &&
if (!task_no_new_privs(current) &&
security_capable_noaudit(current_cred(), current_user_ns(),
CAP_SYS_ADMIN) != 0)
return -EACCES;
return ERR_PTR(-EACCES);
/* Allocate a new seccomp_filter */
filter = kzalloc(sizeof(struct seccomp_filter) + fp_size,
GFP_KERNEL|__GFP_NOWARN);
if (!filter)
return -ENOMEM;
return ERR_PTR(-ENOMEM);;
atomic_set(&filter->usage, 1);
filter->len = fprog->len;
@ -275,28 +416,24 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
if (ret)
goto fail;
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
*/
filter->prev = current->seccomp.filter;
current->seccomp.filter = filter;
return 0;
return filter;
fail:
kfree(filter);
return ret;
return ERR_PTR(ret);
}
/**
* seccomp_attach_user_filter - attaches a user-supplied sock_fprog
* seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
* @user_filter: pointer to the user data containing a sock_fprog.
*
* Returns 0 on success and non-zero otherwise.
*/
long seccomp_attach_user_filter(char __user *user_filter)
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{
struct sock_fprog fprog;
long ret = -EFAULT;
struct seccomp_filter *filter = ERR_PTR(-EFAULT);
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
@ -309,9 +446,56 @@ long seccomp_attach_user_filter(char __user *user_filter)
#endif
if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
goto out;
ret = seccomp_attach_filter(&fprog);
filter = seccomp_prepare_filter(&fprog);
out:
return ret;
return filter;
}
/**
* seccomp_attach_filter: validate and attach filter
* @flags: flags to change filter behavior
* @filter: seccomp filter to add to the current process
*
* Caller must be holding current->sighand->siglock lock.
*
* Returns 0 on success, -ve on error.
*/
static long seccomp_attach_filter(unsigned int flags,
struct seccomp_filter *filter)
{
unsigned long total_insns;
struct seccomp_filter *walker;
assert_spin_locked(&current->sighand->siglock);
/* Validate resulting filter length. */
total_insns = filter->len;
for (walker = current->seccomp.filter; walker; walker = walker->prev)
total_insns += walker->len + 4; /* 4 instr penalty */
if (total_insns > MAX_INSNS_PER_PATH)
return -ENOMEM;
/* If thread sync has been requested, check that it is possible. */
if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
int ret;
ret = seccomp_can_sync_threads();
if (ret)
return ret;
}
/*
* If there is an existing filter, make it the prev and don't drop its
* task reference.
*/
filter->prev = current->seccomp.filter;
current->seccomp.filter = filter;
/* Now that the new filter is in place, synchronize to all threads. */
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
seccomp_sync_threads();
return 0;
}
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
@ -324,6 +508,13 @@ void get_seccomp_filter(struct task_struct *tsk)
atomic_inc(&orig->usage);
}
static inline void seccomp_filter_free(struct seccomp_filter *filter)
{
if (filter) {
kfree(filter);
}
}
/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
void put_seccomp_filter(struct task_struct *tsk)
{
@ -332,7 +523,7 @@ void put_seccomp_filter(struct task_struct *tsk)
while (orig && atomic_dec_and_test(&orig->usage)) {
struct seccomp_filter *freeme = orig;
orig = orig->prev;
kfree(freeme);
seccomp_filter_free(freeme);
}
}
@ -351,7 +542,7 @@ static void seccomp_send_sigsys(int syscall, int reason)
info.si_code = SYS_SECCOMP;
info.si_call_addr = (void __user *)KSTK_EIP(current);
info.si_errno = reason;
info.si_arch = syscall_get_arch(current, task_pt_regs(current));
info.si_arch = syscall_get_arch();
info.si_syscall = syscall;
force_sig_info(SIGSYS, &info, current);
}
@ -376,12 +567,17 @@ static int mode1_syscalls_32[] = {
int __secure_computing(int this_syscall)
{
int mode = current->seccomp.mode;
int exit_sig = 0;
int *syscall;
u32 ret;
switch (mode) {
/*
* Make sure that any changes to mode from another thread have
* been seen after TIF_SECCOMP was seen.
*/
rmb();
switch (current->seccomp.mode) {
case SECCOMP_MODE_STRICT:
syscall = mode1_syscalls;
#ifdef CONFIG_COMPAT
@ -467,47 +663,152 @@ long prctl_get_seccomp(void)
}
/**
* prctl_set_seccomp: configures current->seccomp.mode
* @seccomp_mode: requested mode to use
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
*
* This function may be called repeatedly with a @seccomp_mode of
* SECCOMP_MODE_FILTER to install additional filters. Every filter
* successfully installed will be evaluated (in reverse order) for each system
* call the task makes.
* seccomp_set_mode_strict: internal function for setting strict seccomp
*
* Once current->seccomp.mode is non-zero, it may not be changed.
*
* Returns 0 on success or -EINVAL on failure.
*/
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
static long seccomp_set_mode_strict(void)
{
const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
long ret = -EINVAL;
if (current->seccomp.mode &&
current->seccomp.mode != seccomp_mode)
spin_lock_irq(&current->sighand->siglock);
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;
#ifdef TIF_NOTSC
disable_TSC();
#endif
seccomp_assign_mode(current, seccomp_mode);
ret = 0;
out:
spin_unlock_irq(&current->sighand->siglock);
return ret;
}
#ifdef CONFIG_SECCOMP_FILTER
/**
* seccomp_set_mode_filter: internal function for setting seccomp filter
* @flags: flags to change filter behavior
* @filter: struct sock_fprog containing filter
*
* This function may be called repeatedly to install additional filters.
* Every filter successfully installed will be evaluated (in reverse order)
* for each system call the task makes.
*
* Once current->seccomp.mode is non-zero, it may not be changed.
*
* Returns 0 on success or -EINVAL on failure.
*/
static long seccomp_set_mode_filter(unsigned int flags,
const char __user *filter)
{
const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
struct seccomp_filter *prepared = NULL;
long ret = -EINVAL;
/* Validate flags. */
if (flags & ~SECCOMP_FILTER_FLAG_MASK)
return -EINVAL;
/* Prepare the new filter before holding any locks. */
prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
return PTR_ERR(prepared);
/*
* Make sure we cannot change seccomp or nnp state via TSYNC
* while another thread is in the middle of calling exec.
*/
if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
mutex_lock_killable(&current->signal->cred_guard_mutex))
goto out_free;
spin_lock_irq(&current->sighand->siglock);
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;
ret = seccomp_attach_filter(flags, prepared);
if (ret)
goto out;
/* Do not free the successfully attached filter. */
prepared = NULL;
seccomp_assign_mode(current, seccomp_mode);
out:
spin_unlock_irq(&current->sighand->siglock);
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
seccomp_filter_free(prepared);
return ret;
}
#else
static inline long seccomp_set_mode_filter(unsigned int flags,
const char __user *filter)
{
return -EINVAL;
}
#endif
/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs)
{
switch (op) {
case SECCOMP_SET_MODE_STRICT:
if (flags != 0 || uargs != NULL)
return -EINVAL;
return seccomp_set_mode_strict();
case SECCOMP_SET_MODE_FILTER:
return seccomp_set_mode_filter(flags, uargs);
default:
return -EINVAL;
}
}
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
const char __user *, uargs)
{
return do_seccomp(op, flags, uargs);
}
/**
* prctl_set_seccomp: configures current->seccomp.mode
* @seccomp_mode: requested mode to use
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
*
* Returns 0 on success or -EINVAL on failure.
*/
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
{
unsigned int op;
char __user *uargs;
switch (seccomp_mode) {
case SECCOMP_MODE_STRICT:
ret = 0;
#ifdef TIF_NOTSC
disable_TSC();
#endif
op = SECCOMP_SET_MODE_STRICT;
/*
* Setting strict mode through prctl always ignored filter,
* so make sure it is always NULL here to pass the internal
* check in do_seccomp().
*/
uargs = NULL;
break;
#ifdef CONFIG_SECCOMP_FILTER
case SECCOMP_MODE_FILTER:
ret = seccomp_attach_user_filter(filter);
if (ret)
goto out;
op = SECCOMP_SET_MODE_FILTER;
uargs = filter;
break;
#endif
default:
goto out;
return -EINVAL;
}
current->seccomp.mode = seccomp_mode;
set_thread_flag(TIF_SECCOMP);
out:
return ret;
/* prctl interface doesn't have flags, so they are always zero. */
return do_seccomp(op, 0, uargs);
}

View File

@ -2427,12 +2427,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (arg2 != 1 || arg3 || arg4 || arg5)
return -EINVAL;
current->no_new_privs = 1;
task_set_no_new_privs(current);
break;
case PR_GET_NO_NEW_PRIVS:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
return current->no_new_privs ? 1 : 0;
return task_no_new_privs(current) ? 1 : 0;
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;

View File

@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */
cond_syscall(sys_kcmp);
/* operate on Secure Computing state */
cond_syscall(sys_seccomp);

View File

@ -629,7 +629,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
* There is no exception for unconfined as change_hat is not
* available.
*/
if (current->no_new_privs)
if (task_no_new_privs(current))
return -EPERM;
/* released below */
@ -780,7 +780,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
* no_new_privs is set because this aways results in a reduction
* of permissions.
*/
if (current->no_new_privs && !unconfined(profile)) {
if (task_no_new_privs(current) && !unconfined(profile)) {
put_cred(cred);
return -EPERM;
}