From 89563feb836a95316305bab9fe75f85c64170175 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 9 Sep 2014 17:36:05 -0700 Subject: [PATCH 01/69] arm64: process: dump memory around registers when displaying regs A port of 8608d7c4418c75841c562a90cddd9beae5798a48 to ARM64. Both the original code and this port are limited to dumping kernel addresses, so don't bother if the registers are from a userspace process. Change-Id: Idc76804c54efaaeb70311cbb500c54db6dac4525 Signed-off-by: Greg Hackmann --- arch/arm64/kernel/process.c | 66 +++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index e2eb9453d3a1..7c11c74f7f54 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -141,6 +141,70 @@ void machine_restart(char *cmd) while (1); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + unsigned int i; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->pc - nbytes, nbytes * 2, "PC"); + show_data(regs->regs[30] - nbytes, nbytes * 2, "LR"); + show_data(regs->sp - nbytes, nbytes * 2, "SP"); + for (i = 0; i < 30; i++) { + char name[4]; + snprintf(name, sizeof(name), "X%u", i); + show_data(regs->regs[i] - nbytes, nbytes * 2, name); + } + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { int i; @@ -156,6 +220,8 @@ void __show_regs(struct pt_regs *regs) if (i % 2 == 0) printk("\n"); } + if (!user_mode(regs)) + show_extra_register_data(regs, 128); printk("\n"); } From c23e1b19b17aff981414a77eaca235e1592b0852 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 9 Sep 2014 12:14:40 -0700 Subject: [PATCH 02/69] arm64: add HWCAP_EVTSTRM and associated hwcap refactoring Take the hwcaps changes from 46efe547aca8498d51b64460c02366ae4032ca32 to facilitate cherry-picking later hwcaps changes, while skipping the timer changes that actually enable event streams for now. The timer changes depend on some non-trivial changes made after 3.10, and can safely be dropped: the kernel will just continue reporting that HWCAP_EVTSTRM is not available. Bug: 17431179 Change-Id: I41548846f8cd7ae8147a2b115cc0f84708e29552 Signed-off-by: Greg Hackmann --- arch/arm64/include/asm/hwcap.h | 11 ++++++----- arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/setup.c | 11 +++++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d4482fa35bc..3a48433dfb57 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -30,6 +30,7 @@ #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_EVTSTRM (1 << 21) #ifndef __ASSEMBLY__ /* @@ -37,11 +38,11 @@ * instruction set this cpu supports. */ #define ELF_HWCAP (elf_hwcap) -#define COMPAT_ELF_HWCAP (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ - COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) + +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP (compat_elf_hwcap) +extern unsigned int compat_elf_hwcap; +#endif extern unsigned int elf_hwcap; #endif diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index eea497578b87..9b12476e9c85 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -21,6 +21,7 @@ */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) +#define HWCAP_EVTSTRM (1 << 2) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..57cb0063d74c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -60,6 +60,16 @@ EXPORT_SYMBOL(processor_id); unsigned int elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +#endif + static const char *cpu_name; static const char *machine_name; phys_addr_t __fdt_pointer __initdata; @@ -309,6 +319,7 @@ subsys_initcall(topology_init); static const char *hwcap_str[] = { "fp", "asimd", + "evtstrm", NULL }; From d648cd8603215f0c5518780869016cde67084ba5 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 18 Sep 2013 16:14:28 +0100 Subject: [PATCH 03/69] arm64: Widen hwcap to be 64 bit Under arm64 elf_hwcap is a 32 bit quantity, but it is stored in a 64 bit auxiliary ELF field and glibc reads hwcap as 64 bit. This patch widens elf_hwcap to be 64 bit. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hwcap.h | 2 +- arch/arm64/kernel/setup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 3a48433dfb57..6cddbb0c9f54 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -44,6 +44,6 @@ extern unsigned int compat_elf_hwcap; #endif -extern unsigned int elf_hwcap; +extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 57cb0063d74c..d6c340eb45cc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -57,7 +57,7 @@ unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); #ifdef CONFIG_COMPAT From 500cee98c8e2b2abdedbbe81bc1f9871c5ec8a97 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Mon, 16 Dec 2013 21:04:36 +0000 Subject: [PATCH 04/69] arm64: Add hwcaps for crypto and CRC32 extensions. Advertise the optional cryptographic and CRC32 instructions to user space where present. Several hwcap bits [3-7] are allocated. Signed-off-by: Steve Capper [bit 2 is taken now so use bits 3-7 instead] Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/hwcap.h | 6 ++++- arch/arm64/kernel/setup.c | 37 +++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 9b12476e9c85..73cf0f54d57c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -22,6 +22,10 @@ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) #define HWCAP_EVTSTRM (1 << 2) - +#define HWCAP_AES (1 << 3) +#define HWCAP_PMULL (1 << 4) +#define HWCAP_SHA1 (1 << 5) +#define HWCAP_SHA2 (1 << 6) +#define HWCAP_CRC32 (1 << 7) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index d6c340eb45cc..a480eae24ec9 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -110,6 +110,7 @@ void __init early_print(const char *str, ...) static void __init setup_processor(void) { struct cpu_info *cpu_info; + u64 features, block; /* * locate processor in the list of supported processor @@ -130,6 +131,37 @@ static void __init setup_processor(void) sprintf(init_utsname()->machine, "aarch64"); elf_hwcap = 0; + + /* + * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. + * The blocks we test below represent incremental functionality + * for non-negative values. Negative values are reserved. + */ + features = read_cpuid(ID_AA64ISAR0_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_PMULL; + case 1: + elf_hwcap |= HWCAP_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_CRC32; } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -320,6 +352,11 @@ static const char *hwcap_str[] = { "fp", "asimd", "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", NULL }; From 1ffb7f631abe362e99dd64e566cc62255f4423c3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Mar 2014 07:34:44 +0000 Subject: [PATCH 05/69] binfmt_elf: add ELF_HWCAP2 to compat auxv entries Add ELF_HWCAP2 to the set of auxv entries that is passed to a 32-bit ELF program running in 32-bit compat mode under a 64-bit kernel. Signed-off-by: Ard Biesheuvel Acked-by: Andrew Morton Signed-off-by: Catalin Marinas --- fs/compat_binfmt_elf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index a81147e2e4ef..4d24d17bcfc1 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime, #define ELF_HWCAP COMPAT_ELF_HWCAP #endif +#ifdef COMPAT_ELF_HWCAP2 +#undef ELF_HWCAP2 +#define ELF_HWCAP2 COMPAT_ELF_HWCAP2 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO From a6483864467b280cb3d9e916823d3bd9a78f3ded Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Mar 2014 07:34:45 +0000 Subject: [PATCH 06/69] arm64: add AT_HWCAP2 support for 32-bit compat Add support for the ELF auxv entry AT_HWCAP2 when running 32-bit ELF binaries in compat mode. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hwcap.h | 3 ++- arch/arm64/kernel/setup.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6cddbb0c9f54..9a4cbd60c88e 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -41,7 +41,8 @@ #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) -extern unsigned int compat_elf_hwcap; +#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2) +extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; #endif extern unsigned long elf_hwcap; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a480eae24ec9..f97c5d26e446 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -68,6 +68,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap); COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; #endif static const char *cpu_name; From 2c15311ffaad6ff6aa7f0a735694d4d2c7e4b3c5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Mar 2014 07:34:46 +0000 Subject: [PATCH 07/69] arm64: advertise ARMv8 extensions to 32-bit compat ELF binaries This adds support for advertising the presence of ARMv8 Crypto Extensions in the Aarch32 execution state to 32-bit ELF binaries running in 32-bit compat mode under the arm64 kernel. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hwcap.h | 6 ++++++ arch/arm64/kernel/setup.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 9a4cbd60c88e..024c46183c3c 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -32,6 +32,12 @@ #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) #define COMPAT_HWCAP_EVTSTRM (1 << 21) +#define COMPAT_HWCAP2_AES (1 << 0) +#define COMPAT_HWCAP2_PMULL (1 << 1) +#define COMPAT_HWCAP2_SHA1 (1 << 2) +#define COMPAT_HWCAP2_SHA2 (1 << 3) +#define COMPAT_HWCAP2_CRC32 (1 << 4) + #ifndef __ASSEMBLY__ /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f97c5d26e446..3a06f1aa0902 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -163,6 +163,38 @@ static void __init setup_processor(void) block = (features >> 16) & 0xf; if (block && !(block & 0x8)) elf_hwcap |= HWCAP_CRC32; + +#ifdef CONFIG_COMPAT + /* + * ID_ISAR5_EL1 carries similar information as above, but pertaining to + * the Aarch32 32-bit execution state. + */ + features = read_cpuid(ID_ISAR5_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; + case 1: + compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif } static void __init setup_machine_fdt(phys_addr_t dt_phys) From d7592de8d09e5834d5ef54c842ffae6fccf92a25 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 16 Mar 2013 08:48:13 +0000 Subject: [PATCH 08/69] arm64: debug: consolidate software breakpoint handlers The software breakpoint handlers are hooked in directly from ptrace, which makes it difficult to add additional handlers for things like kprobes and kgdb. This patch moves the handling code into debug-monitors.c, where we can dispatch to different debug subsystems more easily. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/debug-monitors.h | 9 ++++ arch/arm64/include/asm/ptrace.h | 2 - arch/arm64/kernel/debug-monitors.c | 66 ++++++++++++++++++++++++- arch/arm64/kernel/ptrace.c | 59 ---------------------- arch/arm64/kernel/traps.c | 5 +- 5 files changed, 75 insertions(+), 66 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7eaa0b302493..ef8235c68c09 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -83,6 +83,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif +#ifdef CONFIG_COMPAT +int aarch32_break_handler(struct pt_regs *regs); +#else +static int aarch32_break_handler(struct pt_regs *regs) +{ + return -EFAULT; +} +#endif + #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 7304fa2fd9fa..41e59e2459ff 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -171,7 +171,5 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -extern int aarch32_break_trap(struct pt_regs *regs); - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4726dc054b3..08018e3df580 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -226,13 +227,74 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (!user_mode(regs)) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index aebfc1519e8e..7190a6544cab 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -53,28 +53,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -819,33 +797,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1113,16 +1064,6 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } - -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - asmlinkage int syscall_trace(int dir, struct pt_regs *regs) { unsigned long saved_reg; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f30852d28590..7ffadddb645d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { From 51ee4b18842045289a9557c9d5f817f1c16a8bd8 Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Thu, 30 Jan 2014 15:07:34 -0800 Subject: [PATCH 09/69] arm64: copy conditional instruction tests from arm Copy the code that is used to compute if a conditional instruction would be executed. This code is needed to support A32 instruction emulation in the kernel. Change-Id: I0bab7537efd8cc317bd20995cd36961cf95165aa Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/362154 Reviewed-on: http://git-master/r/365061 Reviewed-by: Richard Wiley Tested-by: Oskari Jaaskelainen --- arch/arm64/include/asm/opcodes.h | 231 +++++++++++++++++++++++++++++++ arch/arm64/kernel/Makefile | 4 +- arch/arm64/kernel/opcodes.c | 72 ++++++++++ 3 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/opcodes.h create mode 100644 arch/arm64/kernel/opcodes.c diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h new file mode 100644 index 000000000000..fd189a522aee --- /dev/null +++ b/arch/arm64/include/asm/opcodes.h @@ -0,0 +1,231 @@ +/* + * Copied from arch/arm/include/asm/opcodes.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_ARM_OPCODES_H +#define __ASM_ARM_OPCODES_H + +#ifndef __ASSEMBLY__ +#include +extern asmlinkage unsigned int arm_check_condition(u32 opcode, u64 psr); +#endif + +#define ARM_OPCODE_CONDTEST_FAIL 0 +#define ARM_OPCODE_CONDTEST_PASS 1 +#define ARM_OPCODE_CONDTEST_UNCOND 2 + + +/* + * Assembler opcode byteswap helpers. + * These are only intended for use by this header: don't use them directly, + * because they will be suboptimal in most cases. + */ +#define ___asm_opcode_swab32(x) ( \ + (((x) << 24) & 0xFF000000) \ + | (((x) << 8) & 0x00FF0000) \ + | (((x) >> 8) & 0x0000FF00) \ + | (((x) >> 24) & 0x000000FF) \ +) +#define ___asm_opcode_swab16(x) ( \ + (((x) << 8) & 0xFF00) \ + | (((x) >> 8) & 0x00FF) \ +) +#define ___asm_opcode_swahb32(x) ( \ + (((x) << 8) & 0xFF00FF00) \ + | (((x) >> 8) & 0x00FF00FF) \ +) +#define ___asm_opcode_swahw32(x) ( \ + (((x) << 16) & 0xFFFF0000) \ + | (((x) >> 16) & 0x0000FFFF) \ +) +#define ___asm_opcode_identity32(x) ((x) & 0xFFFFFFFF) +#define ___asm_opcode_identity16(x) ((x) & 0xFFFF) + + +/* + * Opcode byteswap helpers + * + * These macros help with converting instructions between a canonical integer + * format and in-memory representation, in an endianness-agnostic manner. + * + * __mem_to_opcode_*() convert from in-memory representation to canonical form. + * __opcode_to_mem_*() convert from canonical form to in-memory representation. + * + * + * Canonical instruction representation: + * + * ARM: 0xKKLLMMNN + * Thumb 16-bit: 0x0000KKLL, where KK < 0xE8 + * Thumb 32-bit: 0xKKLLMMNN, where KK >= 0xE8 + * + * There is no way to distinguish an ARM instruction in canonical representation + * from a Thumb instruction (just as these cannot be distinguished in memory). + * Where this distinction is important, it needs to be tracked separately. + * + * Note that values in the range 0x0000E800..0xE7FFFFFF intentionally do not + * represent any valid Thumb-2 instruction. For this range, + * __opcode_is_thumb32() and __opcode_is_thumb16() will both be false. + * + * The ___asm variants are intended only for use by this header, in situations + * involving inline assembler. For .S files, the normal __opcode_*() macros + * should do the right thing. + */ +#ifdef __ASSEMBLY__ + +#define ___opcode_swab32(x) ___asm_opcode_swab32(x) +#define ___opcode_swab16(x) ___asm_opcode_swab16(x) +#define ___opcode_swahb32(x) ___asm_opcode_swahb32(x) +#define ___opcode_swahw32(x) ___asm_opcode_swahw32(x) +#define ___opcode_identity32(x) ___asm_opcode_identity32(x) +#define ___opcode_identity16(x) ___asm_opcode_identity16(x) + +#else /* ! __ASSEMBLY__ */ + +#include +#include + +#define ___opcode_swab32(x) swab32(x) +#define ___opcode_swab16(x) swab16(x) +#define ___opcode_swahb32(x) swahb32(x) +#define ___opcode_swahw32(x) swahw32(x) +#define ___opcode_identity32(x) ((u32)(x)) +#define ___opcode_identity16(x) ((u16)(x)) + +#endif /* ! __ASSEMBLY__ */ + + +#ifdef CONFIG_CPU_ENDIAN_BE8 + +#define __opcode_to_mem_arm(x) ___opcode_swab32(x) +#define __opcode_to_mem_thumb16(x) ___opcode_swab16(x) +#define __opcode_to_mem_thumb32(x) ___opcode_swahb32(x) +#define ___asm_opcode_to_mem_arm(x) ___asm_opcode_swab32(x) +#define ___asm_opcode_to_mem_thumb16(x) ___asm_opcode_swab16(x) +#define ___asm_opcode_to_mem_thumb32(x) ___asm_opcode_swahb32(x) + +#else /* ! CONFIG_CPU_ENDIAN_BE8 */ + +#define __opcode_to_mem_arm(x) ___opcode_identity32(x) +#define __opcode_to_mem_thumb16(x) ___opcode_identity16(x) +#define ___asm_opcode_to_mem_arm(x) ___asm_opcode_identity32(x) +#define ___asm_opcode_to_mem_thumb16(x) ___asm_opcode_identity16(x) +#ifndef CONFIG_CPU_ENDIAN_BE32 +/* + * On BE32 systems, using 32-bit accesses to store Thumb instructions will not + * work in all cases, due to alignment constraints. For now, a correct + * version is not provided for BE32. + */ +#define __opcode_to_mem_thumb32(x) ___opcode_swahw32(x) +#define ___asm_opcode_to_mem_thumb32(x) ___asm_opcode_swahw32(x) +#endif + +#endif /* ! CONFIG_CPU_ENDIAN_BE8 */ + +#define __mem_to_opcode_arm(x) __opcode_to_mem_arm(x) +#define __mem_to_opcode_thumb16(x) __opcode_to_mem_thumb16(x) +#ifndef CONFIG_CPU_ENDIAN_BE32 +#define __mem_to_opcode_thumb32(x) __opcode_to_mem_thumb32(x) +#endif + +/* Operations specific to Thumb opcodes */ + +/* Instruction size checks: */ +#define __opcode_is_thumb32(x) ( \ + ((x) & 0xF8000000) == 0xE8000000 \ + || ((x) & 0xF0000000) == 0xF0000000 \ +) +#define __opcode_is_thumb16(x) ( \ + ((x) & 0xFFFF0000) == 0 \ + && !(((x) & 0xF800) == 0xE800 || ((x) & 0xF000) == 0xF000) \ +) + +/* Operations to construct or split 32-bit Thumb instructions: */ +#define __opcode_thumb32_first(x) (___opcode_identity16((x) >> 16)) +#define __opcode_thumb32_second(x) (___opcode_identity16(x)) +#define __opcode_thumb32_compose(first, second) ( \ + (___opcode_identity32(___opcode_identity16(first)) << 16) \ + | ___opcode_identity32(___opcode_identity16(second)) \ +) +#define ___asm_opcode_thumb32_first(x) (___asm_opcode_identity16((x) >> 16)) +#define ___asm_opcode_thumb32_second(x) (___asm_opcode_identity16(x)) +#define ___asm_opcode_thumb32_compose(first, second) ( \ + (___asm_opcode_identity32(___asm_opcode_identity16(first)) << 16) \ + | ___asm_opcode_identity32(___asm_opcode_identity16(second)) \ +) + +/* + * Opcode injection helpers + * + * In rare cases it is necessary to assemble an opcode which the + * assembler does not support directly, or which would normally be + * rejected because of the CFLAGS or AFLAGS used to build the affected + * file. + * + * Before using these macros, consider carefully whether it is feasible + * instead to change the build flags for your file, or whether it really + * makes sense to support old assembler versions when building that + * particular kernel feature. + * + * The macros defined here should only be used where there is no viable + * alternative. + * + * + * __inst_arm(x): emit the specified ARM opcode + * __inst_thumb16(x): emit the specified 16-bit Thumb opcode + * __inst_thumb32(x): emit the specified 32-bit Thumb opcode + * + * __inst_arm_thumb16(arm, thumb): emit either the specified arm or + * 16-bit Thumb opcode, depending on whether an ARM or Thumb-2 + * kernel is being built + * + * __inst_arm_thumb32(arm, thumb): emit either the specified arm or + * 32-bit Thumb opcode, depending on whether an ARM or Thumb-2 + * kernel is being built + * + * + * Note that using these macros directly is poor practice. Instead, you + * should use them to define human-readable wrapper macros to encode the + * instructions that you care about. In code which might run on ARMv7 or + * above, you can usually use the __inst_arm_thumb{16,32} macros to + * specify the ARM and Thumb alternatives at the same time. This ensures + * that the correct opcode gets emitted depending on the instruction set + * used for the kernel build. + * + * Look at opcodes-virt.h for an example of how to use these macros. + */ +#include + +#define __inst_arm(x) ___inst_arm(___asm_opcode_to_mem_arm(x)) +#define __inst_thumb32(x) ___inst_thumb32( \ + ___asm_opcode_to_mem_thumb16(___asm_opcode_thumb32_first(x)), \ + ___asm_opcode_to_mem_thumb16(___asm_opcode_thumb32_second(x)) \ +) +#define __inst_thumb16(x) ___inst_thumb16(___asm_opcode_to_mem_thumb16(x)) + +#ifdef CONFIG_THUMB2_KERNEL +#define __inst_arm_thumb16(arm_opcode, thumb_opcode) \ + __inst_thumb16(thumb_opcode) +#define __inst_arm_thumb32(arm_opcode, thumb_opcode) \ + __inst_thumb32(thumb_opcode) +#else +#define __inst_arm_thumb16(arm_opcode, thumb_opcode) __inst_arm(arm_opcode) +#define __inst_arm_thumb32(arm_opcode, thumb_opcode) __inst_arm(arm_opcode) +#endif + +/* Helpers for the helpers. Don't use these directly. */ +#ifdef __ASSEMBLY__ +#define ___inst_arm(x) .long x +#define ___inst_thumb16(x) .short x +#define ___inst_thumb32(first, second) .short first, second +#else +#define ___inst_arm(x) ".long " __stringify(x) "\n\t" +#define ___inst_thumb16(x) ".short " __stringify(x) "\n\t" +#define ___inst_thumb32(first, second) \ + ".short " __stringify(first) ", " __stringify(second) "\n\t" +#endif + +#endif /* __ASM_ARM_OPCODES_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d4..d7ff1913c506 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -9,9 +9,9 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o + hyp-stub.o psci.o opcodes.o -arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ +arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o diff --git a/arch/arm64/kernel/opcodes.c b/arch/arm64/kernel/opcodes.c new file mode 100644 index 000000000000..ceb5a04a1e12 --- /dev/null +++ b/arch/arm64/kernel/opcodes.c @@ -0,0 +1,72 @@ +/* + * Copied from linux/arch/arm/kernel/opcodes.c + * + * A32 condition code lookup feature moved from nwfpe/fpopcode.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +/* + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Returns: + * ARM_OPCODE_CONDTEST_FAIL - if condition fails + * ARM_OPCODE_CONDTEST_PASS - if condition passes (including AL) + * ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional + * opcode space from v5 onwards + * + * Code that tests whether a conditional instruction would pass its condition + * check should check that return value == ARM_OPCODE_CONDTEST_PASS. + * + * Code that tests if a condition means that the instruction would be executed + * (regardless of conditional or unconditional) should instead check that the + * return value != ARM_OPCODE_CONDTEST_FAIL. + */ +asmlinkage unsigned int arm_check_condition(u32 opcode, u64 psr) +{ + u32 cc_bits = opcode >> 28; + u32 psr_cond = (u32)(psr & 0xffffffff) >> 28; + unsigned int ret; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((cc_map[cc_bits] >> (psr_cond)) & 1) + ret = ARM_OPCODE_CONDTEST_PASS; + else + ret = ARM_OPCODE_CONDTEST_FAIL; + } else { + ret = ARM_OPCODE_CONDTEST_UNCOND; + } + + return ret; +} +EXPORT_SYMBOL_GPL(arm_check_condition); From 99738c2dc1ad034c0b71318b245b063479ed423a Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Wed, 29 Jan 2014 13:45:20 -0800 Subject: [PATCH 10/69] arm64: ptrace: add is_wide_instruction() macro Add the is_wide_instruction() macro. This was copied from the arm architecture. Change-Id: I28f83b47f5c587fe778dc2846df77673f8dd918b Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/361737 Reviewed-by: Peng Du Reviewed-on: http://git-master/r/365060 Reviewed-by: Richard Wiley Tested-by: Oskari Jaaskelainen --- arch/arm64/include/asm/ptrace.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41e59e2459ff..fff28950e660 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -171,5 +171,13 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif +/* + * True if instr is a 32-bit thumb instruction. This works if instr + * is the first or only half-word of a thumb instruction. It also works + * when instr holds all 32-bits of a wide thumb instruction if stored + * in the form (first_half<<16)|(second_half) + */ +#define is_wide_instruction(instr) ((unsigned)(instr) >= 0xe800) + #endif /* __ASSEMBLY__ */ #endif From bc97ba4de74958ead0d40d44d1b4516e7f3de8dd Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Wed, 29 Jan 2014 13:41:01 -0800 Subject: [PATCH 11/69] arm64: add undefined instruction handler hooks Add undefined instruction handler hooks similar to the system in the arm archetecture. One difference is that hooks can only be added at boot time and they can never be removed. This removes the need for the spinlock in the handler. Change-Id: I4684937f5209ca2a64ee63947bb2ab6411ae14f7 Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/361736 Reviewed-on: http://git-master/r/365059 Reviewed-by: Richard Wiley Tested-by: Oskari Jaaskelainen --- arch/arm64/include/asm/traps.h | 13 +++++++++++ arch/arm64/kernel/traps.c | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 10ca8ff93cc2..75f51eadb9eb 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -18,6 +18,19 @@ #ifndef __ASM_TRAP_H #define __ASM_TRAP_H +#include + +struct undef_hook { + struct list_head node; + u32 instr_mask; + u32 instr_val; + u32 pstate_mask; + u32 pstate_val; + int (*fn)(struct pt_regs *regs, unsigned int instr); +}; + +void register_undef_hook(struct undef_hook *hook); + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7ffadddb645d..4f8bf320ec99 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -3,6 +3,7 @@ * * Copyright (C) 1995-2009 Russell King * Copyright (C) 2012 ARM Ltd. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -257,15 +258,54 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, die(str, regs, err); } +static LIST_HEAD(undef_hook); + +void register_undef_hook(struct undef_hook *hook) +{ + list_add(&hook->node, &undef_hook); +} + +static int call_undef_hook(struct pt_regs *regs, unsigned int instr) +{ + struct undef_hook *hook; + int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL; + + list_for_each_entry(hook, &undef_hook, node) + if ((instr & hook->instr_mask) == hook->instr_val && + (regs->pstate & hook->pstate_mask) == hook->pstate_val) + fn = hook->fn; + + return fn ? fn(regs, instr) : 1; +} + asmlinkage void __exception do_undefinstr(struct pt_regs *regs) { + u32 instr; siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) return; + if (compat_thumb_mode(regs)) { + if (get_user(instr, (u16 __user *)pc)) + goto die_sig; + if (is_wide_instruction(instr)) { + u32 instr2; + if (get_user(instr2, (u16 __user *)pc+1)) + goto die_sig; + instr <<= 16; + instr |= instr2; + } + } else if ((get_user(instr, (u32 __user *)pc))) { + goto die_sig; + } + + if (call_undef_hook(regs, instr) == 0) + return; + +die_sig: if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { pr_info("%s[%d]: undefined instruction: pc=%p\n", From b3987a9c4664d18b8c8af6d43e6ae1904c8febce Mon Sep 17 00:00:00 2001 From: Peng Du Date: Wed, 23 Jul 2014 11:40:33 -0700 Subject: [PATCH 12/69] arm64: kernel: check mode for get_user in undefinstr get_user() should be called only for user_mode undef instruction. Change-Id: Ia654783de0cf72abac6847ac9630236f9f0d6ebb Signed-off-by: Peng Du Reviewed-on: http://git-master/r/441348 Reviewed-by: Thomas Cherry Reviewed-by: Bo Yan --- arch/arm64/kernel/traps.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4f8bf320ec99..0da47699510b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -287,19 +287,23 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) return; - if (compat_thumb_mode(regs)) { - if (get_user(instr, (u16 __user *)pc)) - goto die_sig; - if (is_wide_instruction(instr)) { - u32 instr2; - if (get_user(instr2, (u16 __user *)pc+1)) + if (user_mode(regs)) { + if (compat_thumb_mode(regs)) { + if (get_user(instr, (u16 __user *)pc)) goto die_sig; - instr <<= 16; - instr |= instr2; + if (is_wide_instruction(instr)) { + u32 instr2; + if (get_user(instr2, (u16 __user *)pc+1)) + goto die_sig; + instr <<= 16; + instr |= instr2; + } + } else if (get_user(instr, (u32 __user *)pc)) { + goto die_sig; } - - } else if ((get_user(instr, (u32 __user *)pc))) { - goto die_sig; + } else { + /* kernel mode */ + instr = *((u32 *)pc); } if (call_undef_hook(regs, instr) == 0) From 75079f8dfc7d4b32844a24e30bafa317e0af97ee Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Tue, 11 Feb 2014 10:08:51 -0800 Subject: [PATCH 13/69] arm64: a backwards compatible config option Create a config option that when selected configures the kernel to be as backwards compatable with kernels that ran on an ARMv7 processor as possible. Change-Id: I7cd67e6d4174335f9a67aba2a39dfd993f240c27 Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/366094 Reviewed-by: Richard Wiley Reviewed-by: Automatic_Commit_Validation_User Tested-by: Oskari Jaaskelainen --- arch/arm64/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3ee27d34a020..437dd6cf5ac8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -182,6 +182,16 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config ARMV7_COMPAT + bool "Kernel support for ARMv7 applications" + depends on COMPAT + help + This option enables features that allow that ran on an ARMv7 or older + processor to continue functioning. + + If you want to execute ARMv7 applications, say Y + + source "mm/Kconfig" endmenu From ecff782253197463f33d719a4fed4bd39c8de469 Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Thu, 30 Jan 2014 15:10:39 -0800 Subject: [PATCH 14/69] arm64: emulate the swp/swpb instruction The swp and spwb instructions were deprecated in ARMv6. ARMv8 obsoleted the instruction. Despite this, many applications rely on these instruuctions. This patch starts with the version present in the arm architecture. However, it uses the ldx*()/stx*() functions to implement the handler in C code. It also removes a lot of code that is not needed. Change-Id: I6882fbe5f71bfa8f9e9a75d067b2111188c6f2fa Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/366097 Reviewed-by: Richard Wiley Tested-by: Oskari Jaaskelainen Conflicts: arch/arm64/Kconfig arch/arm64/kernel/Makefile --- arch/arm64/Kconfig | 26 ++++++ arch/arm64/kernel/Makefile | 3 + arch/arm64/kernel/swp_emulate.c | 155 ++++++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+) create mode 100644 arch/arm64/kernel/swp_emulate.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 437dd6cf5ac8..787cee384e1c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -152,6 +152,31 @@ config NR_CPUS depends on SMP default "4" +config SWP_EMULATE + bool "Emulate SWP/SWPB instructions" + help + ARMv6 architecture deprecates use of the SWP/SWPB instructions. ARMv8 + oblosetes the use of SWP/SWPB instructions. ARMv7 multiprocessing + extensions introduce the ability to disable these instructions, + triggering an undefined instruction exception when executed. Say Y + here to enable software emulation of these instructions for userspace + (not kernel) using LDREX/STREX. Also creates /proc/cpu/swp_emulation + for statistics. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDREX/STREX rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y. + source kernel/Kconfig.preempt config HZ @@ -185,6 +210,7 @@ config HW_PERF_EVENTS config ARMV7_COMPAT bool "Kernel support for ARMv7 applications" depends on COMPAT + select SWP_EMULATE help This option enables features that allow that ran on an ARMv7 or older processor to continue functioning. diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d7ff1913c506..3e6706630ae5 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,9 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o + +obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c new file mode 100644 index 000000000000..05ffe59b5bc4 --- /dev/null +++ b/arch/arm64/kernel/swp_emulate.c @@ -0,0 +1,155 @@ +/* + * Derived from from linux/arch/arm/kernel/swp_emulate.c + * + * Copyright (C) 2009 ARM Limited + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Implements emulation of the SWP/SWPB instructions using load-exclusive and + * store-exclusive for processors that have them disabled (or future ones that + * might not implement them). + * + * Syntax of SWP{B} instruction: SWP{B} , , [] + * Where: Rt = destination + * Rt2 = source + * Rn = address + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * Macros/defines for extracting register numbers from instruction. + */ +#define EXTRACT_REG_NUM(instruction, offset) \ + (((instruction) & (0xf << (offset))) >> (offset)) +#define RN_OFFSET 16 +#define RT_OFFSET 12 +#define RT2_OFFSET 0 +/* + * Bit 22 of the instruction encoding distinguishes between + * the SWP and SWPB variants (bit set means SWPB). + */ +#define TYPE_SWPB (1 << 22) + +static pid_t previous_pid; + +u64 swpb_count = 0; +u64 swp_count = 0; + +/* + * swp_handler logs the id of calling process, dissects the instruction, sanity + * checks the memory location, calls emulate_swpX for the actual operation and + * deals with fixup/error handling before returning + */ +static int swp_handler(struct pt_regs *regs, unsigned int instr) +{ + u32 address_reg, destreg, data, type; + uintptr_t address; + unsigned int res = 0; + u32 temp32; + u8 temp8; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + res = arm_check_condition(instr, regs->pstate); + switch (res) { + case ARM_OPCODE_CONDTEST_PASS: + break; + case ARM_OPCODE_CONDTEST_FAIL: + /* Condition failed - return to next instruction */ + regs->pc += 4; + return 0; + case ARM_OPCODE_CONDTEST_UNCOND: + /* If unconditional encoding - not a SWP, undef */ + return -EFAULT; + default: + return -EINVAL; + } + + if (current->pid != previous_pid) { + pr_warn("\"%s\" (%ld) uses obsolete SWP{B} instruction\n", + current->comm, (unsigned long)current->pid); + previous_pid = current->pid; + } + + address = regs->regs[EXTRACT_REG_NUM(instr, RN_OFFSET)] & 0xffffffff; + data = regs->regs[EXTRACT_REG_NUM(instr, RT2_OFFSET)]; + destreg = EXTRACT_REG_NUM(instr, RT_OFFSET); + + type = instr & TYPE_SWPB; + + /* Check access in reasonable access range for both SWP and SWPB */ + if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + pr_debug("SWP{B} emulation: access to %p not allowed!\n", + (void *)address); + res = -EFAULT; + } + if (type == TYPE_SWPB) { + do { + temp8 = ldax8((u8 *) address); + } while (stx8((u8 *) address, (u8) data)); + regs->regs[destreg] = temp8; + regs->pc += 4; + swpb_count++; + } else if (address & 0x3) { + /* SWP to unaligned address not permitted */ + pr_debug("SWP instruction on unaligned pointer!\n"); + return -EFAULT; + } else { + do { + temp32 = ldax32((u32 *) address); + } while (stlx32((u32 *) address, (u32) data)); + regs->regs[destreg] = temp32; + regs->pc += 4; + swp_count++; + } + + return 0; +} + +/* + * Only emulate SWP/SWPB executed in ARM state/User mode. + * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE. + */ +static struct undef_hook swp_hook = { + .instr_mask = 0x0fb00ff0, + .instr_val = 0x01000090, + .pstate_mask = COMPAT_PSR_MODE_MASK | COMPAT_PSR_T_BIT, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = swp_handler +}; + +/* + * Register handler and create status file in /proc/cpu + * Invoked as late_initcall, since not needed before init spawned. + */ +static int __init swp_emulation_init(void) +{ + struct dentry *dir; + dir = debugfs_create_dir("swp_emulate", NULL); + debugfs_create_u64("swp_count", S_IRUGO | S_IWUSR, dir, &swp_count); + debugfs_create_u64("swpb_count", S_IRUGO | S_IWUSR, dir, &swpb_count); + + pr_notice("Registering SWP/SWPB emulation handler\n"); + register_undef_hook(&swp_hook); + + + return 0; +} + +late_initcall(swp_emulation_init); From d0c5e1444d3efbe361bb5e9fc0d8dd0aeb6ae2fb Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Tue, 18 Feb 2014 17:50:57 -0800 Subject: [PATCH 15/69] arm64: fix a warning and a typo in SWP emulation The store-release-exclusive is missing the "L" that makes it a release rather than a normal store-exclusive. Remove a variable that is not used and causes a compiler warning. Change-Id: I91633a352b805ed9af450b632c9ee394235637c4 Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/369076 Reviewed-by: Richard Wiley Reviewed-by: Automatic_Commit_Validation_User --- arch/arm64/kernel/swp_emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c index 05ffe59b5bc4..1dd824e56eb4 100644 --- a/arch/arm64/kernel/swp_emulate.c +++ b/arch/arm64/kernel/swp_emulate.c @@ -58,7 +58,7 @@ u64 swp_count = 0; */ static int swp_handler(struct pt_regs *regs, unsigned int instr) { - u32 address_reg, destreg, data, type; + u32 destreg, data, type; uintptr_t address; unsigned int res = 0; u32 temp32; @@ -102,7 +102,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) if (type == TYPE_SWPB) { do { temp8 = ldax8((u8 *) address); - } while (stx8((u8 *) address, (u8) data)); + } while (stlx8((u8 *) address, (u8) data)); regs->regs[destreg] = temp8; regs->pc += 4; swpb_count++; From 70f16b592ee11f74eb0bc2664202b87ee3582bd8 Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Thu, 20 Feb 2014 18:18:53 -0800 Subject: [PATCH 16/69] arm64: add fault handling to SWP emulation Add excpetion table and fixup for SWP/SWPB instruction emulation. This prevents the kernel from panicing when emulating a SWP/SWPB instruction that access unmapped memory. Change-Id: I4a9ca34fa161a0f306cdb663827d9bee39cec733 Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/370278 --- arch/arm64/kernel/swp_emulate.c | 78 ++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c index 1dd824e56eb4..b1a176bdede3 100644 --- a/arch/arm64/kernel/swp_emulate.c +++ b/arch/arm64/kernel/swp_emulate.c @@ -32,6 +32,71 @@ #include #include +/* + * Error-checking SWP macros implemented using ldrex{b}/strex{b} + */ + +static int swpb(u8 in, u8 *out, u8 *addr) +{ + u8 _out; + int res; + int err = 0; + + do { + __asm__ __volatile__( + "0: ldxrb %w1, %4\n" + "1: stxrb %w0, %w3, %4\n" + "2:\n" + " .section .fixup,\"ax\"\n" + " .align 2\n" + "3: mov %w2, %5\n" + " b 2b\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .align 3\n" + " .quad 0b, 3b\n" + " .quad 1b, 3b\n" + " .previous" + : "=&r" (res), "=r" (_out), "=r" (err) + : "r" (in), "Q" (addr), "i" (-EFAULT) + : "cc", "memory"); + } while (err == 0 && res != 0); + + if (err == 0) + *out = _out; + return err; +} + +static int swp(u32 in, u32 *out, u32 *addr) +{ + u32 _out; + int res; + int err = 0; + + do { + __asm__ __volatile__( + "0: ldxr %w1, %4\n" + "1: stxr %w0, %w3, %4\n" + "2:\n" + " .section .fixup,\"ax\"\n" + " .align 2\n" + "3: mov %w2, %5\n" + " b 2b\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .align 3\n" + " .quad 0b, 3b\n" + " .quad 1b, 3b\n" + " .previous" + : "=&r" (res), "=r" (_out), "=r" (err) + : "r" (in), "Q" (addr), "i" (-EFAULT) + : "cc", "memory"); + } while (err == 0 && res != 0); + + if (err == 0) + *out = _out; + return err; +} /* * Macros/defines for extracting register numbers from instruction. */ @@ -61,6 +126,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) u32 destreg, data, type; uintptr_t address; unsigned int res = 0; + int err; u32 temp32; u8 temp8; @@ -100,9 +166,9 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) res = -EFAULT; } if (type == TYPE_SWPB) { - do { - temp8 = ldax8((u8 *) address); - } while (stlx8((u8 *) address, (u8) data)); + err = swpb((u8) data, &temp8, (u8 *) address); + if (err) + return err; regs->regs[destreg] = temp8; regs->pc += 4; swpb_count++; @@ -111,9 +177,9 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) pr_debug("SWP instruction on unaligned pointer!\n"); return -EFAULT; } else { - do { - temp32 = ldax32((u32 *) address); - } while (stlx32((u32 *) address, (u32) data)); + err = swp((u32) data, &temp32, (u32 *) address); + if (err) + return err; regs->regs[destreg] = temp32; regs->pc += 4; swp_count++; From ff5878c3916bdfc1229eeade86728814c4dd4e4f Mon Sep 17 00:00:00 2001 From: Rich Wiley Date: Mon, 10 Mar 2014 14:01:06 -0700 Subject: [PATCH 17/69] arm64: fix SWP instruction emulation initial variable values may get overwritten if they're listed as an output in ASM, even if they're not explicitly written to. Change-Id: I2a239e1819850a2a7005a46e83d82deac4ca303b Signed-off-by: Rich Wiley Reviewed-on: http://git-master/r/379646 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Li Li (SW-TEGRA) Tested-by: Li Li (SW-TEGRA) GVS: Gerrit_Virtual_Submit Reviewed-by: Alexander Van Brunt --- arch/arm64/kernel/swp_emulate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c index b1a176bdede3..508fd2edb8ab 100644 --- a/arch/arm64/kernel/swp_emulate.c +++ b/arch/arm64/kernel/swp_emulate.c @@ -40,12 +40,13 @@ static int swpb(u8 in, u8 *out, u8 *addr) { u8 _out; int res; - int err = 0; + int err; do { __asm__ __volatile__( "0: ldxrb %w1, %4\n" "1: stxrb %w0, %w3, %4\n" + " mov %w2, #0\n" "2:\n" " .section .fixup,\"ax\"\n" " .align 2\n" @@ -58,7 +59,7 @@ static int swpb(u8 in, u8 *out, u8 *addr) " .quad 1b, 3b\n" " .previous" : "=&r" (res), "=r" (_out), "=r" (err) - : "r" (in), "Q" (addr), "i" (-EFAULT) + : "r" (in), "Q" (*addr), "i" (-EFAULT) : "cc", "memory"); } while (err == 0 && res != 0); @@ -77,6 +78,7 @@ static int swp(u32 in, u32 *out, u32 *addr) __asm__ __volatile__( "0: ldxr %w1, %4\n" "1: stxr %w0, %w3, %4\n" + " mov %w2, #0\n" "2:\n" " .section .fixup,\"ax\"\n" " .align 2\n" @@ -89,7 +91,7 @@ static int swp(u32 in, u32 *out, u32 *addr) " .quad 1b, 3b\n" " .previous" : "=&r" (res), "=r" (_out), "=r" (err) - : "r" (in), "Q" (addr), "i" (-EFAULT) + : "r" (in), "Q" (*addr), "i" (-EFAULT) : "cc", "memory"); } while (err == 0 && res != 0); From 80cb26c175627cb9633aeae13adc8455450bf77a Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Tue, 28 Jan 2014 12:40:10 -0800 Subject: [PATCH 18/69] arm64: optionally set CP15BEN in SCTLR Setting CP15BEN allows legacy applications running in AArch32 mode that use CP15 DMB as similar instructions to continue running. Change-Id: If76d3c6ee12865ff8c4b4e7aed01146bead87773 Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/366096 Reviewed-by: Richard Wiley Tested-by: Oskari Jaaskelainen --- arch/arm64/Kconfig | 11 +++++++++++ arch/arm64/mm/proc.S | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 787cee384e1c..0df94f10cda6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -218,6 +218,17 @@ config ARMV7_COMPAT If you want to execute ARMv7 applications, say Y +config ARMV7_COMPAT_CP15_BARRIER + bool "Allow applications to use the CP15 barrier operations" + depends on ARMV7_COMPAT + default y + help + This option allows applications to use deprecated CP15 barrier + instructions. This is useful because this was the only way to create + a barrier on older ARM processors. + + If you want to execute ARMv7 applications, say Y + source "mm/Kconfig" endmenu diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9428de8a8f37..48fffb27a7bf 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -160,6 +160,20 @@ ENTRY(__cpu_setup) ret // return to head.S ENDPROC(__cpu_setup) +#ifdef CONFIG_ARMV7_COMPAT_CP15_BARRIER + /* + * n n T + * U E WT T UD US IHBS + * CE0 XWHW CZ ME TEEA S + * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM + * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved + * .... .100 .... 01.1 11.1 ..01 0011 1101 < software settings + */ + .type crval, #object +crval: + .word 0x030802e2 // clear + .word 0x0405d13d // set +#else /* * n n T * U E WT T UD US IHBS @@ -172,3 +186,4 @@ ENDPROC(__cpu_setup) crval: .word 0x030802e2 // clear .word 0x0405d11d // set +#endif From bad15588d39c24ecb76593f632a0ab5d71ace7ed Mon Sep 17 00:00:00 2001 From: Rich Wiley Date: Wed, 4 Jun 2014 11:41:53 -0700 Subject: [PATCH 19/69] arm64: make SCTLR compat config depend on CONFIG_ARMV7_COMPAT Conflicts: arch/arm64/mm/proc.S Change-Id: I76e0067839c96e3082b42c80d3fc670cf3d371b5 Signed-off-by: Rich Wiley Reviewed-on: http://git-master/r/422173 GVS: Gerrit_Virtual_Submit Reviewed-by: Alexander Van Brunt Reviewed-by: Bharat Nihalani Tested-by: Bharat Nihalani --- arch/arm64/Kconfig | 11 ----------- arch/arm64/mm/proc.S | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0df94f10cda6..787cee384e1c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -218,17 +218,6 @@ config ARMV7_COMPAT If you want to execute ARMv7 applications, say Y -config ARMV7_COMPAT_CP15_BARRIER - bool "Allow applications to use the CP15 barrier operations" - depends on ARMV7_COMPAT - default y - help - This option allows applications to use deprecated CP15 barrier - instructions. This is useful because this was the only way to create - a barrier on older ARM processors. - - If you want to execute ARMv7 applications, say Y - source "mm/Kconfig" endmenu diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 48fffb27a7bf..9796b33a708b 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -160,7 +160,7 @@ ENTRY(__cpu_setup) ret // return to head.S ENDPROC(__cpu_setup) -#ifdef CONFIG_ARMV7_COMPAT_CP15_BARRIER +#ifdef CONFIG_ARMV7_COMPAT /* * n n T * U E WT T UD US IHBS From 2e0602939baf22b8f9057f7626c189248383d4ae Mon Sep 17 00:00:00 2001 From: Rich Wiley Date: Wed, 4 Jun 2014 11:44:03 -0700 Subject: [PATCH 20/69] arm64: enable deprecated SETEND instruction in SCTLR compat config Change-Id: I703d4843f8aab2ec63324f04cc13aaabae88e163 Signed-off-by: Rich Wiley Reviewed-on: http://git-master/r/422174 GVS: Gerrit_Virtual_Submit Reviewed-by: Alexander Van Brunt Reviewed-by: Bharat Nihalani Tested-by: Bharat Nihalani --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9796b33a708b..3e5dcd9897db 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -172,7 +172,7 @@ ENDPROC(__cpu_setup) .type crval, #object crval: .word 0x030802e2 // clear - .word 0x0405d13d // set + .word 0x0405d03d // set #else /* * n n T From cba0c6b2913c0d075a7434025f5dc29cd813707f Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Thu, 9 Jan 2014 12:51:05 -0800 Subject: [PATCH 21/69] arm64: cpuinfo: ARMv7 compatable cpuinfo option To be backwards compatable with the output of cpuinfo on an ARMv7, print the features that were optional in ARMv7 but are required in ARMv8. Change-Id: Ic728f71be4a971adc79ef552f25cfbf95a4dac29 Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/366095 Reviewed-by: Richard Wiley Tested-by: Oskari Jaaskelainen --- arch/arm64/Kconfig | 10 ++++++++++ arch/arm64/kernel/setup.c | 7 ++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 787cee384e1c..71c2a070ace4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -217,6 +217,16 @@ config ARMV7_COMPAT If you want to execute ARMv7 applications, say Y +config ARMV7_COMPAT_CPUINFO + bool "Report backwards compatible cpu features in /proc/cpuinfo" + depends on ARMV7_COMPAT + default y + help + This option makes /proc/cpuinfo list CPU features that an ARMv7 or + earlier kernel would report, but are not optional on an ARMv8 or later + processor. + + If you want to execute ARMv7 applications, say Y source "mm/Kconfig" diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3a06f1aa0902..efe4850da9d0 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -420,9 +420,14 @@ static int c_show(struct seq_file *m, void *v) for (i = 0; hwcap_str[i]; i++) if (elf_hwcap & (1 << i)) seq_printf(m, "%s ", hwcap_str[i]); +#ifdef CONFIG_ARMV7_COMPAT_CPUINFO + /* Print out the non-optional ARMv8 HW capabilities */ + seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3d16 tlsi "); + seq_printf(m, "vfpv4 idiva idivt "); +#endif seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: AArch64\n"); + seq_printf(m, "CPU architecture: 8\n"); seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); From 779a6cf48cf313582b8d88757a798866a29195d0 Mon Sep 17 00:00:00 2001 From: Alex Van Brunt Date: Thu, 20 Feb 2014 10:46:21 -0800 Subject: [PATCH 22/69] arm64: report vfpv3 instead of vfpv3d16 vfpv3 is the correct version for an ARMv8 processor and it is the version reported by an A15. Change-Id: I486f3af21a352c27775888cca332a48d7e0c59ce Signed-off-by: Alex Van Brunt Reviewed-on: http://git-master/r/370076 --- arch/arm64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index efe4850da9d0..18fcb7ca9b95 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -422,7 +422,7 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "%s ", hwcap_str[i]); #ifdef CONFIG_ARMV7_COMPAT_CPUINFO /* Print out the non-optional ARMv8 HW capabilities */ - seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3d16 tlsi "); + seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3 tlsi "); seq_printf(m, "vfpv4 idiva idivt "); #endif From 3868e7f8d47992922756d1aa6590f0d556c669b8 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 5 Aug 2014 16:14:27 -0700 Subject: [PATCH 23/69] arm64: restrict effects of ARMV7_COMPAT_CPUINFO to ARMv7 tasks Since ARMV7_COMPAT_CPUINFO only exists to support existing ARMv7 binaries, restrict its effects to compat tasks Bug: 16819658 Change-Id: I1092de596c7822d23f5f3f8a05b417a3cb49f593 Signed-off-by: Greg Hackmann --- arch/arm64/kernel/setup.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 18fcb7ca9b95..06aeec407b20 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -421,13 +421,19 @@ static int c_show(struct seq_file *m, void *v) if (elf_hwcap & (1 << i)) seq_printf(m, "%s ", hwcap_str[i]); #ifdef CONFIG_ARMV7_COMPAT_CPUINFO - /* Print out the non-optional ARMv8 HW capabilities */ - seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3 tlsi "); - seq_printf(m, "vfpv4 idiva idivt "); + if (is_compat_task()) { + /* Print out the non-optional ARMv8 HW capabilities */ + seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3 tlsi "); + seq_printf(m, "vfpv4 idiva idivt "); + } #endif seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: 8\n"); + seq_printf(m, "CPU architecture: %s\n", +#if IS_ENABLED(CONFIG_ARMV7_COMPAT_CPUINFO) + is_compat_task() ? "8" : +#endif + "AArch64"); seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); From 39fdc30ff59d36904e8f25361b07d6c88e04b078 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 15 Sep 2014 15:34:55 -0700 Subject: [PATCH 24/69] android: base-cfg: enable ARMV7_COMPAT Enables backwards-compatibility features on arm64, and has no effect (does not exist) on other architectures Change-Id: I6fc2f6567437750a0032f8a39a9cde1fb92d4ef4 Signed-off-by: Greg Hackmann --- android/configs/android-base.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index bdc4749a195f..d8503e450957 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -6,6 +6,7 @@ CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ARMV7_COMPAT=y CONFIG_ASHMEM=y CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_INITRD=y From eaa22ea13d0f07969465cb7c05d74271368d4c12 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Dec 2013 21:04:35 +0000 Subject: [PATCH 25/69] arm64: drop redundant macros from read_cpuid() asm/cputype.h contains a bunch of #defines for CPU id registers that essentially map to themselves. Remove the #defines and pass the tokens directly to the inline asm() that reads the registers. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf2749488cd4..261db2729217 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -16,23 +16,13 @@ #ifndef __ASM_CPUTYPE_H #define __ASM_CPUTYPE_H -#define ID_MIDR_EL1 "midr_el1" -#define ID_MPIDR_EL1 "mpidr_el1" -#define ID_CTR_EL0 "ctr_el0" - -#define ID_AA64PFR0_EL1 "id_aa64pfr0_el1" -#define ID_AA64DFR0_EL1 "id_aa64dfr0_el1" -#define ID_AA64AFR0_EL1 "id_aa64afr0_el1" -#define ID_AA64ISAR0_EL1 "id_aa64isar0_el1" -#define ID_AA64MMFR0_EL1 "id_aa64mmfr0_el1" - #define INVALID_HWID ULONG_MAX #define MPIDR_HWID_BITMASK 0xff00ffffff #define read_cpuid(reg) ({ \ u64 __val; \ - asm("mrs %0, " reg : "=r" (__val)); \ + asm("mrs %0, " #reg : "=r" (__val)); \ __val; \ }) @@ -51,12 +41,12 @@ */ static inline u32 __attribute_const__ read_cpuid_id(void) { - return read_cpuid(ID_MIDR_EL1); + return read_cpuid(MIDR_EL1); } static inline u64 __attribute_const__ read_cpuid_mpidr(void) { - return read_cpuid(ID_MPIDR_EL1); + return read_cpuid(MPIDR_EL1); } static inline unsigned int __attribute_const__ read_cpuid_implementor(void) @@ -71,7 +61,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void) static inline u32 __attribute_const__ read_cpuid_cachetype(void) { - return read_cpuid(ID_CTR_EL0); + return read_cpuid(CTR_EL0); } #endif /* __ASSEMBLY__ */ From 8fdfc481c880ccf3e7d499c5d7313d96c500531b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 2 Sep 2013 16:33:54 +0100 Subject: [PATCH 26/69] arm64: Remove unused cpu_name ascii in arch/arm64/mm/proc.S This string has been moved to arch/arm64/kernel/cputable.c. Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 3e5dcd9897db..25929f65b5f2 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -95,10 +95,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) -cpu_name: - .ascii "AArch64 Processor" - .align - .section ".text.init", #alloc, #execinstr /* From 4149e0de6d206ef915e299d4736fc5044486adff Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 17 Sep 2014 15:01:45 -0700 Subject: [PATCH 27/69] seccomp: revert previous patches in prep for updated ones This reverts the seccomp related patches committed around 2014-08-27. This allows for a cleaner cherry-pick of newly landed upstream patches. f56b1aa arm: fixup NR_syscalls to accommodate the new seccomp syscall 81ff7fa seccomp: implement SECCOMP_FILTER_FLAG_TSYNC d924727 seccomp: allow mode setting across threads 743266a seccomp: introduce writer locking 3497a88 seccomp: split filter prep from check and apply 2c6d7de MIPS: add seccomp syscall 83f1ccba ARM: add seccomp syscall a75a29b seccomp: add "seccomp" syscall 1a63bce seccomp: split mode setting routines c208e4e seccomp: extract check/assign mode helpers 6862b01 seccomp: create internal mode-setting function 1ba2ccb MAINTAINERS: create seccomp entry c2da3eb seccomp: fix memory leak on filter attach 945a225 ARM: 7888/1: seccomp: not compatible with ARM OABI Change-Id: I3f129263d68a7b3c206d79f84f7f9908d13064f6 Signed-off-by: JP Abgrall --- MAINTAINERS | 10 - arch/Kconfig | 1 - arch/arm/Kconfig | 7 +- arch/arm/include/asm/unistd.h | 2 +- arch/arm/include/uapi/asm/unistd.h | 6 - arch/arm/kernel/calls.S | 4 - arch/mips/include/uapi/asm/unistd.h | 30 +- arch/mips/kernel/scall32-o32.S | 6 - arch/mips/kernel/scall64-64.S | 4 - arch/mips/kernel/scall64-n32.S | 4 - arch/mips/kernel/scall64-o32.S | 6 +- arch/x86/syscalls/syscall_32.tbl | 5 - arch/x86/syscalls/syscall_64.tbl | 5 - fs/exec.c | 2 +- include/linux/seccomp.h | 8 +- include/linux/syscalls.h | 2 - include/uapi/asm-generic/unistd.h | 12 +- include/uapi/linux/seccomp.h | 7 - kernel/fork.c | 49 +-- kernel/seccomp.c | 454 +++++----------------------- kernel/sys_ni.c | 3 - 21 files changed, 83 insertions(+), 544 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 614483608ba4..ad7e322ad17b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7131,16 +7131,6 @@ S: Maintained F: drivers/mmc/host/sdhci.* F: drivers/mmc/host/sdhci-pltfm.[ch] -SECURE COMPUTING -M: Kees Cook -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp -S: Supported -F: kernel/seccomp.c -F: include/uapi/linux/seccomp.h -F: include/linux/seccomp.h -K: \bsecure_computing -K: \bTIF_SECCOMP\b - SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF) M: Anton Vorontsov L: linuxppc-dev@lists.ozlabs.org diff --git a/arch/Kconfig b/arch/Kconfig index 84c94a89e75b..a4429bcd609e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -331,7 +331,6 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. - - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0dde9b9e91c2..99887aaa04bb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,7 +22,7 @@ config ARM select HAVE_AOUT select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB - select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT @@ -1681,11 +1681,6 @@ config OABI_COMPAT in memory differs between the legacy ABI and the new ARM EABI (only for non "thumb" binaries). This option adds a tiny overhead to all syscalls and produces a slightly larger kernel. - - The seccomp filter system will not be available when this is - selected, since there is no way yet to sensibly distinguish - between calling conventions during filtering. - If you know you'll be using only pure EABI user space then you can say N here. If this option is not selected and you attempt to execute a legacy ABI binary then the result will be diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index acabef1a75df..141baa3f9a72 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -15,7 +15,7 @@ #include -#define __NR_syscalls (384) +#define __NR_syscalls (380) #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) #define __ARCH_WANT_STAT64 diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index bbe80a7cba0c..af33b44990ed 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -406,12 +406,6 @@ #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) #define __NR_kcmp (__NR_SYSCALL_BASE+378) #define __NR_finit_module (__NR_SYSCALL_BASE+379) -/* Backporting seccomp, skip a few ... - * #define __NR_sched_setattr (__NR_SYSCALL_BASE+380) - * #define __NR_sched_getattr (__NR_SYSCALL_BASE+381) - * #define __NR_renameat2 (__NR_SYSCALL_BASE+382) - */ -#define __NR_seccomp (__NR_SYSCALL_BASE+383) /* * This may need to be greater than __NR_last_syscall+1 in order to diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 1a2e529a1340..c6ca7e376773 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -389,10 +389,6 @@ CALL(sys_process_vm_writev) CALL(sys_kcmp) CALL(sys_finit_module) -/* 380 */ CALL(sys_ni_syscall) /* CALL(sys_sched_setattr) */ - CALL(sys_ni_syscall) /* CALL(sys_sched_getattr) */ - CALL(sys_ni_syscall) /* CALL(sys_renameat2) */ - CALL(sys_seccomp) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index af4d5c0a2f02..1dee279f9665 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -369,22 +369,16 @@ #define __NR_process_vm_writev (__NR_Linux + 346) #define __NR_kcmp (__NR_Linux + 347) #define __NR_finit_module (__NR_Linux + 348) -/* Backporting seccomp, skip a few ... - * #define __NR_sched_setattr (__NR_Linux + 349) - * #define __NR_sched_getattr (__NR_Linux + 350) - * #define __NR_renameat2 (__NR_Linux + 351) - */ -#define __NR_seccomp (__NR_Linux + 352) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 352 +#define __NR_Linux_syscalls 348 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 352 +#define __NR_O32_Linux_syscalls 348 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -701,22 +695,16 @@ #define __NR_kcmp (__NR_Linux + 306) #define __NR_finit_module (__NR_Linux + 307) #define __NR_getdents64 (__NR_Linux + 308) -/* Backporting seccomp, skip a few ... - * #define __NR_sched_setattr (__NR_Linux + 309) - * #define __NR_sched_getattr (__NR_Linux + 310) - * #define __NR_renameat2 (__NR_Linux + 311) - */ -#define __NR_seccomp (__NR_Linux + 312) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 312 +#define __NR_Linux_syscalls 308 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 312 +#define __NR_64_Linux_syscalls 308 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1037,21 +1025,15 @@ #define __NR_process_vm_writev (__NR_Linux + 310) #define __NR_kcmp (__NR_Linux + 311) #define __NR_finit_module (__NR_Linux + 312) -/* Backporting seccomp, skip a few ... - * #define __NR_sched_setattr (__NR_Linux + 313) - * #define __NR_sched_getattr (__NR_Linux + 314) - * #define __NR_renameat2 (__NR_Linux + 315) - */ -#define __NR_seccomp (__NR_Linux + 316) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 316 +#define __NR_Linux_syscalls 312 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 316 +#define __NR_N32_Linux_syscalls 312 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index bcb2184e8a47..9b36424b03c5 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -593,12 +593,6 @@ einval: li v0, -ENOSYS sys sys_process_vm_writev 6 sys sys_kcmp 5 sys sys_finit_module 3 - /* Backporting seccomp, skip a few ... */ - sys sys_ni_syscall 0 /* sys_sched_setattr */ - sys sys_ni_syscall 0 /* sys_sched_getattr */ /* 4350 */ - sys sys_ni_syscall 0 /* sys_renameat2 */ - sys sys_seccomp 3 - .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 285872f9d6d1..97a5909a61cf 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -424,8 +424,4 @@ sys_call_table: PTR sys_kcmp PTR sys_finit_module PTR sys_getdents64 - sys sys_ni_syscall /* sys_sched_setattr */ - sys sys_ni_syscall /* sys_sched_getattr */ /* 5310 */ - sys sys_ni_syscall /* sys_renameat2 */ - sys sys_seccomp .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index bdee1a1ed1c2..edcb6594e7b5 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -417,8 +417,4 @@ EXPORT(sysn32_call_table) PTR compat_sys_process_vm_writev /* 6310 */ PTR sys_kcmp PTR sys_finit_module - sys sys_ni_syscall /* sys_sched_setattr */ - sys sys_ni_syscall /* sys_sched_getattr */ - sys sys_ni_syscall /* sys_renameat2 */ /* 6315 */ - sys sys_seccomp .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index a1f826a24578..74f485d3c0ef 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -541,8 +541,4 @@ sys_call_table: PTR compat_sys_process_vm_writev PTR sys_kcmp PTR sys_finit_module - sys sys_ni_syscall /* sys_sched_setattr */ - sys sys_ni_syscall /* sys_sched_getattr */ /* 4350 */ - sys sys_ni_syscall /* sys_renameat2 */ - sys sys_seccomp - .size sys32_call_table,.-sys32_call_table + .size sys_call_table,.-sys_call_table diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index de6d048d0305..aabfb8380a1c 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -357,8 +357,3 @@ 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp 350 i386 finit_module sys_finit_module -# Backporting seccomp, skip a few ... -# 351 i386 sched_setattr sys_sched_setattr -# 352 i386 sched_getattr sys_sched_getattr -# 353 i386 renameat2 sys_renameat2 -354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index a40bd6eda554..38ae65dfd14f 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -320,11 +320,6 @@ 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp 313 common finit_module sys_finit_module -# Backporting seccomp, skip a few ... -# 314 common sched_setattr sys_sched_setattr -# 315 common sched_getattr sys_sched_getattr -# 316 common renameat2 sys_renameat2 -317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/fs/exec.c b/fs/exec.c index b331086ace95..ffd7a813ad3d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against - * PTRACE_ATTACH or seccomp thread-sync + * PTRACE_ATTACH */ static int check_unsafe_exec(struct linux_binprm *bprm) { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 9687691799ff..6f19cfd1840e 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,8 +3,6 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) - #ifdef CONFIG_SECCOMP #include @@ -16,11 +14,11 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. - * @filter: must always point to a valid seccomp-filter or NULL as it is - * accessed without locking during system call entry. + * @filter: The metadata and ruleset for determining what system calls + * are allowed for a task. * * @filter must only be accessed from the context of current as there - * is no read locking. + * is no locking. */ struct seccomp { int mode; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2a955dcc863c..4147d700a293 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -841,6 +841,4 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); -asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, - const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index b422ad5d238b..0cc74c4403e4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -692,19 +692,9 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 __SYSCALL(__NR_finit_module, sys_finit_module) -/* Backporting seccomp, skip a few ... - * #define __NR_sched_setattr 274 -__SYSCALL(__NR_sched_setattr, sys_sched_setattr) - * #define __NR_sched_getattr 275 -__SYSCALL(__NR_sched_getattr, sys_sched_getattr) - * #define __NR_renameat2 276 -__SYSCALL(__NR_renameat2, sys_renameat2) - */ -#define __NR_seccomp 277 -__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 278 +#define __NR_syscalls 274 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0f238a43ff1e..ac2dc9f72973 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,13 +10,6 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ -/* Valid operations for seccomp syscall. */ -#define SECCOMP_SET_MODE_STRICT 0 -#define SECCOMP_SET_MODE_FILTER 1 - -/* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC 1 - /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/fork.c b/kernel/fork.c index 8a3e9a91130c..41671a5d637d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -327,15 +327,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto free_ti; tsk->stack = ti; -#ifdef CONFIG_SECCOMP - /* - * We must handle setting up seccomp filters once we're under - * the sighand lock in case orig has changed between now and - * then. Until then, filter must be NULL to avoid messing up - * the usage counts on the error path calling free_task. - */ - tsk->seccomp.filter = NULL; -#endif setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); @@ -1111,39 +1102,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) p->flags = new_flags; } -static void copy_seccomp(struct task_struct *p) -{ -#ifdef CONFIG_SECCOMP - /* - * Must be called with sighand->lock held, which is common to - * all threads in the group. Holding cred_guard_mutex is not - * needed because this new task is not yet running and cannot - * be racing exec. - */ - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); - - /* Ref-count the new filter user, and assign it. */ - get_seccomp_filter(current); - p->seccomp = current->seccomp; - - /* - * Explicitly enable no_new_privs here in case it got set - * between the task_struct being duplicated and holding the - * sighand lock. The seccomp state and nnp must be in sync. - */ - if (task_no_new_privs(current)) - task_set_no_new_privs(p); - - /* - * If the parent gained a seccomp mode after copying thread - * flags and between before we held the sighand lock, we have - * to manually enable the seccomp thread flag here. - */ - if (p->seccomp.mode != SECCOMP_MODE_DISABLED) - set_tsk_thread_flag(p, TIF_SECCOMP); -#endif -} - SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -1247,6 +1205,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; ftrace_graph_init_task(p); + get_seccomp_filter(p); rt_mutex_init_task(p); @@ -1488,12 +1447,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock(¤t->sighand->siglock); - /* - * Copy seccomp details explicitly here, in case they were changed - * before holding sighand lock. - */ - copy_seccomp(p); - /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 2d13b264d850..b7a10048a32c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,17 +18,15 @@ #include #include #include -#include -#include /* #define SECCOMP_DEBUG 1 */ #ifdef CONFIG_SECCOMP_FILTER #include #include -#include #include #include +#include #include #include @@ -203,184 +201,45 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) */ static u32 seccomp_run_filters(int syscall) { - struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); - struct seccomp_data sd; + struct seccomp_filter *f; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ - if (unlikely(WARN_ON(f == NULL))) + if (WARN_ON(current->seccomp.filter == NULL)) return SECCOMP_RET_KILL; - /* Make sure cross-thread synced filter points somewhere sane. */ - smp_read_barrier_depends(); - - populate_seccomp_data(&sd); - /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ - for (; f; f = f->prev) { - u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); - + for (f = current->seccomp.filter; f; f = f->prev) { + u32 cur_ret = sk_run_filter(NULL, f->insns); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; } return ret; } -#endif /* CONFIG_SECCOMP_FILTER */ - -static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) -{ - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); - - if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) - return false; - - return true; -} - -static inline void seccomp_assign_mode(struct task_struct *task, - unsigned long seccomp_mode) -{ - BUG_ON(!spin_is_locked(&task->sighand->siglock)); - - task->seccomp.mode = seccomp_mode; - /* - * Make sure TIF_SECCOMP cannot be set before the mode (and - * filter) is set. - */ - smp_mb__before_atomic(); - set_tsk_thread_flag(task, TIF_SECCOMP); -} - -#ifdef CONFIG_SECCOMP_FILTER -/* Returns 1 if the parent is an ancestor of the child. */ -static int is_ancestor(struct seccomp_filter *parent, - struct seccomp_filter *child) -{ - /* NULL is the root ancestor. */ - if (parent == NULL) - return 1; - for (; child; child = child->prev) - if (child == parent) - return 1; - return 0; -} /** - * seccomp_can_sync_threads: checks if all threads can be synchronized - * - * Expects sighand and cred_guard_mutex locks to be held. - * - * Returns 0 on success, -ve on error, or the pid of a thread which was - * either not in the correct seccomp mode or it did not have an ancestral - * seccomp filter. - */ -static inline pid_t seccomp_can_sync_threads(void) -{ - struct task_struct *thread, *caller; - - BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); - - /* Validate all threads being eligible for synchronization. */ - caller = current; - for_each_thread(caller, thread) { - pid_t failed; - - /* Skip current, since it is initiating the sync. */ - if (thread == caller) - continue; - - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || - (thread->seccomp.mode == SECCOMP_MODE_FILTER && - is_ancestor(thread->seccomp.filter, - caller->seccomp.filter))) - continue; - - /* Return the first thread that cannot be synchronized. */ - failed = task_pid_vnr(thread); - /* If the pid cannot be resolved, then return -ESRCH */ - if (unlikely(WARN_ON(failed == 0))) - failed = -ESRCH; - return failed; - } - - return 0; -} - -/** - * seccomp_sync_threads: sets all threads to use current's filter - * - * Expects sighand and cred_guard_mutex locks to be held, and for - * seccomp_can_sync_threads() to have returned success already - * without dropping the locks. - * - */ -static inline void seccomp_sync_threads(void) -{ - struct task_struct *thread, *caller; - - BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); - - /* Synchronize all threads. */ - caller = current; - for_each_thread(caller, thread) { - /* Skip current, since it needs no changes. */ - if (thread == caller) - continue; - - /* Get a task reference for the new leaf node. */ - get_seccomp_filter(caller); - /* - * Drop the task reference to the shared ancestor since - * current's path will hold a reference. (This also - * allows a put before the assignment.) - */ - put_seccomp_filter(thread); - smp_store_release(&thread->seccomp.filter, - caller->seccomp.filter); - /* - * Opt the other thread into seccomp if needed. - * As threads are considered to be trust-realm - * equivalent (see ptrace_may_access), it is safe to - * allow one thread to transition the other. - */ - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { - /* - * Don't let an unprivileged task work around - * the no_new_privs restriction by creating - * a thread that sets it up, enters seccomp, - * then dies. - */ - if (task_no_new_privs(caller)) - task_set_no_new_privs(thread); - - seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); - } - } -} - -/** - * seccomp_prepare_filter: Prepares a seccomp filter for use. + * seccomp_attach_filter: Attaches a seccomp filter to current. * @fprog: BPF program to install * - * Returns filter on success or an ERR_PTR on failure. + * Returns 0 on success or an errno on failure. */ -static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) +static long seccomp_attach_filter(struct sock_fprog *fprog) { struct seccomp_filter *filter; - unsigned long fp_size; - struct sock_filter *fp; - int new_len; + unsigned long fp_size = fprog->len * sizeof(struct sock_filter); + unsigned long total_insns = fprog->len; long ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) - return ERR_PTR(-EINVAL); - BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); - fp_size = fprog->len * sizeof(struct sock_filter); + return -EINVAL; + + for (filter = current->seccomp.filter; filter; filter = filter->prev) + total_insns += filter->len + 4; /* include a 4 instr penalty */ + if (total_insns > MAX_INSNS_PER_PATH) + return -ENOMEM; /* * Installing a seccomp filter requires that the task have @@ -391,11 +250,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (!current->no_new_privs && security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) - return ERR_PTR(-EACCES); + return -EACCES; - fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); - if (!fp) - return ERR_PTR(-ENOMEM); + /* Allocate a new seccomp_filter */ + filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, + GFP_KERNEL|__GFP_NOWARN); + if (!filter) + return -ENOMEM; + atomic_set(&filter->usage, 1); + filter->len = fprog->len; /* Copy the instructions from fprog. */ ret = -EFAULT; @@ -410,46 +273,30 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) /* Check and rewrite the fprog for seccomp use */ ret = seccomp_check_filter(filter->insns, filter->len); if (ret) - goto free_prog; + goto fail; - /* Allocate a new seccomp_filter */ - ret = -ENOMEM; - filter = kzalloc(sizeof(struct seccomp_filter) + - sizeof(struct sock_filter_int) * new_len, - GFP_KERNEL|__GFP_NOWARN); - if (!filter) - goto free_prog; - - ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); - if (ret) - goto free_filter; - kfree(fp); - - atomic_set(&filter->usage, 1); - filter->len = new_len; - - return filter; - -free_filter_prog: - kfree(filter->prog); -free_filter: + /* + * If there is an existing filter, make it the prev and don't drop its + * task reference. + */ + filter->prev = current->seccomp.filter; + current->seccomp.filter = filter; + return 0; +fail: kfree(filter); -free_prog: - kfree(fp); - return ERR_PTR(ret); + return ret; } /** - * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog + * seccomp_attach_user_filter - attaches a user-supplied sock_fprog * @user_filter: pointer to the user data containing a sock_fprog. * * Returns 0 on success and non-zero otherwise. */ -static struct seccomp_filter * -seccomp_prepare_user_filter(const char __user *user_filter) +long seccomp_attach_user_filter(char __user *user_filter) { struct sock_fprog fprog; - struct seccomp_filter *filter = ERR_PTR(-EFAULT); + long ret = -EFAULT; #ifdef CONFIG_COMPAT if (is_compat_task()) { @@ -462,56 +309,9 @@ seccomp_prepare_user_filter(const char __user *user_filter) #endif if (copy_from_user(&fprog, user_filter, sizeof(fprog))) goto out; - filter = seccomp_prepare_filter(&fprog); + ret = seccomp_attach_filter(&fprog); out: - return filter; -} - -/** - * seccomp_attach_filter: validate and attach filter - * @flags: flags to change filter behavior - * @filter: seccomp filter to add to the current process - * - * Caller must be holding current->sighand->siglock lock. - * - * Returns 0 on success, -ve on error. - */ -static long seccomp_attach_filter(unsigned int flags, - struct seccomp_filter *filter) -{ - unsigned long total_insns; - struct seccomp_filter *walker; - - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); - - /* Validate resulting filter length. */ - total_insns = filter->prog->len; - for (walker = current->seccomp.filter; walker; walker = walker->prev) - total_insns += walker->prog->len + 4; /* 4 instr penalty */ - if (total_insns > MAX_INSNS_PER_PATH) - return -ENOMEM; - - /* If thread sync has been requested, check that it is possible. */ - if (flags & SECCOMP_FILTER_FLAG_TSYNC) { - int ret; - - ret = seccomp_can_sync_threads(); - if (ret) - return ret; - } - - /* - * If there is an existing filter, make it the prev and don't drop its - * task reference. - */ - filter->prev = current->seccomp.filter; - current->seccomp.filter = filter; - - /* Now that the new filter is in place, synchronize to all threads. */ - if (flags & SECCOMP_FILTER_FLAG_TSYNC) - seccomp_sync_threads(); - - return 0; + return ret; } /* get_seccomp_filter - increments the reference count of the filter on @tsk */ @@ -524,14 +324,6 @@ void get_seccomp_filter(struct task_struct *tsk) atomic_inc(&orig->usage); } -static inline void seccomp_filter_free(struct seccomp_filter *filter) -{ - if (filter) { - sk_filter_free(filter->prog); - kfree(filter); - } -} - /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ void put_seccomp_filter(struct task_struct *tsk) { @@ -540,7 +332,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - seccomp_filter_free(freeme); + kfree(freeme); } } @@ -584,17 +376,12 @@ static int mode1_syscalls_32[] = { int __secure_computing(int this_syscall) { + int mode = current->seccomp.mode; int exit_sig = 0; int *syscall; u32 ret; - /* - * Make sure that any changes to mode from another thread have - * been seen after TIF_SECCOMP was seen. - */ - rmb(); - - switch (current->seccomp.mode) { + switch (mode) { case SECCOMP_MODE_STRICT: syscall = mode1_syscalls; #ifdef CONFIG_COMPAT @@ -679,153 +466,48 @@ long prctl_get_seccomp(void) return current->seccomp.mode; } -/** - * seccomp_set_mode_strict: internal function for setting strict seccomp - * - * Once current->seccomp.mode is non-zero, it may not be changed. - * - * Returns 0 on success or -EINVAL on failure. - */ -static long seccomp_set_mode_strict(void) -{ - const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; - long ret = -EINVAL; - - spin_lock_irq(¤t->sighand->siglock); - - if (!seccomp_may_assign_mode(seccomp_mode)) - goto out; - -#ifdef TIF_NOTSC - disable_TSC(); -#endif - seccomp_assign_mode(current, seccomp_mode); - ret = 0; - -out: - spin_unlock_irq(¤t->sighand->siglock); - - return ret; -} - -#ifdef CONFIG_SECCOMP_FILTER -/** - * seccomp_set_mode_filter: internal function for setting seccomp filter - * @flags: flags to change filter behavior - * @filter: struct sock_fprog containing filter - * - * This function may be called repeatedly to install additional filters. - * Every filter successfully installed will be evaluated (in reverse order) - * for each system call the task makes. - * - * Once current->seccomp.mode is non-zero, it may not be changed. - * - * Returns 0 on success or -EINVAL on failure. - */ -static long seccomp_set_mode_filter(unsigned int flags, - const char __user *filter) -{ - const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; - struct seccomp_filter *prepared = NULL; - long ret = -EINVAL; - - /* Validate flags. */ - if (flags & ~SECCOMP_FILTER_FLAG_MASK) - return -EINVAL; - - /* Prepare the new filter before holding any locks. */ - prepared = seccomp_prepare_user_filter(filter); - if (IS_ERR(prepared)) - return PTR_ERR(prepared); - - /* - * Make sure we cannot change seccomp or nnp state via TSYNC - * while another thread is in the middle of calling exec. - */ - if (flags & SECCOMP_FILTER_FLAG_TSYNC && - mutex_lock_killable(¤t->signal->cred_guard_mutex)) - goto out_free; - - spin_lock_irq(¤t->sighand->siglock); - - if (!seccomp_may_assign_mode(seccomp_mode)) - goto out; - - ret = seccomp_attach_filter(flags, prepared); - if (ret) - goto out; - /* Do not free the successfully attached filter. */ - prepared = NULL; - - seccomp_assign_mode(current, seccomp_mode); -out: - spin_unlock_irq(¤t->sighand->siglock); - if (flags & SECCOMP_FILTER_FLAG_TSYNC) - mutex_unlock(¤t->signal->cred_guard_mutex); -out_free: - seccomp_filter_free(prepared); - return ret; -} -#else -static inline long seccomp_set_mode_filter(unsigned int flags, - const char __user *filter) -{ - return -EINVAL; -} -#endif - -/* Common entry point for both prctl and syscall. */ -static long do_seccomp(unsigned int op, unsigned int flags, - const char __user *uargs) -{ - switch (op) { - case SECCOMP_SET_MODE_STRICT: - if (flags != 0 || uargs != NULL) - return -EINVAL; - return seccomp_set_mode_strict(); - case SECCOMP_SET_MODE_FILTER: - return seccomp_set_mode_filter(flags, uargs); - default: - return -EINVAL; - } -} - -SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, - const char __user *, uargs) -{ - return do_seccomp(op, flags, uargs); -} - /** * prctl_set_seccomp: configures current->seccomp.mode * @seccomp_mode: requested mode to use * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER * + * This function may be called repeatedly with a @seccomp_mode of + * SECCOMP_MODE_FILTER to install additional filters. Every filter + * successfully installed will be evaluated (in reverse order) for each system + * call the task makes. + * + * Once current->seccomp.mode is non-zero, it may not be changed. + * * Returns 0 on success or -EINVAL on failure. */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { - unsigned int op; - char __user *uargs; + long ret = -EINVAL; + + if (current->seccomp.mode && + current->seccomp.mode != seccomp_mode) + goto out; switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - op = SECCOMP_SET_MODE_STRICT; - /* - * Setting strict mode through prctl always ignored filter, - * so make sure it is always NULL here to pass the internal - * check in do_seccomp(). - */ - uargs = NULL; + ret = 0; +#ifdef TIF_NOTSC + disable_TSC(); +#endif break; +#ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: - op = SECCOMP_SET_MODE_FILTER; - uargs = filter; + ret = seccomp_attach_user_filter(filter); + if (ret) + goto out; break; +#endif default: - return -EINVAL; + goto out; } - /* prctl interface doesn't have flags, so they are always zero. */ - return do_seccomp(op, 0, uargs); + current->seccomp.mode = seccomp_mode; + set_thread_flag(TIF_SECCOMP); +out: + return ret; } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7e7fc0a082c4..7078052284fd 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -209,6 +209,3 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); - -/* operate on Secure Computing state */ -cond_syscall(sys_seccomp); From f91c274ab850b627eaa7862ef1241ce5d720e03e Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Wed, 30 Apr 2014 10:51:29 +0100 Subject: [PATCH 28/69] arm64: make a single hook to syscall_trace() for all syscall features Currently syscall_trace() is called only for ptrace. With additional TIF_xx flags defined, it is now called in all the cases of audit, ftrace and seccomp in addition to ptrace. Acked-by: Richard Guy Briggs Acked-by: Will Deacon Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Conflicts: arch/arm64/include/asm/thread_info.h arch/arm64/kernel/entry.S Change-Id: Iee71c44c45b363194a1cc7182906c0afa6b5348b --- arch/arm64/include/asm/thread_info.h | 13 +++++++++++++ arch/arm64/kernel/entry.S | 5 +++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 5e95a6ce074a..c77b13b4e36e 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -97,6 +97,9 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags: * TIF_SYSCALL_TRACE - syscall trace active + * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace + * TIF_SYSCALL_AUDIT - syscall auditing + * TIF_SECOMP - syscall secure computing * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -108,6 +111,9 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_SYSCALL_TRACE 8 +#define TIF_SYSCALL_AUDIT 9 +#define TIF_SYSCALL_TRACEPOINT 10 +#define TIF_SECCOMP 11 #define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 @@ -120,10 +126,17 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) + #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 0b65510230bb..0c609291e963 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -642,8 +642,9 @@ el0_svc_naked: // compat entry point enable_irq get_thread_info tsk - ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing - tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + tst x16, #_TIF_SYSCALL_WORK + b.ne __sys_trace adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys From 26108f2dc78941405736a7ab424d15f04b3d1ec3 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Wed, 30 Apr 2014 10:51:30 +0100 Subject: [PATCH 29/69] arm64: split syscall_trace() into separate functions for enter/exit As done in arm, this change makes it easy to confirm we invoke syscall related hooks, including syscall tracepoint, audit and seccomp which would be implemented later, in correct order. That is, undoing operations in the opposite order on exit that they were done on entry. Acked-by: Will Deacon Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 10 +++----- arch/arm64/kernel/ptrace.c | 50 ++++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 0c609291e963..5b368d51369e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -660,9 +660,8 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x1, sp - mov w0, #0 // trace entry - bl syscall_trace + mov x0, sp + bl syscall_trace_enter adr lr, __sys_trace_return // return address uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs @@ -677,9 +676,8 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 - mov x1, sp - mov w0, #1 // trace exit - bl syscall_trace + mov x0, sp + bl syscall_trace_exit b ret_to_user /* diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7190a6544cab..f3b14fd27ab4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1064,35 +1064,43 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } -asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { + int regno; unsigned long saved_reg; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return regs->syscallno; + /* + * A scratch register (ip(r12) on AArch32, x7 on AArch64) is + * used to denote syscall entry/exit: + */ + regno = (is_compat_task() ? 12 : 7); + saved_reg = regs->regs[regno]; + regs->regs[regno] = dir; - if (is_compat_task()) { - /* AArch32 uses ip (r12) for scratch */ - saved_reg = regs->regs[12]; - regs->regs[12] = dir; - } else { - /* - * Save X7. X7 is used to denote syscall entry/exit: - * X7 = 0 -> entry, = 1 -> exit - */ - saved_reg = regs->regs[7]; - regs->regs[7] = dir; - } - - if (dir) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) regs->syscallno = ~0UL; - if (is_compat_task()) - regs->regs[12] = saved_reg; - else - regs->regs[7] = saved_reg; + regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); return regs->syscallno; } + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} From cfc7e99e9e3900056028a7d90072e9ea0d886f8d Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 17 Sep 2014 17:59:28 -0700 Subject: [PATCH 30/69] arm64: Add __NR_* definitions for compat syscalls This patch adds __NR_* definitions to asm/unistd32.h, moves the __NR_compat_* definitions to asm/unistd.h and removes all the explicit unistd32.h includes apart from the one building the compat syscall table. The aim is to have the compat __NR_* definitions available but without colliding with the native syscall definitions (required by lib/compat_audit.c to avoid duplicating the audit header files between native and compat). Signed-off-by: Catalin Marinas Conflicts: arch/arm64/include/asm/unistd32.h arch/arm64/kernel/kuser32.S Change-Id: I8776881b5beb39769aadc4c4f14a51ea54325112 --- arch/arm64/include/asm/unistd.h | 17 + arch/arm64/include/asm/unistd32.h | 1156 +++++++++++++++++++---------- arch/arm64/kernel/entry.S | 1 - arch/arm64/kernel/kuser32.S | 3 + arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/sys_compat.c | 2 +- 6 files changed, 782 insertions(+), 399 deletions(-) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 82ce217e94cf..38f8799a3d31 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -25,6 +25,23 @@ #define __ARCH_WANT_COMPAT_SYS_SENDFILE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK + +/* + * Compat syscall numbers used by the AArch64 kernel. + */ +#define __NR_compat_restart_syscall 0 +#define __NR_compat_sigreturn 119 +#define __NR_compat_rt_sigreturn 173 + +/* + * The following SVCs are ARM private. + */ +#define __ARM_NR_COMPAT_BASE 0x0f0000 +#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) +#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) + +#define __NR_compat_syscalls 383 #endif + #define __ARCH_WANT_SYS_CLONE #include diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 58125bf008d3..dd336c150f3f 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -21,399 +21,763 @@ #define __SYSCALL(x, y) #endif -__SYSCALL(0, sys_restart_syscall) -__SYSCALL(1, sys_exit) -__SYSCALL(2, sys_fork) -__SYSCALL(3, sys_read) -__SYSCALL(4, sys_write) -__SYSCALL(5, compat_sys_open) -__SYSCALL(6, sys_close) -__SYSCALL(7, sys_ni_syscall) /* 7 was sys_waitpid */ -__SYSCALL(8, sys_creat) -__SYSCALL(9, sys_link) -__SYSCALL(10, sys_unlink) -__SYSCALL(11, compat_sys_execve) -__SYSCALL(12, sys_chdir) -__SYSCALL(13, sys_ni_syscall) /* 13 was sys_time */ -__SYSCALL(14, sys_mknod) -__SYSCALL(15, sys_chmod) -__SYSCALL(16, sys_lchown16) -__SYSCALL(17, sys_ni_syscall) /* 17 was sys_break */ -__SYSCALL(18, sys_ni_syscall) /* 18 was sys_stat */ -__SYSCALL(19, compat_sys_lseek) -__SYSCALL(20, sys_getpid) -__SYSCALL(21, compat_sys_mount) -__SYSCALL(22, sys_ni_syscall) /* 22 was sys_umount */ -__SYSCALL(23, sys_setuid16) -__SYSCALL(24, sys_getuid16) -__SYSCALL(25, sys_ni_syscall) /* 25 was sys_stime */ -__SYSCALL(26, compat_sys_ptrace) -__SYSCALL(27, sys_ni_syscall) /* 27 was sys_alarm */ -__SYSCALL(28, sys_ni_syscall) /* 28 was sys_fstat */ -__SYSCALL(29, sys_pause) -__SYSCALL(30, sys_ni_syscall) /* 30 was sys_utime */ -__SYSCALL(31, sys_ni_syscall) /* 31 was sys_stty */ -__SYSCALL(32, sys_ni_syscall) /* 32 was sys_gtty */ -__SYSCALL(33, sys_access) -__SYSCALL(34, sys_nice) -__SYSCALL(35, sys_ni_syscall) /* 35 was sys_ftime */ -__SYSCALL(36, sys_sync) -__SYSCALL(37, sys_kill) -__SYSCALL(38, sys_rename) -__SYSCALL(39, sys_mkdir) -__SYSCALL(40, sys_rmdir) -__SYSCALL(41, sys_dup) -__SYSCALL(42, sys_pipe) -__SYSCALL(43, compat_sys_times) -__SYSCALL(44, sys_ni_syscall) /* 44 was sys_prof */ -__SYSCALL(45, sys_brk) -__SYSCALL(46, sys_setgid16) -__SYSCALL(47, sys_getgid16) -__SYSCALL(48, sys_ni_syscall) /* 48 was sys_signal */ -__SYSCALL(49, sys_geteuid16) -__SYSCALL(50, sys_getegid16) -__SYSCALL(51, sys_acct) -__SYSCALL(52, sys_umount) -__SYSCALL(53, sys_ni_syscall) /* 53 was sys_lock */ -__SYSCALL(54, compat_sys_ioctl) -__SYSCALL(55, compat_sys_fcntl) -__SYSCALL(56, sys_ni_syscall) /* 56 was sys_mpx */ -__SYSCALL(57, sys_setpgid) -__SYSCALL(58, sys_ni_syscall) /* 58 was sys_ulimit */ -__SYSCALL(59, sys_ni_syscall) /* 59 was sys_olduname */ -__SYSCALL(60, sys_umask) -__SYSCALL(61, sys_chroot) -__SYSCALL(62, compat_sys_ustat) -__SYSCALL(63, sys_dup2) -__SYSCALL(64, sys_getppid) -__SYSCALL(65, sys_getpgrp) -__SYSCALL(66, sys_setsid) -__SYSCALL(67, compat_sys_sigaction) -__SYSCALL(68, sys_ni_syscall) /* 68 was sys_sgetmask */ -__SYSCALL(69, sys_ni_syscall) /* 69 was sys_ssetmask */ -__SYSCALL(70, sys_setreuid16) -__SYSCALL(71, sys_setregid16) -__SYSCALL(72, sys_sigsuspend) -__SYSCALL(73, compat_sys_sigpending) -__SYSCALL(74, sys_sethostname) -__SYSCALL(75, compat_sys_setrlimit) -__SYSCALL(76, sys_ni_syscall) /* 76 was compat_sys_getrlimit */ -__SYSCALL(77, compat_sys_getrusage) -__SYSCALL(78, compat_sys_gettimeofday) -__SYSCALL(79, compat_sys_settimeofday) -__SYSCALL(80, sys_getgroups16) -__SYSCALL(81, sys_setgroups16) -__SYSCALL(82, sys_ni_syscall) /* 82 was compat_sys_select */ -__SYSCALL(83, sys_symlink) -__SYSCALL(84, sys_ni_syscall) /* 84 was sys_lstat */ -__SYSCALL(85, sys_readlink) -__SYSCALL(86, sys_uselib) -__SYSCALL(87, sys_swapon) -__SYSCALL(88, sys_reboot) -__SYSCALL(89, sys_ni_syscall) /* 89 was sys_readdir */ -__SYSCALL(90, sys_ni_syscall) /* 90 was sys_mmap */ -__SYSCALL(91, sys_munmap) -__SYSCALL(92, compat_sys_truncate) -__SYSCALL(93, compat_sys_ftruncate) -__SYSCALL(94, sys_fchmod) -__SYSCALL(95, sys_fchown16) -__SYSCALL(96, sys_getpriority) -__SYSCALL(97, sys_setpriority) -__SYSCALL(98, sys_ni_syscall) /* 98 was sys_profil */ -__SYSCALL(99, compat_sys_statfs) -__SYSCALL(100, compat_sys_fstatfs) -__SYSCALL(101, sys_ni_syscall) /* 101 was sys_ioperm */ -__SYSCALL(102, sys_ni_syscall) /* 102 was sys_socketcall */ -__SYSCALL(103, sys_syslog) -__SYSCALL(104, compat_sys_setitimer) -__SYSCALL(105, compat_sys_getitimer) -__SYSCALL(106, compat_sys_newstat) -__SYSCALL(107, compat_sys_newlstat) -__SYSCALL(108, compat_sys_newfstat) -__SYSCALL(109, sys_ni_syscall) /* 109 was sys_uname */ -__SYSCALL(110, sys_ni_syscall) /* 110 was sys_iopl */ -__SYSCALL(111, sys_vhangup) -__SYSCALL(112, sys_ni_syscall) /* 112 was sys_idle */ -__SYSCALL(113, sys_ni_syscall) /* 113 was sys_syscall */ -__SYSCALL(114, compat_sys_wait4) -__SYSCALL(115, sys_swapoff) -__SYSCALL(116, compat_sys_sysinfo) -__SYSCALL(117, sys_ni_syscall) /* 117 was sys_ipc */ -__SYSCALL(118, sys_fsync) -__SYSCALL(119, compat_sys_sigreturn_wrapper) -__SYSCALL(120, sys_clone) -__SYSCALL(121, sys_setdomainname) -__SYSCALL(122, sys_newuname) -__SYSCALL(123, sys_ni_syscall) /* 123 was sys_modify_ldt */ -__SYSCALL(124, compat_sys_adjtimex) -__SYSCALL(125, sys_mprotect) -__SYSCALL(126, compat_sys_sigprocmask) -__SYSCALL(127, sys_ni_syscall) /* 127 was sys_create_module */ -__SYSCALL(128, sys_init_module) -__SYSCALL(129, sys_delete_module) -__SYSCALL(130, sys_ni_syscall) /* 130 was sys_get_kernel_syms */ -__SYSCALL(131, sys_quotactl) -__SYSCALL(132, sys_getpgid) -__SYSCALL(133, sys_fchdir) -__SYSCALL(134, sys_bdflush) -__SYSCALL(135, sys_sysfs) -__SYSCALL(136, sys_personality) -__SYSCALL(137, sys_ni_syscall) /* 137 was sys_afs_syscall */ -__SYSCALL(138, sys_setfsuid16) -__SYSCALL(139, sys_setfsgid16) -__SYSCALL(140, sys_llseek) -__SYSCALL(141, compat_sys_getdents) -__SYSCALL(142, compat_sys_select) -__SYSCALL(143, sys_flock) -__SYSCALL(144, sys_msync) -__SYSCALL(145, compat_sys_readv) -__SYSCALL(146, compat_sys_writev) -__SYSCALL(147, sys_getsid) -__SYSCALL(148, sys_fdatasync) -__SYSCALL(149, compat_sys_sysctl) -__SYSCALL(150, sys_mlock) -__SYSCALL(151, sys_munlock) -__SYSCALL(152, sys_mlockall) -__SYSCALL(153, sys_munlockall) -__SYSCALL(154, sys_sched_setparam) -__SYSCALL(155, sys_sched_getparam) -__SYSCALL(156, sys_sched_setscheduler) -__SYSCALL(157, sys_sched_getscheduler) -__SYSCALL(158, sys_sched_yield) -__SYSCALL(159, sys_sched_get_priority_max) -__SYSCALL(160, sys_sched_get_priority_min) -__SYSCALL(161, compat_sys_sched_rr_get_interval) -__SYSCALL(162, compat_sys_nanosleep) -__SYSCALL(163, sys_mremap) -__SYSCALL(164, sys_setresuid16) -__SYSCALL(165, sys_getresuid16) -__SYSCALL(166, sys_ni_syscall) /* 166 was sys_vm86 */ -__SYSCALL(167, sys_ni_syscall) /* 167 was sys_query_module */ -__SYSCALL(168, sys_poll) -__SYSCALL(169, sys_ni_syscall) -__SYSCALL(170, sys_setresgid16) -__SYSCALL(171, sys_getresgid16) -__SYSCALL(172, sys_prctl) -__SYSCALL(173, compat_sys_rt_sigreturn_wrapper) -__SYSCALL(174, compat_sys_rt_sigaction) -__SYSCALL(175, compat_sys_rt_sigprocmask) -__SYSCALL(176, compat_sys_rt_sigpending) -__SYSCALL(177, compat_sys_rt_sigtimedwait) -__SYSCALL(178, compat_sys_rt_sigqueueinfo) -__SYSCALL(179, compat_sys_rt_sigsuspend) -__SYSCALL(180, compat_sys_pread64_wrapper) -__SYSCALL(181, compat_sys_pwrite64_wrapper) -__SYSCALL(182, sys_chown16) -__SYSCALL(183, sys_getcwd) -__SYSCALL(184, sys_capget) -__SYSCALL(185, sys_capset) -__SYSCALL(186, compat_sys_sigaltstack) -__SYSCALL(187, compat_sys_sendfile) -__SYSCALL(188, sys_ni_syscall) /* 188 reserved */ -__SYSCALL(189, sys_ni_syscall) /* 189 reserved */ -__SYSCALL(190, sys_vfork) -__SYSCALL(191, compat_sys_getrlimit) /* SuS compliant getrlimit */ -__SYSCALL(192, sys_mmap_pgoff) -__SYSCALL(193, compat_sys_truncate64_wrapper) -__SYSCALL(194, compat_sys_ftruncate64_wrapper) -__SYSCALL(195, sys_stat64) -__SYSCALL(196, sys_lstat64) -__SYSCALL(197, sys_fstat64) -__SYSCALL(198, sys_lchown) -__SYSCALL(199, sys_getuid) -__SYSCALL(200, sys_getgid) -__SYSCALL(201, sys_geteuid) -__SYSCALL(202, sys_getegid) -__SYSCALL(203, sys_setreuid) -__SYSCALL(204, sys_setregid) -__SYSCALL(205, sys_getgroups) -__SYSCALL(206, sys_setgroups) -__SYSCALL(207, sys_fchown) -__SYSCALL(208, sys_setresuid) -__SYSCALL(209, sys_getresuid) -__SYSCALL(210, sys_setresgid) -__SYSCALL(211, sys_getresgid) -__SYSCALL(212, sys_chown) -__SYSCALL(213, sys_setuid) -__SYSCALL(214, sys_setgid) -__SYSCALL(215, sys_setfsuid) -__SYSCALL(216, sys_setfsgid) -__SYSCALL(217, compat_sys_getdents64) -__SYSCALL(218, sys_pivot_root) -__SYSCALL(219, sys_mincore) -__SYSCALL(220, sys_madvise) -__SYSCALL(221, compat_sys_fcntl64) -__SYSCALL(222, sys_ni_syscall) /* 222 for tux */ -__SYSCALL(223, sys_ni_syscall) /* 223 is unused */ -__SYSCALL(224, sys_gettid) -__SYSCALL(225, compat_sys_readahead_wrapper) -__SYSCALL(226, sys_setxattr) -__SYSCALL(227, sys_lsetxattr) -__SYSCALL(228, sys_fsetxattr) -__SYSCALL(229, sys_getxattr) -__SYSCALL(230, sys_lgetxattr) -__SYSCALL(231, sys_fgetxattr) -__SYSCALL(232, sys_listxattr) -__SYSCALL(233, sys_llistxattr) -__SYSCALL(234, sys_flistxattr) -__SYSCALL(235, sys_removexattr) -__SYSCALL(236, sys_lremovexattr) -__SYSCALL(237, sys_fremovexattr) -__SYSCALL(238, sys_tkill) -__SYSCALL(239, sys_sendfile64) -__SYSCALL(240, compat_sys_futex) -__SYSCALL(241, compat_sys_sched_setaffinity) -__SYSCALL(242, compat_sys_sched_getaffinity) -__SYSCALL(243, compat_sys_io_setup) -__SYSCALL(244, sys_io_destroy) -__SYSCALL(245, compat_sys_io_getevents) -__SYSCALL(246, compat_sys_io_submit) -__SYSCALL(247, sys_io_cancel) -__SYSCALL(248, sys_exit_group) -__SYSCALL(249, compat_sys_lookup_dcookie) -__SYSCALL(250, sys_epoll_create) -__SYSCALL(251, sys_epoll_ctl) -__SYSCALL(252, sys_epoll_wait) -__SYSCALL(253, sys_remap_file_pages) -__SYSCALL(254, sys_ni_syscall) /* 254 for set_thread_area */ -__SYSCALL(255, sys_ni_syscall) /* 255 for get_thread_area */ -__SYSCALL(256, sys_set_tid_address) -__SYSCALL(257, compat_sys_timer_create) -__SYSCALL(258, compat_sys_timer_settime) -__SYSCALL(259, compat_sys_timer_gettime) -__SYSCALL(260, sys_timer_getoverrun) -__SYSCALL(261, sys_timer_delete) -__SYSCALL(262, compat_sys_clock_settime) -__SYSCALL(263, compat_sys_clock_gettime) -__SYSCALL(264, compat_sys_clock_getres) -__SYSCALL(265, compat_sys_clock_nanosleep) -__SYSCALL(266, compat_sys_statfs64_wrapper) -__SYSCALL(267, compat_sys_fstatfs64_wrapper) -__SYSCALL(268, sys_tgkill) -__SYSCALL(269, compat_sys_utimes) -__SYSCALL(270, compat_sys_fadvise64_64_wrapper) -__SYSCALL(271, sys_pciconfig_iobase) -__SYSCALL(272, sys_pciconfig_read) -__SYSCALL(273, sys_pciconfig_write) -__SYSCALL(274, compat_sys_mq_open) -__SYSCALL(275, sys_mq_unlink) -__SYSCALL(276, compat_sys_mq_timedsend) -__SYSCALL(277, compat_sys_mq_timedreceive) -__SYSCALL(278, compat_sys_mq_notify) -__SYSCALL(279, compat_sys_mq_getsetattr) -__SYSCALL(280, compat_sys_waitid) -__SYSCALL(281, sys_socket) -__SYSCALL(282, sys_bind) -__SYSCALL(283, sys_connect) -__SYSCALL(284, sys_listen) -__SYSCALL(285, sys_accept) -__SYSCALL(286, sys_getsockname) -__SYSCALL(287, sys_getpeername) -__SYSCALL(288, sys_socketpair) -__SYSCALL(289, sys_send) -__SYSCALL(290, sys_sendto) -__SYSCALL(291, compat_sys_recv) -__SYSCALL(292, compat_sys_recvfrom) -__SYSCALL(293, sys_shutdown) -__SYSCALL(294, compat_sys_setsockopt) -__SYSCALL(295, compat_sys_getsockopt) -__SYSCALL(296, compat_sys_sendmsg) -__SYSCALL(297, compat_sys_recvmsg) -__SYSCALL(298, sys_semop) -__SYSCALL(299, sys_semget) -__SYSCALL(300, compat_sys_semctl) -__SYSCALL(301, compat_sys_msgsnd) -__SYSCALL(302, compat_sys_msgrcv) -__SYSCALL(303, sys_msgget) -__SYSCALL(304, compat_sys_msgctl) -__SYSCALL(305, compat_sys_shmat) -__SYSCALL(306, sys_shmdt) -__SYSCALL(307, sys_shmget) -__SYSCALL(308, compat_sys_shmctl) -__SYSCALL(309, sys_add_key) -__SYSCALL(310, sys_request_key) -__SYSCALL(311, compat_sys_keyctl) -__SYSCALL(312, compat_sys_semtimedop) -__SYSCALL(313, sys_ni_syscall) -__SYSCALL(314, sys_ioprio_set) -__SYSCALL(315, sys_ioprio_get) -__SYSCALL(316, sys_inotify_init) -__SYSCALL(317, sys_inotify_add_watch) -__SYSCALL(318, sys_inotify_rm_watch) -__SYSCALL(319, compat_sys_mbind) -__SYSCALL(320, compat_sys_get_mempolicy) -__SYSCALL(321, compat_sys_set_mempolicy) -__SYSCALL(322, compat_sys_openat) -__SYSCALL(323, sys_mkdirat) -__SYSCALL(324, sys_mknodat) -__SYSCALL(325, sys_fchownat) -__SYSCALL(326, compat_sys_futimesat) -__SYSCALL(327, sys_fstatat64) -__SYSCALL(328, sys_unlinkat) -__SYSCALL(329, sys_renameat) -__SYSCALL(330, sys_linkat) -__SYSCALL(331, sys_symlinkat) -__SYSCALL(332, sys_readlinkat) -__SYSCALL(333, sys_fchmodat) -__SYSCALL(334, sys_faccessat) -__SYSCALL(335, compat_sys_pselect6) -__SYSCALL(336, compat_sys_ppoll) -__SYSCALL(337, sys_unshare) -__SYSCALL(338, compat_sys_set_robust_list) -__SYSCALL(339, compat_sys_get_robust_list) -__SYSCALL(340, sys_splice) -__SYSCALL(341, compat_sys_sync_file_range2_wrapper) -__SYSCALL(342, sys_tee) -__SYSCALL(343, compat_sys_vmsplice) -__SYSCALL(344, compat_sys_move_pages) -__SYSCALL(345, sys_getcpu) -__SYSCALL(346, compat_sys_epoll_pwait) -__SYSCALL(347, compat_sys_kexec_load) -__SYSCALL(348, compat_sys_utimensat) -__SYSCALL(349, compat_sys_signalfd) -__SYSCALL(350, sys_timerfd_create) -__SYSCALL(351, sys_eventfd) -__SYSCALL(352, compat_sys_fallocate_wrapper) -__SYSCALL(353, compat_sys_timerfd_settime) -__SYSCALL(354, compat_sys_timerfd_gettime) -__SYSCALL(355, compat_sys_signalfd4) -__SYSCALL(356, sys_eventfd2) -__SYSCALL(357, sys_epoll_create1) -__SYSCALL(358, sys_dup3) -__SYSCALL(359, sys_pipe2) -__SYSCALL(360, sys_inotify_init1) -__SYSCALL(361, compat_sys_preadv) -__SYSCALL(362, compat_sys_pwritev) -__SYSCALL(363, compat_sys_rt_tgsigqueueinfo) -__SYSCALL(364, sys_perf_event_open) -__SYSCALL(365, compat_sys_recvmmsg) -__SYSCALL(366, sys_accept4) -__SYSCALL(367, sys_fanotify_init) -__SYSCALL(368, compat_sys_fanotify_mark) -__SYSCALL(369, sys_prlimit64) -__SYSCALL(370, sys_name_to_handle_at) -__SYSCALL(371, compat_sys_open_by_handle_at) -__SYSCALL(372, compat_sys_clock_adjtime) -__SYSCALL(373, sys_syncfs) -__SYSCALL(374, compat_sys_sendmmsg) -__SYSCALL(375, sys_setns) -__SYSCALL(376, compat_sys_process_vm_readv) -__SYSCALL(377, compat_sys_process_vm_writev) -__SYSCALL(378, sys_ni_syscall) /* 378 for kcmp */ - -#define __NR_compat_syscalls 379 - -/* - * Compat syscall numbers used by the AArch64 kernel. - */ -#define __NR_compat_restart_syscall 0 -#define __NR_compat_sigreturn 119 -#define __NR_compat_rt_sigreturn 173 - - -/* - * The following SVCs are ARM private. - */ -#define __ARM_NR_COMPAT_BASE 0x0f0000 -#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) -#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) +#define __NR_restart_syscall 0 +__SYSCALL(__NR_restart_syscall, sys_restart_syscall) +#define __NR_exit 1 +__SYSCALL(__NR_exit, sys_exit) +#define __NR_fork 2 +__SYSCALL(__NR_fork, sys_fork) +#define __NR_read 3 +__SYSCALL(__NR_read, sys_read) +#define __NR_write 4 +__SYSCALL(__NR_write, sys_write) +#define __NR_open 5 +__SYSCALL(__NR_open, compat_sys_open) +#define __NR_close 6 +__SYSCALL(__NR_close, sys_close) + /* 7 was sys_waitpid */ +__SYSCALL(7, sys_ni_syscall) +#define __NR_creat 8 +__SYSCALL(__NR_creat, sys_creat) +#define __NR_link 9 +__SYSCALL(__NR_link, sys_link) +#define __NR_unlink 10 +__SYSCALL(__NR_unlink, sys_unlink) +#define __NR_execve 11 +__SYSCALL(__NR_execve, compat_sys_execve) +#define __NR_chdir 12 +__SYSCALL(__NR_chdir, sys_chdir) + /* 13 was sys_time */ +__SYSCALL(13, sys_ni_syscall) +#define __NR_mknod 14 +__SYSCALL(__NR_mknod, sys_mknod) +#define __NR_chmod 15 +__SYSCALL(__NR_chmod, sys_chmod) +#define __NR_lchown 16 +__SYSCALL(__NR_lchown, sys_lchown16) + /* 17 was sys_break */ +__SYSCALL(17, sys_ni_syscall) + /* 18 was sys_stat */ +__SYSCALL(18, sys_ni_syscall) +#define __NR_lseek 19 +__SYSCALL(__NR_lseek, compat_sys_lseek) +#define __NR_getpid 20 +__SYSCALL(__NR_getpid, sys_getpid) +#define __NR_mount 21 +__SYSCALL(__NR_mount, compat_sys_mount) + /* 22 was sys_umount */ +__SYSCALL(22, sys_ni_syscall) +#define __NR_setuid 23 +__SYSCALL(__NR_setuid, sys_setuid16) +#define __NR_getuid 24 +__SYSCALL(__NR_getuid, sys_getuid16) + /* 25 was sys_stime */ +__SYSCALL(25, sys_ni_syscall) +#define __NR_ptrace 26 +__SYSCALL(__NR_ptrace, compat_sys_ptrace) + /* 27 was sys_alarm */ +__SYSCALL(27, sys_ni_syscall) + /* 28 was sys_fstat */ +__SYSCALL(28, sys_ni_syscall) +#define __NR_pause 29 +__SYSCALL(__NR_pause, sys_pause) + /* 30 was sys_utime */ +__SYSCALL(30, sys_ni_syscall) + /* 31 was sys_stty */ +__SYSCALL(31, sys_ni_syscall) + /* 32 was sys_gtty */ +__SYSCALL(32, sys_ni_syscall) +#define __NR_access 33 +__SYSCALL(__NR_access, sys_access) +#define __NR_nice 34 +__SYSCALL(__NR_nice, sys_nice) + /* 35 was sys_ftime */ +__SYSCALL(35, sys_ni_syscall) +#define __NR_sync 36 +__SYSCALL(__NR_sync, sys_sync) +#define __NR_kill 37 +__SYSCALL(__NR_kill, sys_kill) +#define __NR_rename 38 +__SYSCALL(__NR_rename, sys_rename) +#define __NR_mkdir 39 +__SYSCALL(__NR_mkdir, sys_mkdir) +#define __NR_rmdir 40 +__SYSCALL(__NR_rmdir, sys_rmdir) +#define __NR_dup 41 +__SYSCALL(__NR_dup, sys_dup) +#define __NR_pipe 42 +__SYSCALL(__NR_pipe, sys_pipe) +#define __NR_times 43 +__SYSCALL(__NR_times, compat_sys_times) + /* 44 was sys_prof */ +__SYSCALL(44, sys_ni_syscall) +#define __NR_brk 45 +__SYSCALL(__NR_brk, sys_brk) +#define __NR_setgid 46 +__SYSCALL(__NR_setgid, sys_setgid16) +#define __NR_getgid 47 +__SYSCALL(__NR_getgid, sys_getgid16) + /* 48 was sys_signal */ +__SYSCALL(48, sys_ni_syscall) +#define __NR_geteuid 49 +__SYSCALL(__NR_geteuid, sys_geteuid16) +#define __NR_getegid 50 +__SYSCALL(__NR_getegid, sys_getegid16) +#define __NR_acct 51 +__SYSCALL(__NR_acct, sys_acct) +#define __NR_umount2 52 +__SYSCALL(__NR_umount2, sys_umount) + /* 53 was sys_lock */ +__SYSCALL(53, sys_ni_syscall) +#define __NR_ioctl 54 +__SYSCALL(__NR_ioctl, compat_sys_ioctl) +#define __NR_fcntl 55 +__SYSCALL(__NR_fcntl, compat_sys_fcntl) + /* 56 was sys_mpx */ +__SYSCALL(56, sys_ni_syscall) +#define __NR_setpgid 57 +__SYSCALL(__NR_setpgid, sys_setpgid) + /* 58 was sys_ulimit */ +__SYSCALL(58, sys_ni_syscall) + /* 59 was sys_olduname */ +__SYSCALL(59, sys_ni_syscall) +#define __NR_umask 60 +__SYSCALL(__NR_umask, sys_umask) +#define __NR_chroot 61 +__SYSCALL(__NR_chroot, sys_chroot) +#define __NR_ustat 62 +__SYSCALL(__NR_ustat, compat_sys_ustat) +#define __NR_dup2 63 +__SYSCALL(__NR_dup2, sys_dup2) +#define __NR_getppid 64 +__SYSCALL(__NR_getppid, sys_getppid) +#define __NR_getpgrp 65 +__SYSCALL(__NR_getpgrp, sys_getpgrp) +#define __NR_setsid 66 +__SYSCALL(__NR_setsid, sys_setsid) +#define __NR_sigaction 67 +__SYSCALL(__NR_sigaction, compat_sys_sigaction) + /* 68 was sys_sgetmask */ +__SYSCALL(68, sys_ni_syscall) + /* 69 was sys_ssetmask */ +__SYSCALL(69, sys_ni_syscall) +#define __NR_setreuid 70 +__SYSCALL(__NR_setreuid, sys_setreuid16) +#define __NR_setregid 71 +__SYSCALL(__NR_setregid, sys_setregid16) +#define __NR_sigsuspend 72 +__SYSCALL(__NR_sigsuspend, sys_sigsuspend) +#define __NR_sigpending 73 +__SYSCALL(__NR_sigpending, compat_sys_sigpending) +#define __NR_sethostname 74 +__SYSCALL(__NR_sethostname, sys_sethostname) +#define __NR_setrlimit 75 +__SYSCALL(__NR_setrlimit, compat_sys_setrlimit) + /* 76 was compat_sys_getrlimit */ +__SYSCALL(76, sys_ni_syscall) +#define __NR_getrusage 77 +__SYSCALL(__NR_getrusage, compat_sys_getrusage) +#define __NR_gettimeofday 78 +__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday) +#define __NR_settimeofday 79 +__SYSCALL(__NR_settimeofday, compat_sys_settimeofday) +#define __NR_getgroups 80 +__SYSCALL(__NR_getgroups, sys_getgroups16) +#define __NR_setgroups 81 +__SYSCALL(__NR_setgroups, sys_setgroups16) + /* 82 was compat_sys_select */ +__SYSCALL(82, sys_ni_syscall) +#define __NR_symlink 83 +__SYSCALL(__NR_symlink, sys_symlink) + /* 84 was sys_lstat */ +__SYSCALL(84, sys_ni_syscall) +#define __NR_readlink 85 +__SYSCALL(__NR_readlink, sys_readlink) +#define __NR_uselib 86 +__SYSCALL(__NR_uselib, sys_uselib) +#define __NR_swapon 87 +__SYSCALL(__NR_swapon, sys_swapon) +#define __NR_reboot 88 +__SYSCALL(__NR_reboot, sys_reboot) + /* 89 was sys_readdir */ +__SYSCALL(89, sys_ni_syscall) + /* 90 was sys_mmap */ +__SYSCALL(90, sys_ni_syscall) +#define __NR_munmap 91 +__SYSCALL(__NR_munmap, sys_munmap) +#define __NR_truncate 92 +__SYSCALL(__NR_truncate, compat_sys_truncate) +#define __NR_ftruncate 93 +__SYSCALL(__NR_ftruncate, compat_sys_ftruncate) +#define __NR_fchmod 94 +__SYSCALL(__NR_fchmod, sys_fchmod) +#define __NR_fchown 95 +__SYSCALL(__NR_fchown, sys_fchown16) +#define __NR_getpriority 96 +__SYSCALL(__NR_getpriority, sys_getpriority) +#define __NR_setpriority 97 +__SYSCALL(__NR_setpriority, sys_setpriority) + /* 98 was sys_profil */ +__SYSCALL(98, sys_ni_syscall) +#define __NR_statfs 99 +__SYSCALL(__NR_statfs, compat_sys_statfs) +#define __NR_fstatfs 100 +__SYSCALL(__NR_fstatfs, compat_sys_fstatfs) + /* 101 was sys_ioperm */ +__SYSCALL(101, sys_ni_syscall) + /* 102 was sys_socketcall */ +__SYSCALL(102, sys_ni_syscall) +#define __NR_syslog 103 +__SYSCALL(__NR_syslog, sys_syslog) +#define __NR_setitimer 104 +__SYSCALL(__NR_setitimer, compat_sys_setitimer) +#define __NR_getitimer 105 +__SYSCALL(__NR_getitimer, compat_sys_getitimer) +#define __NR_stat 106 +__SYSCALL(__NR_stat, compat_sys_newstat) +#define __NR_lstat 107 +__SYSCALL(__NR_lstat, compat_sys_newlstat) +#define __NR_fstat 108 +__SYSCALL(__NR_fstat, compat_sys_newfstat) + /* 109 was sys_uname */ +__SYSCALL(109, sys_ni_syscall) + /* 110 was sys_iopl */ +__SYSCALL(110, sys_ni_syscall) +#define __NR_vhangup 111 +__SYSCALL(__NR_vhangup, sys_vhangup) + /* 112 was sys_idle */ +__SYSCALL(112, sys_ni_syscall) + /* 113 was sys_syscall */ +__SYSCALL(113, sys_ni_syscall) +#define __NR_wait4 114 +__SYSCALL(__NR_wait4, compat_sys_wait4) +#define __NR_swapoff 115 +__SYSCALL(__NR_swapoff, sys_swapoff) +#define __NR_sysinfo 116 +__SYSCALL(__NR_sysinfo, compat_sys_sysinfo) + /* 117 was sys_ipc */ +__SYSCALL(117, sys_ni_syscall) +#define __NR_fsync 118 +__SYSCALL(__NR_fsync, sys_fsync) +#define __NR_sigreturn 119 +__SYSCALL(__NR_sigreturn, compat_sys_sigreturn_wrapper) +#define __NR_clone 120 +__SYSCALL(__NR_clone, sys_clone) +#define __NR_setdomainname 121 +__SYSCALL(__NR_setdomainname, sys_setdomainname) +#define __NR_uname 122 +__SYSCALL(__NR_uname, sys_newuname) + /* 123 was sys_modify_ldt */ +__SYSCALL(123, sys_ni_syscall) +#define __NR_adjtimex 124 +__SYSCALL(__NR_adjtimex, compat_sys_adjtimex) +#define __NR_mprotect 125 +__SYSCALL(__NR_mprotect, sys_mprotect) +#define __NR_sigprocmask 126 +__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask) + /* 127 was sys_create_module */ +__SYSCALL(127, sys_ni_syscall) +#define __NR_init_module 128 +__SYSCALL(__NR_init_module, sys_init_module) +#define __NR_delete_module 129 +__SYSCALL(__NR_delete_module, sys_delete_module) + /* 130 was sys_get_kernel_syms */ +__SYSCALL(130, sys_ni_syscall) +#define __NR_quotactl 131 +__SYSCALL(__NR_quotactl, sys_quotactl) +#define __NR_getpgid 132 +__SYSCALL(__NR_getpgid, sys_getpgid) +#define __NR_fchdir 133 +__SYSCALL(__NR_fchdir, sys_fchdir) +#define __NR_bdflush 134 +__SYSCALL(__NR_bdflush, sys_bdflush) +#define __NR_sysfs 135 +__SYSCALL(__NR_sysfs, sys_sysfs) +#define __NR_personality 136 +__SYSCALL(__NR_personality, sys_personality) + /* 137 was sys_afs_syscall */ +__SYSCALL(137, sys_ni_syscall) +#define __NR_setfsuid 138 +__SYSCALL(__NR_setfsuid, sys_setfsuid16) +#define __NR_setfsgid 139 +__SYSCALL(__NR_setfsgid, sys_setfsgid16) +#define __NR__llseek 140 +__SYSCALL(__NR__llseek, sys_llseek) +#define __NR_getdents 141 +__SYSCALL(__NR_getdents, compat_sys_getdents) +#define __NR__newselect 142 +__SYSCALL(__NR__newselect, compat_sys_select) +#define __NR_flock 143 +__SYSCALL(__NR_flock, sys_flock) +#define __NR_msync 144 +__SYSCALL(__NR_msync, sys_msync) +#define __NR_readv 145 +__SYSCALL(__NR_readv, compat_sys_readv) +#define __NR_writev 146 +__SYSCALL(__NR_writev, compat_sys_writev) +#define __NR_getsid 147 +__SYSCALL(__NR_getsid, sys_getsid) +#define __NR_fdatasync 148 +__SYSCALL(__NR_fdatasync, sys_fdatasync) +#define __NR__sysctl 149 +__SYSCALL(__NR__sysctl, compat_sys_sysctl) +#define __NR_mlock 150 +__SYSCALL(__NR_mlock, sys_mlock) +#define __NR_munlock 151 +__SYSCALL(__NR_munlock, sys_munlock) +#define __NR_mlockall 152 +__SYSCALL(__NR_mlockall, sys_mlockall) +#define __NR_munlockall 153 +__SYSCALL(__NR_munlockall, sys_munlockall) +#define __NR_sched_setparam 154 +__SYSCALL(__NR_sched_setparam, sys_sched_setparam) +#define __NR_sched_getparam 155 +__SYSCALL(__NR_sched_getparam, sys_sched_getparam) +#define __NR_sched_setscheduler 156 +__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) +#define __NR_sched_getscheduler 157 +__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) +#define __NR_sched_yield 158 +__SYSCALL(__NR_sched_yield, sys_sched_yield) +#define __NR_sched_get_priority_max 159 +__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) +#define __NR_sched_get_priority_min 160 +__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#define __NR_sched_rr_get_interval 161 +__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval) +#define __NR_nanosleep 162 +__SYSCALL(__NR_nanosleep, compat_sys_nanosleep) +#define __NR_mremap 163 +__SYSCALL(__NR_mremap, sys_mremap) +#define __NR_setresuid 164 +__SYSCALL(__NR_setresuid, sys_setresuid16) +#define __NR_getresuid 165 +__SYSCALL(__NR_getresuid, sys_getresuid16) + /* 166 was sys_vm86 */ +__SYSCALL(166, sys_ni_syscall) + /* 167 was sys_query_module */ +__SYSCALL(167, sys_ni_syscall) +#define __NR_poll 168 +__SYSCALL(__NR_poll, sys_poll) +#define __NR_nfsservctl 169 +__SYSCALL(__NR_nfsservctl, sys_ni_syscall) +#define __NR_setresgid 170 +__SYSCALL(__NR_setresgid, sys_setresgid16) +#define __NR_getresgid 171 +__SYSCALL(__NR_getresgid, sys_getresgid16) +#define __NR_prctl 172 +__SYSCALL(__NR_prctl, sys_prctl) +#define __NR_rt_sigreturn 173 +__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn_wrapper) +#define __NR_rt_sigaction 174 +__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction) +#define __NR_rt_sigprocmask 175 +__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) +#define __NR_rt_sigpending 176 +__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) +#define __NR_rt_sigtimedwait 177 +__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait) +#define __NR_rt_sigqueueinfo 178 +__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) +#define __NR_rt_sigsuspend 179 +__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend) +#define __NR_pread64 180 +__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper) +#define __NR_pwrite64 181 +__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper) +#define __NR_chown 182 +__SYSCALL(__NR_chown, sys_chown16) +#define __NR_getcwd 183 +__SYSCALL(__NR_getcwd, sys_getcwd) +#define __NR_capget 184 +__SYSCALL(__NR_capget, sys_capget) +#define __NR_capset 185 +__SYSCALL(__NR_capset, sys_capset) +#define __NR_sigaltstack 186 +__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack) +#define __NR_sendfile 187 +__SYSCALL(__NR_sendfile, compat_sys_sendfile) + /* 188 reserved */ +__SYSCALL(188, sys_ni_syscall) + /* 189 reserved */ +__SYSCALL(189, sys_ni_syscall) +#define __NR_vfork 190 +__SYSCALL(__NR_vfork, sys_vfork) +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +__SYSCALL(__NR_mmap2, sys_mmap_pgoff) +#define __NR_truncate64 193 +__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper) +#define __NR_ftruncate64 194 +__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper) +#define __NR_stat64 195 +__SYSCALL(__NR_stat64, sys_stat64) +#define __NR_lstat64 196 +__SYSCALL(__NR_lstat64, sys_lstat64) +#define __NR_fstat64 197 +__SYSCALL(__NR_fstat64, sys_fstat64) +#define __NR_lchown32 198 +__SYSCALL(__NR_lchown32, sys_lchown) +#define __NR_getuid32 199 +__SYSCALL(__NR_getuid32, sys_getuid) +#define __NR_getgid32 200 +__SYSCALL(__NR_getgid32, sys_getgid) +#define __NR_geteuid32 201 +__SYSCALL(__NR_geteuid32, sys_geteuid) +#define __NR_getegid32 202 +__SYSCALL(__NR_getegid32, sys_getegid) +#define __NR_setreuid32 203 +__SYSCALL(__NR_setreuid32, sys_setreuid) +#define __NR_setregid32 204 +__SYSCALL(__NR_setregid32, sys_setregid) +#define __NR_getgroups32 205 +__SYSCALL(__NR_getgroups32, sys_getgroups) +#define __NR_setgroups32 206 +__SYSCALL(__NR_setgroups32, sys_setgroups) +#define __NR_fchown32 207 +__SYSCALL(__NR_fchown32, sys_fchown) +#define __NR_setresuid32 208 +__SYSCALL(__NR_setresuid32, sys_setresuid) +#define __NR_getresuid32 209 +__SYSCALL(__NR_getresuid32, sys_getresuid) +#define __NR_setresgid32 210 +__SYSCALL(__NR_setresgid32, sys_setresgid) +#define __NR_getresgid32 211 +__SYSCALL(__NR_getresgid32, sys_getresgid) +#define __NR_chown32 212 +__SYSCALL(__NR_chown32, sys_chown) +#define __NR_setuid32 213 +__SYSCALL(__NR_setuid32, sys_setuid) +#define __NR_setgid32 214 +__SYSCALL(__NR_setgid32, sys_setgid) +#define __NR_setfsuid32 215 +__SYSCALL(__NR_setfsuid32, sys_setfsuid) +#define __NR_setfsgid32 216 +__SYSCALL(__NR_setfsgid32, sys_setfsgid) +#define __NR_getdents64 217 +__SYSCALL(__NR_getdents64, compat_sys_getdents64) +#define __NR_pivot_root 218 +__SYSCALL(__NR_pivot_root, sys_pivot_root) +#define __NR_mincore 219 +__SYSCALL(__NR_mincore, sys_mincore) +#define __NR_madvise 220 +__SYSCALL(__NR_madvise, sys_madvise) +#define __NR_fcntl64 221 +__SYSCALL(__NR_fcntl64, compat_sys_fcntl64) + /* 222 for tux */ +__SYSCALL(222, sys_ni_syscall) + /* 223 is unused */ +__SYSCALL(223, sys_ni_syscall) +#define __NR_gettid 224 +__SYSCALL(__NR_gettid, sys_gettid) +#define __NR_readahead 225 +__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper) +#define __NR_setxattr 226 +__SYSCALL(__NR_setxattr, sys_setxattr) +#define __NR_lsetxattr 227 +__SYSCALL(__NR_lsetxattr, sys_lsetxattr) +#define __NR_fsetxattr 228 +__SYSCALL(__NR_fsetxattr, sys_fsetxattr) +#define __NR_getxattr 229 +__SYSCALL(__NR_getxattr, sys_getxattr) +#define __NR_lgetxattr 230 +__SYSCALL(__NR_lgetxattr, sys_lgetxattr) +#define __NR_fgetxattr 231 +__SYSCALL(__NR_fgetxattr, sys_fgetxattr) +#define __NR_listxattr 232 +__SYSCALL(__NR_listxattr, sys_listxattr) +#define __NR_llistxattr 233 +__SYSCALL(__NR_llistxattr, sys_llistxattr) +#define __NR_flistxattr 234 +__SYSCALL(__NR_flistxattr, sys_flistxattr) +#define __NR_removexattr 235 +__SYSCALL(__NR_removexattr, sys_removexattr) +#define __NR_lremovexattr 236 +__SYSCALL(__NR_lremovexattr, sys_lremovexattr) +#define __NR_fremovexattr 237 +__SYSCALL(__NR_fremovexattr, sys_fremovexattr) +#define __NR_tkill 238 +__SYSCALL(__NR_tkill, sys_tkill) +#define __NR_sendfile64 239 +__SYSCALL(__NR_sendfile64, sys_sendfile64) +#define __NR_futex 240 +__SYSCALL(__NR_futex, compat_sys_futex) +#define __NR_sched_setaffinity 241 +__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) +#define __NR_sched_getaffinity 242 +__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity) +#define __NR_io_setup 243 +__SYSCALL(__NR_io_setup, compat_sys_io_setup) +#define __NR_io_destroy 244 +__SYSCALL(__NR_io_destroy, sys_io_destroy) +#define __NR_io_getevents 245 +__SYSCALL(__NR_io_getevents, compat_sys_io_getevents) +#define __NR_io_submit 246 +__SYSCALL(__NR_io_submit, compat_sys_io_submit) +#define __NR_io_cancel 247 +__SYSCALL(__NR_io_cancel, sys_io_cancel) +#define __NR_exit_group 248 +__SYSCALL(__NR_exit_group, sys_exit_group) +#define __NR_lookup_dcookie 249 +__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie) +#define __NR_epoll_create 250 +__SYSCALL(__NR_epoll_create, sys_epoll_create) +#define __NR_epoll_ctl 251 +__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_epoll_wait 252 +__SYSCALL(__NR_epoll_wait, sys_epoll_wait) +#define __NR_remap_file_pages 253 +__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) + /* 254 for set_thread_area */ +__SYSCALL(254, sys_ni_syscall) + /* 255 for get_thread_area */ +__SYSCALL(255, sys_ni_syscall) +#define __NR_set_tid_address 256 +__SYSCALL(__NR_set_tid_address, sys_set_tid_address) +#define __NR_timer_create 257 +__SYSCALL(__NR_timer_create, compat_sys_timer_create) +#define __NR_timer_settime 258 +__SYSCALL(__NR_timer_settime, compat_sys_timer_settime) +#define __NR_timer_gettime 259 +__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime) +#define __NR_timer_getoverrun 260 +__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#define __NR_timer_delete 261 +__SYSCALL(__NR_timer_delete, sys_timer_delete) +#define __NR_clock_settime 262 +__SYSCALL(__NR_clock_settime, compat_sys_clock_settime) +#define __NR_clock_gettime 263 +__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime) +#define __NR_clock_getres 264 +__SYSCALL(__NR_clock_getres, compat_sys_clock_getres) +#define __NR_clock_nanosleep 265 +__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep) +#define __NR_statfs64 266 +__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper) +#define __NR_fstatfs64 267 +__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper) +#define __NR_tgkill 268 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_utimes 269 +__SYSCALL(__NR_utimes, compat_sys_utimes) +#define __NR_arm_fadvise64_64 270 +__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper) +#define __NR_pciconfig_iobase 271 +__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase) +#define __NR_pciconfig_read 272 +__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read) +#define __NR_pciconfig_write 273 +__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write) +#define __NR_mq_open 274 +__SYSCALL(__NR_mq_open, compat_sys_mq_open) +#define __NR_mq_unlink 275 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 276 +__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend) +#define __NR_mq_timedreceive 277 +__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive) +#define __NR_mq_notify 278 +__SYSCALL(__NR_mq_notify, compat_sys_mq_notify) +#define __NR_mq_getsetattr 279 +__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr) +#define __NR_waitid 280 +__SYSCALL(__NR_waitid, compat_sys_waitid) +#define __NR_socket 281 +__SYSCALL(__NR_socket, sys_socket) +#define __NR_bind 282 +__SYSCALL(__NR_bind, sys_bind) +#define __NR_connect 283 +__SYSCALL(__NR_connect, sys_connect) +#define __NR_listen 284 +__SYSCALL(__NR_listen, sys_listen) +#define __NR_accept 285 +__SYSCALL(__NR_accept, sys_accept) +#define __NR_getsockname 286 +__SYSCALL(__NR_getsockname, sys_getsockname) +#define __NR_getpeername 287 +__SYSCALL(__NR_getpeername, sys_getpeername) +#define __NR_socketpair 288 +__SYSCALL(__NR_socketpair, sys_socketpair) +#define __NR_send 289 +__SYSCALL(__NR_send, sys_send) +#define __NR_sendto 290 +__SYSCALL(__NR_sendto, sys_sendto) +#define __NR_recv 291 +__SYSCALL(__NR_recv, compat_sys_recv) +#define __NR_recvfrom 292 +__SYSCALL(__NR_recvfrom, compat_sys_recvfrom) +#define __NR_shutdown 293 +__SYSCALL(__NR_shutdown, sys_shutdown) +#define __NR_setsockopt 294 +__SYSCALL(__NR_setsockopt, compat_sys_setsockopt) +#define __NR_getsockopt 295 +__SYSCALL(__NR_getsockopt, compat_sys_getsockopt) +#define __NR_sendmsg 296 +__SYSCALL(__NR_sendmsg, compat_sys_sendmsg) +#define __NR_recvmsg 297 +__SYSCALL(__NR_recvmsg, compat_sys_recvmsg) +#define __NR_semop 298 +__SYSCALL(__NR_semop, sys_semop) +#define __NR_semget 299 +__SYSCALL(__NR_semget, sys_semget) +#define __NR_semctl 300 +__SYSCALL(__NR_semctl, compat_sys_semctl) +#define __NR_msgsnd 301 +__SYSCALL(__NR_msgsnd, compat_sys_msgsnd) +#define __NR_msgrcv 302 +__SYSCALL(__NR_msgrcv, compat_sys_msgrcv) +#define __NR_msgget 303 +__SYSCALL(__NR_msgget, sys_msgget) +#define __NR_msgctl 304 +__SYSCALL(__NR_msgctl, compat_sys_msgctl) +#define __NR_shmat 305 +__SYSCALL(__NR_shmat, compat_sys_shmat) +#define __NR_shmdt 306 +__SYSCALL(__NR_shmdt, sys_shmdt) +#define __NR_shmget 307 +__SYSCALL(__NR_shmget, sys_shmget) +#define __NR_shmctl 308 +__SYSCALL(__NR_shmctl, compat_sys_shmctl) +#define __NR_add_key 309 +__SYSCALL(__NR_add_key, sys_add_key) +#define __NR_request_key 310 +__SYSCALL(__NR_request_key, sys_request_key) +#define __NR_keyctl 311 +__SYSCALL(__NR_keyctl, compat_sys_keyctl) +#define __NR_semtimedop 312 +__SYSCALL(__NR_semtimedop, compat_sys_semtimedop) +#define __NR_vserver 313 +__SYSCALL(__NR_vserver, sys_ni_syscall) +#define __NR_ioprio_set 314 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 315 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) +#define __NR_inotify_init 316 +__SYSCALL(__NR_inotify_init, sys_inotify_init) +#define __NR_inotify_add_watch 317 +__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) +#define __NR_inotify_rm_watch 318 +__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_mbind 319 +__SYSCALL(__NR_mbind, compat_sys_mbind) +#define __NR_get_mempolicy 320 +__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy) +#define __NR_set_mempolicy 321 +__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy) +#define __NR_openat 322 +__SYSCALL(__NR_openat, compat_sys_openat) +#define __NR_mkdirat 323 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_mknodat 324 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_fchownat 325 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_futimesat 326 +__SYSCALL(__NR_futimesat, compat_sys_futimesat) +#define __NR_fstatat64 327 +__SYSCALL(__NR_fstatat64, sys_fstatat64) +#define __NR_unlinkat 328 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_renameat 329 +__SYSCALL(__NR_renameat, sys_renameat) +#define __NR_linkat 330 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_symlinkat 331 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_readlinkat 332 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR_fchmodat 333 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_faccessat 334 +__SYSCALL(__NR_faccessat, sys_faccessat) +#define __NR_pselect6 335 +__SYSCALL(__NR_pselect6, compat_sys_pselect6) +#define __NR_ppoll 336 +__SYSCALL(__NR_ppoll, compat_sys_ppoll) +#define __NR_unshare 337 +__SYSCALL(__NR_unshare, sys_unshare) +#define __NR_set_robust_list 338 +__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list) +#define __NR_get_robust_list 339 +__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list) +#define __NR_splice 340 +__SYSCALL(__NR_splice, sys_splice) +#define __NR_sync_file_range2 341 +__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper) +#define __NR_tee 342 +__SYSCALL(__NR_tee, sys_tee) +#define __NR_vmsplice 343 +__SYSCALL(__NR_vmsplice, compat_sys_vmsplice) +#define __NR_move_pages 344 +__SYSCALL(__NR_move_pages, compat_sys_move_pages) +#define __NR_getcpu 345 +__SYSCALL(__NR_getcpu, sys_getcpu) +#define __NR_epoll_pwait 346 +__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) +#define __NR_kexec_load 347 +__SYSCALL(__NR_kexec_load, compat_sys_kexec_load) +#define __NR_utimensat 348 +__SYSCALL(__NR_utimensat, compat_sys_utimensat) +#define __NR_signalfd 349 +__SYSCALL(__NR_signalfd, compat_sys_signalfd) +#define __NR_timerfd_create 350 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#define __NR_eventfd 351 +__SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_fallocate 352 +__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper) +#define __NR_timerfd_settime 353 +__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime) +#define __NR_timerfd_gettime 354 +__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime) +#define __NR_signalfd4 355 +__SYSCALL(__NR_signalfd4, compat_sys_signalfd4) +#define __NR_eventfd2 356 +__SYSCALL(__NR_eventfd2, sys_eventfd2) +#define __NR_epoll_create1 357 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_dup3 358 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR_pipe2 359 +__SYSCALL(__NR_pipe2, sys_pipe2) +#define __NR_inotify_init1 360 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) +#define __NR_preadv 361 +__SYSCALL(__NR_preadv, compat_sys_preadv) +#define __NR_pwritev 362 +__SYSCALL(__NR_pwritev, compat_sys_pwritev) +#define __NR_rt_tgsigqueueinfo 363 +__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) +#define __NR_perf_event_open 364 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_recvmmsg 365 +__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg) +#define __NR_accept4 366 +__SYSCALL(__NR_accept4, sys_accept4) +#define __NR_fanotify_init 367 +__SYSCALL(__NR_fanotify_init, sys_fanotify_init) +#define __NR_fanotify_mark 368 +__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark) +#define __NR_prlimit64 369 +__SYSCALL(__NR_prlimit64, sys_prlimit64) +#define __NR_name_to_handle_at 370 +__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) +#define __NR_open_by_handle_at 371 +__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) +#define __NR_clock_adjtime 372 +__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime) +#define __NR_syncfs 373 +__SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_sendmmsg 374 +__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg) +#define __NR_setns 375 +__SYSCALL(__NR_setns, sys_setns) +#define __NR_process_vm_readv 376 +__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv) +#define __NR_process_vm_writev 377 +__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev) +#define __NR_kcmp 378 +__SYSCALL(__NR_kcmp, sys_kcmp) +#define __NR_finit_module 379 +__SYSCALL(__NR_finit_module, sys_finit_module) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5b368d51369e..3c93e1a9ea8d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,7 +27,6 @@ #include #include #include -#include /* * Bad Abort numbers diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8bc..2f5b3ff7e30d 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@ * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ + +#include + .align 5 .globl __kuser_helper_start __kuser_helper_start: diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 31fbeaf4dd62..b4692837e7ab 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include struct compat_sigcontext { /* We always set these two fields to 0 */ diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 26e9c4eeaba8..de2b0226e06d 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -26,7 +26,7 @@ #include #include -#include +#include static inline void do_compat_cache_op(unsigned long start, unsigned long end, int flags) From 4f2f36a138e56efea5028970ee940ff1e2215817 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Wed, 28 Aug 2013 14:24:53 +0100 Subject: [PATCH 31/69] Move the EM_ARM and EM_AARCH64 definitions to uapi/linux/elf-em.h Signed-off-by: Dan Aloni Signed-off-by: Catalin Marinas --- arch/arm/include/asm/elf.h | 2 -- arch/arm64/include/asm/elf.h | 3 --- include/uapi/linux/elf-em.h | 2 ++ 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 38050b1c4800..e110a672e160 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fp elf_fpregset_t; -#define EM_ARM 40 - #define EF_ARM_EABI_MASK 0xff000000 #define EF_ARM_EABI_UNKNOWN 0x00000000 #define EF_ARM_EABI_VER1 0x01000000 diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index fe32c0e4ac01..e7fa87f9201b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -33,8 +33,6 @@ typedef unsigned long elf_greg_t; typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fpsimd_state elf_fpregset_t; -#define EM_AARCH64 183 - /* * AArch64 static relocation types. */ @@ -151,7 +149,6 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk #ifdef CONFIG_COMPAT -#define EM_ARM 40 #define COMPAT_ELF_PLATFORM ("v8l") #define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3)) diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 8e2b7bac4378..59c17a2d38ad 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -22,6 +22,7 @@ #define EM_PPC 20 /* PowerPC */ #define EM_PPC64 21 /* PowerPC64 */ #define EM_SPU 23 /* Cell BE SPU */ +#define EM_ARM 40 /* ARM 32 bit */ #define EM_SH 42 /* SuperH */ #define EM_SPARCV9 43 /* SPARC v9 64-bit */ #define EM_IA_64 50 /* HP/Intel IA-64 */ @@ -34,6 +35,7 @@ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ +#define EM_AARCH64 183 /* ARM 64 bit */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ From bf11863d45eb3dac0d0cf1f818ded11ade6e28d3 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 4 Jul 2014 08:28:30 +0100 Subject: [PATCH 32/69] arm64: Add audit support On AArch64, audit is supported through generic lib/audit.c and compat_audit.c, and so this patch adds arch specific definitions required. Acked-by Will Deacon Acked-by: Richard Guy Briggs Signed-off-by: AKASHI Takahiro Signed-off-by: Catalin Marinas Conflicts: arch/arm64/Kconfig include/uapi/linux/audit.h Change-Id: Ia6d7b25786843d43191e67d514928e3ecba11e2f --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/syscall.h | 14 ++++++++++++++ include/uapi/linux/audit.h | 2 ++ 3 files changed, 18 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 71c2a070ace4..2221396c2a6c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,6 +7,7 @@ config ARM64 select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select AUDIT_ARCH_COMPAT_GENERIC select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS @@ -18,6 +19,7 @@ config ARM64 select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_TRACEHOOK select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 89c047f9a971..d7de9333f714 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -16,6 +16,8 @@ #ifndef __ASM_SYSCALL_H #define __ASM_SYSCALL_H +#include +#include #include @@ -98,4 +100,16 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->regs[i], args, n * sizeof(args[0])); } +/* + * We don't care about endianness (__AUDIT_ARCH_LE bit) here because + * AArch64 has the same system calls both on little- and big- endian. + */ +static inline int syscall_get_arch(void) +{ + if (is_compat_task()) + return AUDIT_ARCH_ARM; + + return AUDIT_ARCH_AARCH64; +} + #endif /* __ASM_SYSCALL_H */ diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 75cef3fd97ad..ce8750f8788a 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -324,6 +324,8 @@ enum { /* distinguish syscall tables */ #define __AUDIT_ARCH_64BIT 0x80000000 #define __AUDIT_ARCH_LE 0x40000000 + +#define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ALPHA (EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARM (EM_ARM|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARMEB (EM_ARM) From 3e21c0bb663a23436e0eb3f61860d4fedc233bab Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 17 Sep 2014 18:18:11 -0700 Subject: [PATCH 33/69] arm64: audit: Add audit hook in syscall_trace_enter/exit() This patch adds auditing functions on entry to or exit from every system call invocation. Acked-by: Richard Guy Briggs Acked-by Will Deacon Signed-off-by: AKASHI Takahiro Signed-off-by: Catalin Marinas Conflicts: arch/arm64/kernel/ptrace.c Change-Id: I7ebff5df4acbdab56c74e584dbc5fef5d8bfc9a8 --- arch/arm64/kernel/ptrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f3b14fd27ab4..ee856d9f6f64 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,8 @@ * along with this program. If not, see . */ +#include +#include #include #include #include @@ -38,6 +40,7 @@ #include #include #include +#include #include #include @@ -1096,11 +1099,16 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + audit_syscall_entry(syscall_get_arch(), regs->syscallno, + regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); + return regs->syscallno; } asmlinkage void syscall_trace_exit(struct pt_regs *regs) { + audit_syscall_exit(regs); + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); } From 4b3735631fcc1056af00e382833727342f12c259 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Wed, 30 Apr 2014 10:51:31 +0100 Subject: [PATCH 34/69] arm64: Add regs_return_value() in syscall.h This macro, regs_return_value, is used mainly for audit to record system call's results, but may also be used in test_kprobes.c. Acked-by: Will Deacon Acked-by: Richard Guy Briggs Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ptrace.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fff28950e660..dbe3b00d1eb7 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -133,6 +133,11 @@ struct pt_regs { #define user_stack_pointer(regs) \ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) +static inline unsigned long regs_return_value(struct pt_regs *regs) +{ + return regs->regs[0]; +} + /* * Are the current registers suitable for user mode? (used to maintain * security in signal handlers) From ba27127b4730d9352bfda24553d1994cddd9edb3 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Wed, 30 Apr 2014 10:51:32 +0100 Subject: [PATCH 35/69] arm64: is_compat_task is defined both in asm/compat.h and linux/compat.h Some kernel files may include both linux/compat.h and asm/compat.h directly or indirectly. Since both header files contain is_compat_task() under !CONFIG_COMPAT, compiling them with !CONFIG_COMPAT will eventually fail. Such files include kernel/auditsc.c, kernel/seccomp.c and init/do_mountfs.c (do_mountfs.c may read asm/compat.h via asm/ftrace.h once ftrace is implemented). So this patch proactively 1) removes is_compat_task() under !CONFIG_COMPAT from asm/compat.h 2) replaces asm/compat.h to linux/compat.h in kernel/*.c, but asm/compat.h is still necessary in ptrace.c and process.c because they use is_compat_thread(). Acked-by: Will Deacon Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Conflicts: arch/arm64/kernel/hw_breakpoint.c arch/arm64/kernel/ptrace.c Change-Id: I5b8330e43ab8bdd383cd410d8223d6c1a39fa0fc --- arch/arm64/include/asm/compat.h | 5 ----- arch/arm64/kernel/hw_breakpoint.c | 2 +- arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/signal.c | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 7058eec269ab..ae0004fe6c23 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -291,11 +291,6 @@ static inline int is_compat_thread(struct thread_info *thread) #else /* !CONFIG_COMPAT */ -static inline int is_compat_task(void) -{ - return 0; -} - static inline int is_compat_thread(struct thread_info *thread) { return 0; diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 5ab825c59db9..68e371ea1bda 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,13 +20,13 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7c11c74f7f54..977ca8aa7dd1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -20,6 +20,7 @@ #include +#include #include #include #include diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 882f01774365..182b6fc01190 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -25,7 +26,6 @@ #include #include -#include #include #include #include From f884d7fae12bd203a2243883e320a0c59168deb3 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 17 Sep 2014 19:26:43 -0700 Subject: [PATCH 36/69] arm64: Fixup __NR_* compat syscalls count. Should have gone in the cherry-pick cfc7e99e9e3900056028a7d90072e9ea0d886f8d arm64: Add __NR_* definitions for compat syscalls Change-Id: I69a69e4b1f206aad4ece1a8b06f9e23e99adcbfb --- arch/arm64/include/asm/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 38f8799a3d31..a8f8f6992987 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -40,7 +40,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 383 +#define __NR_compat_syscalls 378 #endif #define __ARCH_WANT_SYS_CLONE From 03c6fb711a50ed882fdafce6acf5dd92272fd0b7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 24 Sep 2014 01:08:14 +0530 Subject: [PATCH 37/69] export tcp_nuke_addr to build ipv6 as module Otherwise we run into following build error: -----8<----- ... CC [M] net/ipv6/netfilter/ip6t_REJECT.o CC [M] net/xfrm/xfrm_ipcomp.o Building modules, stage 2. MODPOST 36 modules ERROR: "tcp_nuke_addr" [net/ipv6/ipv6.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 -----8<----- Signed-off-by: Amit Pundir --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7921d1abc449..089a948fe834 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3565,3 +3565,4 @@ int tcp_nuke_addr(struct net *net, struct sockaddr *addr) return 0; } +EXPORT_SYMBOL_GPL(tcp_nuke_addr); From 555f3ed9473f5e71966842b557772dd5e0e5a364 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 24 Sep 2014 18:58:23 -0700 Subject: [PATCH 38/69] usb: u_ether: Add workqueue as bottom half handler for rx data path u_ether driver passes rx data to network layer and resubmits the request back to usb hardware in interrupt context. Network layer processes rx data by scheduling tasklet. For high throughput scenarios on rx data path driver is spending lot of time in interrupt context due to rx data processing by tasklet and continuous completion and re-submission of the usb requests which results in watchdog bark. Hence move the rx data processing and usb request submission to a workqueue bottom half handler. Change-Id: I316de8e267997137ac189a8b7b2846fa325f4a5a Signed-off-by: Badhri Jagan Sridharan --- drivers/usb/gadget/u_ether.c | 118 ++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 37 deletions(-) diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 801e326e54c0..734d64b8e2c1 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -48,6 +48,8 @@ #define UETH__VERSION "29-May-2008" +static struct workqueue_struct *uether_wq; + struct eth_dev { /* lock is held while accessing port_usb */ @@ -71,6 +73,7 @@ struct eth_dev { struct sk_buff_head *list); struct work_struct work; + struct work_struct rx_work; unsigned long todo; #define WORK_RX_MEMORY 0 @@ -259,18 +262,16 @@ rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags) DBG(dev, "rx submit --> %d\n", retval); if (skb) dev_kfree_skb_any(skb); - spin_lock_irqsave(&dev->req_lock, flags); - list_add(&req->list, &dev->rx_reqs); - spin_unlock_irqrestore(&dev->req_lock, flags); } return retval; } static void rx_complete(struct usb_ep *ep, struct usb_request *req) { - struct sk_buff *skb = req->context, *skb2; + struct sk_buff *skb = req->context; struct eth_dev *dev = ep->driver_data; int status = req->status; + bool queue = 0; switch (status) { @@ -294,30 +295,9 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req) } else { skb_queue_tail(&dev->rx_frames, skb); } - skb = NULL; - skb2 = skb_dequeue(&dev->rx_frames); - while (skb2) { - if (status < 0 - || ETH_HLEN > skb2->len - || skb2->len > VLAN_ETH_FRAME_LEN) { - dev->net->stats.rx_errors++; - dev->net->stats.rx_length_errors++; - DBG(dev, "rx length %d\n", skb2->len); - dev_kfree_skb_any(skb2); - goto next_frame; - } - skb2->protocol = eth_type_trans(skb2, dev->net); - dev->net->stats.rx_packets++; - dev->net->stats.rx_bytes += skb2->len; - - /* no buffer copies needed, unless hardware can't - * use skb buffers. - */ - status = netif_rx(skb2); -next_frame: - skb2 = skb_dequeue(&dev->rx_frames); - } + if (!status) + queue = 1; break; /* software-driven interface shutdown */ @@ -340,22 +320,20 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req) /* FALLTHROUGH */ default: + queue = 1; + dev_kfree_skb_any(skb); dev->net->stats.rx_errors++; DBG(dev, "rx status %d\n", status); break; } - if (skb) - dev_kfree_skb_any(skb); - if (!netif_running(dev->net)) { clean: - spin_lock(&dev->req_lock); - list_add(&req->list, &dev->rx_reqs); - spin_unlock(&dev->req_lock); - req = NULL; - } - if (req) - rx_submit(dev, req, GFP_ATOMIC); + spin_lock(&dev->req_lock); + list_add(&req->list, &dev->rx_reqs); + spin_unlock(&dev->req_lock); + + if (queue) + queue_work(uether_wq, &dev->rx_work); } static int prealloc(struct list_head *list, struct usb_ep *ep, unsigned n) @@ -420,16 +398,24 @@ static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags) { struct usb_request *req; unsigned long flags; + int req_cnt = 0; /* fill unused rxq slots with some skb */ spin_lock_irqsave(&dev->req_lock, flags); while (!list_empty(&dev->rx_reqs)) { + /* break the nexus of continuous completion and re-submission*/ + if (++req_cnt > qlen(dev->gadget)) + break; + req = container_of(dev->rx_reqs.next, struct usb_request, list); list_del_init(&req->list); spin_unlock_irqrestore(&dev->req_lock, flags); if (rx_submit(dev, req, gfp_flags) < 0) { + spin_lock_irqsave(&dev->req_lock, flags); + list_add(&req->list, &dev->rx_reqs); + spin_unlock_irqrestore(&dev->req_lock, flags); defer_kevent(dev, WORK_RX_MEMORY); return; } @@ -439,6 +425,36 @@ static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags) spin_unlock_irqrestore(&dev->req_lock, flags); } +static void process_rx_w(struct work_struct *work) +{ + struct eth_dev *dev = container_of(work, struct eth_dev, rx_work); + struct sk_buff *skb; + int status = 0; + + if (!dev->port_usb) + return; + + while ((skb = skb_dequeue(&dev->rx_frames))) { + if (status < 0 + || ETH_HLEN > skb->len + || skb->len > ETH_FRAME_LEN) { + dev->net->stats.rx_errors++; + dev->net->stats.rx_length_errors++; + DBG(dev, "rx length %d\n", skb->len); + dev_kfree_skb_any(skb); + continue; + } + skb->protocol = eth_type_trans(skb, dev->net); + dev->net->stats.rx_packets++; + dev->net->stats.rx_bytes += skb->len; + + status = netif_rx_ni(skb); + } + + if (netif_running(dev->net)) + rx_fill(dev, GFP_KERNEL); +} + static void eth_work(struct work_struct *work) { struct eth_dev *dev = container_of(work, struct eth_dev, work); @@ -775,6 +791,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, u8 ethaddr[ETH_ALEN], spin_lock_init(&dev->lock); spin_lock_init(&dev->req_lock); INIT_WORK(&dev->work, eth_work); + INIT_WORK(&dev->rx_work, process_rx_w); INIT_LIST_HEAD(&dev->tx_reqs); INIT_LIST_HEAD(&dev->rx_reqs); @@ -933,6 +950,7 @@ void gether_disconnect(struct gether *link) { struct eth_dev *dev = link->ioport; struct usb_request *req; + struct sk_buff *skb; WARN_ON(!dev); if (!dev) @@ -974,6 +992,12 @@ void gether_disconnect(struct gether *link) spin_lock(&dev->req_lock); } spin_unlock(&dev->req_lock); + + spin_lock(&dev->rx_frames.lock); + while ((skb = __skb_dequeue(&dev->rx_frames))) + dev_kfree_skb_any(skb); + spin_unlock(&dev->rx_frames.lock); + link->out_ep->driver_data = NULL; link->out_ep->desc = NULL; @@ -986,3 +1010,23 @@ void gether_disconnect(struct gether *link) dev->port_usb = NULL; spin_unlock(&dev->lock); } + +static int __init gether_init(void) +{ + uether_wq = create_singlethread_workqueue("uether"); + if (!uether_wq) { + pr_err("%s: Unable to create workqueue: uether\n", __func__); + return -ENOMEM; + } + return 0; +} +module_init(gether_init); + +static void __exit gether_exit(void) +{ + destroy_workqueue(uether_wq); + +} +module_exit(gether_exit); +MODULE_DESCRIPTION("ethernet over USB driver"); +MODULE_LICENSE("GPL v2"); From 9c861db045e11a41ab092f82ec4bf69212dd4694 Mon Sep 17 00:00:00 2001 From: "taeju.park" Date: Fri, 14 Sep 2012 14:09:03 +0900 Subject: [PATCH 39/69] usb: gadget: prevent change of Host MAC address of 'usb0' interface On windows 7 platform, previously allocated ip address is maintained. However, Host MAC address of 'usb0' interface is changed when the tethering driver re-enumerated. Thus, the tethering network driver can't be allocated ip address from dhcp. It causes connection delay between host and phone for usb tethering. This patch prevents from changing Host MAC address of 'usb0' interface. In other words, this patch maintains the Host MAC address allocated when first tethering driver although the driver is re-enumerated. However, after reboot, the Host MAC address can be changed. Change-Id: I43add9925e9d6d90c56cffbd3ed999104448f818 Signed-off-by: Badhri Jagan Sridharan --- drivers/usb/gadget/u_ether.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 734d64b8e2c1..6a5065966494 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -718,6 +718,8 @@ static int eth_stop(struct net_device *net) /*-------------------------------------------------------------------------*/ +static u8 host_ethaddr[ETH_ALEN]; + /* initial value, changed by "ifconfig usb0 hw ether xx:xx:xx:xx:xx:xx" */ static char *dev_addr; module_param(dev_addr, charp, S_IRUGO); @@ -749,6 +751,17 @@ static int get_ether_addr(const char *str, u8 *dev_addr) return 1; } +static int get_host_ether_addr(u8 *str, u8 *dev_addr) +{ + memcpy(dev_addr, str, ETH_ALEN); + if (is_valid_ether_addr(dev_addr)) + return 0; + + random_ether_addr(dev_addr); + memcpy(str, dev_addr, ETH_ALEN); + return 1; +} + static const struct net_device_ops eth_netdev_ops = { .ndo_open = eth_open, .ndo_stop = eth_stop, @@ -804,9 +817,11 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, u8 ethaddr[ETH_ALEN], if (get_ether_addr(dev_addr, net->dev_addr)) dev_warn(&g->dev, "using random %s ethernet address\n", "self"); - if (get_ether_addr(host_addr, dev->host_mac)) - dev_warn(&g->dev, - "using random %s ethernet address\n", "host"); + + if (get_host_ether_addr(host_ethaddr, dev->host_mac)) + dev_warn(&g->dev, "using random %s ethernet address\n", "host"); + else + dev_warn(&g->dev, "using previous %s ethernet address\n", "host"); if (ethaddr) memcpy(ethaddr, dev->host_mac, ETH_ALEN); From 91da92a881344b1b04077f18ed7e9cc358cd0430 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Thu, 18 Sep 2014 10:42:41 -0700 Subject: [PATCH 40/69] USB: gadget: u_ether: Fix data stall issue in RNDIS tethering mode For dual speed gadget, with current no. of request(10), there is possibility of corner case occurence where all 10 reuqests are queued to HW without setting IOC bit, which could lead to data stall in RNDIS tethering and RNDIS local networking. With this patch, counter will be incremented before queueing request to HW and sets IOC bit for every nth request due to which the corner case of all requests queued to HW without IOC bit set will be avoided. Change-Id: I26515bfd9bbc8f7af38be7835692143f7093118a Signed-off-by: Vijayavardhan Vennapusa --- drivers/usb/gadget/u_ether.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 6a5065966494..63b590de24d6 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -61,7 +61,7 @@ struct eth_dev { spinlock_t req_lock; /* guard {rx,tx}_reqs */ struct list_head tx_reqs, rx_reqs; - atomic_t tx_qlen; + unsigned tx_qlen; struct sk_buff_head rx_frames; @@ -491,7 +491,6 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req) spin_unlock(&dev->req_lock); dev_kfree_skb_any(skb); - atomic_dec(&dev->tx_qlen); if (netif_carrier_ok(dev->net)) netif_wake_queue(dev->net); } @@ -605,12 +604,19 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, req->length = length; - /* throttle high/super speed IRQ rate back slightly */ - if (gadget_is_dualspeed(dev->gadget)) - req->no_interrupt = (dev->gadget->speed == USB_SPEED_HIGH || - dev->gadget->speed == USB_SPEED_SUPER) - ? ((atomic_read(&dev->tx_qlen) % qmult) != 0) - : 0; + /* throttle highspeed IRQ rate back slightly */ + if (gadget_is_dualspeed(dev->gadget) && + (dev->gadget->speed == USB_SPEED_HIGH)) { + dev->tx_qlen++; + if (dev->tx_qlen == qmult) { + req->no_interrupt = 0; + dev->tx_qlen = 0; + } else { + req->no_interrupt = 1; + } + } else { + req->no_interrupt = 0; + } retval = usb_ep_queue(in, req, GFP_ATOMIC); switch (retval) { @@ -619,7 +625,6 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, break; case 0: net->trans_start = jiffies; - atomic_inc(&dev->tx_qlen); } if (retval) { @@ -645,7 +650,7 @@ static void eth_start(struct eth_dev *dev, gfp_t gfp_flags) rx_fill(dev, gfp_flags); /* and open the tx floodgates */ - atomic_set(&dev->tx_qlen, 0); + dev->tx_qlen = 0; netif_wake_queue(dev->net); } From f9039de766187be99eb2e6af45023b8a32dc26b1 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Thu, 18 Sep 2014 10:46:08 -0700 Subject: [PATCH 41/69] RNDIS: Add Data aggregation (multi packet) support Add data aggregation support using RNDIS Multi Packet feature to achieve better UDP Downlink throughput. Max 3 RNDIS Packets aggregated into one RNDIS Packet with this implementation. With this change, seeing UDP Downlink throughput increase from 90 Mbps to above 100 Mbps when using Iperf and sending data more than 100 Mbps. Change-Id: I21c39482718944bb1b1068bdd02f626531e58f08 Signed-off-by: Mayank Rana Signed-off-by: Rajkumar Raghupathy --- drivers/usb/gadget/f_rndis.c | 14 ++++ drivers/usb/gadget/u_ether.c | 154 ++++++++++++++++++++++++++++++++--- 2 files changed, 156 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 7646a564bfda..130efdfa7881 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -457,6 +457,7 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req) { struct f_rndis *rndis = req->context; int status; + rndis_init_msg_type *buf; /* received RNDIS command from USB_CDC_SEND_ENCAPSULATED_COMMAND */ // spin_lock(&dev->lock); @@ -464,6 +465,19 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req) if (status < 0) pr_err("RNDIS command error %d, %d/%d\n", status, req->actual, req->length); + + buf = (rndis_init_msg_type *)req->buf; + + if (buf->MessageType == RNDIS_MSG_INIT) { + if (buf->MaxTransferSize > 2048) + rndis->port.multi_pkt_xfer = 1; + else + rndis->port.multi_pkt_xfer = 0; + DBG(cdev, "%s: MaxTransferSize: %d : Multi_pkt_txr: %s\n", + __func__, buf->MaxTransferSize, + rndis->port.multi_pkt_xfer ? "enabled" : + "disabled"); + } // spin_unlock(&dev->lock); } diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 63b590de24d6..9705c2bb15fb 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -62,6 +62,11 @@ struct eth_dev { spinlock_t req_lock; /* guard {rx,tx}_reqs */ struct list_head tx_reqs, rx_reqs; unsigned tx_qlen; +/* Minimum number of TX USB request queued to UDC */ +#define TX_REQ_THRESHOLD 5 + int no_tx_req_used; + int tx_skb_hold_count; + u32 tx_req_bufsize; struct sk_buff_head rx_frames; @@ -88,7 +93,7 @@ struct eth_dev { #define DEFAULT_QLEN 2 /* double buffering by default */ -static unsigned qmult = 5; +static unsigned qmult = 10; module_param(qmult, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(qmult, "queue length multiplier at high/super speed"); @@ -472,6 +477,11 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req) { struct sk_buff *skb = req->context; struct eth_dev *dev = ep->driver_data; + struct net_device *net = dev->net; + struct usb_request *new_req; + struct usb_ep *in; + int length; + int retval; switch (req->status) { default: @@ -482,14 +492,73 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req) case -ESHUTDOWN: /* disconnect etc */ break; case 0: - dev->net->stats.tx_bytes += skb->len; + if (!req->zero) + dev->net->stats.tx_bytes += req->length-1; + else + dev->net->stats.tx_bytes += req->length; } dev->net->stats.tx_packets++; spin_lock(&dev->req_lock); - list_add(&req->list, &dev->tx_reqs); - spin_unlock(&dev->req_lock); - dev_kfree_skb_any(skb); + list_add_tail(&req->list, &dev->tx_reqs); + + if (dev->port_usb->multi_pkt_xfer) { + dev->no_tx_req_used--; + req->length = 0; + in = dev->port_usb->in_ep; + + if (!list_empty(&dev->tx_reqs)) { + new_req = container_of(dev->tx_reqs.next, + struct usb_request, list); + list_del(&new_req->list); + spin_unlock(&dev->req_lock); + if (new_req->length > 0) { + length = new_req->length; + + /* NCM requires no zlp if transfer is + * dwNtbInMaxSize */ + if (dev->port_usb->is_fixed && + length == dev->port_usb->fixed_in_len && + (length % in->maxpacket) == 0) + new_req->zero = 0; + else + new_req->zero = 1; + + /* use zlp framing on tx for strict CDC-Ether + * conformance, though any robust network rx + * path ignores extra padding. and some hardware + * doesn't like to write zlps. + */ + if (new_req->zero && !dev->zlp && + (length % in->maxpacket) == 0) { + new_req->zero = 0; + length++; + } + + new_req->length = length; + retval = usb_ep_queue(in, new_req, GFP_ATOMIC); + switch (retval) { + default: + DBG(dev, "tx queue err %d\n", retval); + break; + case 0: + spin_lock(&dev->req_lock); + dev->no_tx_req_used++; + spin_unlock(&dev->req_lock); + net->trans_start = jiffies; + } + } else { + spin_lock(&dev->req_lock); + list_add(&new_req->list, &dev->tx_reqs); + spin_unlock(&dev->req_lock); + } + } else { + spin_unlock(&dev->req_lock); + } + } else { + spin_unlock(&dev->req_lock); + dev_kfree_skb_any(skb); + } if (netif_carrier_ok(dev->net)) netif_wake_queue(dev->net); @@ -500,6 +569,26 @@ static inline int is_promisc(u16 cdc_filter) return cdc_filter & USB_CDC_PACKET_TYPE_PROMISCUOUS; } +static void alloc_tx_buffer(struct eth_dev *dev) +{ + struct list_head *act; + struct usb_request *req; + + dev->tx_req_bufsize = (TX_SKB_HOLD_THRESHOLD * + (dev->net->mtu + + sizeof(struct ethhdr) + /* size of rndis_packet_msg_type */ + + 44 + + 22)); + + list_for_each(act, &dev->tx_reqs) { + req = container_of(act, struct usb_request, list); + if (!req->buf) + req->buf = kmalloc(dev->tx_req_bufsize, + GFP_ATOMIC); + } +} + static netdev_tx_t eth_start_xmit(struct sk_buff *skb, struct net_device *net) { @@ -526,6 +615,10 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + /* Allocate memory for tx_reqs to support multi packet transfer */ + if (dev->port_usb->multi_pkt_xfer && !dev->tx_req_bufsize) + alloc_tx_buffer(dev); + /* apply outgoing CDC or RNDIS filters */ if (!is_promisc(cdc_filter)) { u8 *dest = skb->data; @@ -580,11 +673,39 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, spin_unlock_irqrestore(&dev->lock, flags); if (!skb) goto drop; - - length = skb->len; } - req->buf = skb->data; - req->context = skb; + + spin_lock_irqsave(&dev->req_lock, flags); + dev->tx_skb_hold_count++; + spin_unlock_irqrestore(&dev->req_lock, flags); + + if (dev->port_usb->multi_pkt_xfer) { + memcpy(req->buf + req->length, skb->data, skb->len); + req->length = req->length + skb->len; + length = req->length; + dev_kfree_skb_any(skb); + + spin_lock_irqsave(&dev->req_lock, flags); + if (dev->tx_skb_hold_count < TX_SKB_HOLD_THRESHOLD) { + if (dev->no_tx_req_used > TX_REQ_THRESHOLD) { + list_add(&req->list, &dev->tx_reqs); + spin_unlock_irqrestore(&dev->req_lock, flags); + goto success; + } + } + + dev->no_tx_req_used++; + spin_unlock_irqrestore(&dev->req_lock, flags); + + spin_lock_irqsave(&dev->lock, flags); + dev->tx_skb_hold_count = 0; + spin_unlock_irqrestore(&dev->lock, flags); + } else { + length = skb->len; + req->buf = skb->data; + req->context = skb; + } + req->complete = tx_complete; /* NCM requires no zlp if transfer is dwNtbInMaxSize */ @@ -599,8 +720,10 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, * though any robust network rx path ignores extra padding. * and some hardware doesn't like to write zlps. */ - if (req->zero && !dev->zlp && (length % in->maxpacket) == 0) + if (req->zero && !dev->zlp && (length % in->maxpacket) == 0) { + req->zero = 0; length++; + } req->length = length; @@ -608,7 +731,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, if (gadget_is_dualspeed(dev->gadget) && (dev->gadget->speed == USB_SPEED_HIGH)) { dev->tx_qlen++; - if (dev->tx_qlen == qmult) { + if (dev->tx_qlen == (qmult/2)) { req->no_interrupt = 0; dev->tx_qlen = 0; } else { @@ -628,7 +751,8 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, } if (retval) { - dev_kfree_skb_any(skb); + if (!dev->port_usb->multi_pkt_xfer) + dev_kfree_skb_any(skb); drop: dev->net->stats.tx_dropped++; spin_lock_irqsave(&dev->req_lock, flags); @@ -637,6 +761,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, list_add(&req->list, &dev->tx_reqs); spin_unlock_irqrestore(&dev->req_lock, flags); } +success: return NETDEV_TX_OK; } @@ -927,6 +1052,9 @@ struct net_device *gether_connect(struct gether *link) dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer; spin_lock(&dev->lock); + dev->tx_skb_hold_count = 0; + dev->no_tx_req_used = 0; + dev->tx_req_bufsize = 0; dev->port_usb = link; if (netif_running(dev->net)) { if (link->open) @@ -993,6 +1121,8 @@ void gether_disconnect(struct gether *link) list_del(&req->list); spin_unlock(&dev->req_lock); + if (link->multi_pkt_xfer) + kfree(req->buf); usb_ep_free_request(link->in_ep, req); spin_lock(&dev->req_lock); } From 5cf94796ec67e08d77e0e6d91e19ee2a083228f3 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Thu, 18 Sep 2014 10:48:48 -0700 Subject: [PATCH 42/69] ndis: Add debug support to disable RNDIS Multipacket Feature This change adds module param which allows to disable RNDIS Multi-packet Feature (Aggregation support in Downlink path) as this feature is enabled by default. To disable use this param before moving to RNDIS Composition: echo 1 > /sys/module/g_android/parameters/rndis_multipacket_dl_disable Also counts errors as Rx errors if received RNDIS packets are not following RNDIS message format as those packets are being discarded. Change-Id: I764430da78f2204af92e14bb279c11b24c7e4c67 Signed-off-by: Mayank Rana --- drivers/usb/gadget/f_rndis.c | 2 ++ drivers/usb/gadget/u_ether.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 130efdfa7881..a0240195b46a 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -477,6 +477,8 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req) __func__, buf->MaxTransferSize, rndis->port.multi_pkt_xfer ? "enabled" : "disabled"); + if (rndis_multipacket_dl_disable) + rndis->port.multi_pkt_xfer = 0; } // spin_unlock(&dev->lock); } diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 9705c2bb15fb..ff26f5c2d38e 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -292,6 +292,10 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req) status = dev->unwrap(dev->port_usb, skb, &dev->rx_frames); + if (status == -EINVAL) + dev->net->stats.rx_errors++; + else if (status == -EOVERFLOW) + dev->net->stats.rx_over_errors++; } else { dev_kfree_skb_any(skb); status = -ENOTCONN; From 12407bb816f1b79ec5fc3c24786047c2ea41f190 Mon Sep 17 00:00:00 2001 From: xerox_lin Date: Thu, 4 Sep 2014 16:01:59 +0800 Subject: [PATCH 43/69] USB: gadget: rndis: Add module parameter for DL max packets per xfer Currently DL aggregation is supported in RNDIS driver and is set to 3 by default. And there is no support to change downlink maximum packets per transfer at runtime through module parameter. Hence add module parameter for DL maximum packets per transfer to change it at runtime. echo 6 > /sys/module/g_android/parameters/rndis_dl_max_pkt_per_xfer To disable DL aggregation during runtime, echo 1 > /sys/module/g_android/parameters/rndis_dl_max_pkt_per_xfer Change-Id: I3a1d0bc97358e2b6f233df7ae8725fb507de50db Signed-off-by: Xerox Lin Signed-off-by: Vijayavardhan Vennapusa --- drivers/usb/gadget/f_rndis.c | 11 ++++++----- drivers/usb/gadget/u_ether.c | 6 ++++-- drivers/usb/gadget/u_ether.h | 3 +-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index a0240195b46a..187014885687 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -66,10 +66,10 @@ * - MS-Windows drivers sometimes emit undocumented requests. */ -static bool rndis_multipacket_dl_disable; -module_param(rndis_multipacket_dl_disable, bool, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(rndis_multipacket_dl_disable, - "Disable RNDIS Multi-packet support in DownLink"); +static unsigned int rndis_dl_max_pkt_per_xfer = 3; +module_param(rndis_dl_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(rndis_dl_max_pkt_per_xfer, + "Maximum packets per transfer for DL aggregation"); static unsigned int rndis_ul_max_pkt_per_xfer = 3; module_param(rndis_ul_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR); @@ -477,7 +477,7 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req) __func__, buf->MaxTransferSize, rndis->port.multi_pkt_xfer ? "enabled" : "disabled"); - if (rndis_multipacket_dl_disable) + if (rndis_dl_max_pkt_per_xfer <= 1) rndis->port.multi_pkt_xfer = 0; } // spin_unlock(&dev->lock); @@ -881,6 +881,7 @@ rndis_bind_config_vendor(struct usb_configuration *c, u8 ethaddr[ETH_ALEN], rndis->port.wrap = rndis_add_header; rndis->port.unwrap = rndis_rm_hdr; rndis->port.ul_max_pkts_per_xfer = rndis_ul_max_pkt_per_xfer; + rndis->port.dl_max_pkts_per_xfer = rndis_dl_max_pkt_per_xfer; rndis->port.func.name = "rndis"; rndis->port.func.strings = rndis_strings; diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index ff26f5c2d38e..14f587efc0f6 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -72,6 +72,7 @@ struct eth_dev { unsigned header_len; unsigned ul_max_pkts_per_xfer; + unsigned dl_max_pkts_per_xfer; struct sk_buff *(*wrap)(struct gether *, struct sk_buff *skb); int (*unwrap)(struct gether *, struct sk_buff *skb, @@ -578,7 +579,7 @@ static void alloc_tx_buffer(struct eth_dev *dev) struct list_head *act; struct usb_request *req; - dev->tx_req_bufsize = (TX_SKB_HOLD_THRESHOLD * + dev->tx_req_bufsize = (dev->dl_max_pkts_per_xfer * (dev->net->mtu + sizeof(struct ethhdr) /* size of rndis_packet_msg_type */ @@ -690,7 +691,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb); spin_lock_irqsave(&dev->req_lock, flags); - if (dev->tx_skb_hold_count < TX_SKB_HOLD_THRESHOLD) { + if (dev->tx_skb_hold_count < dev->dl_max_pkts_per_xfer) { if (dev->no_tx_req_used > TX_REQ_THRESHOLD) { list_add(&req->list, &dev->tx_reqs); spin_unlock_irqrestore(&dev->req_lock, flags); @@ -1054,6 +1055,7 @@ struct net_device *gether_connect(struct gether *link) dev->unwrap = link->unwrap; dev->wrap = link->wrap; dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer; + dev->dl_max_pkts_per_xfer = link->dl_max_pkts_per_xfer; spin_lock(&dev->lock); dev->tx_skb_hold_count = 0; diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h index ce803d415887..67eda50ae995 100644 --- a/drivers/usb/gadget/u_ether.h +++ b/drivers/usb/gadget/u_ether.h @@ -55,8 +55,7 @@ struct gether { u32 fixed_out_len; u32 fixed_in_len; unsigned ul_max_pkts_per_xfer; -/* Max number of SKB packets to be used to create Multi Packet RNDIS */ -#define TX_SKB_HOLD_THRESHOLD 3 + unsigned dl_max_pkts_per_xfer; bool multi_pkt_xfer; struct sk_buff *(*wrap)(struct gether *port, struct sk_buff *skb); From 88835a3a4ea3b5107c3ea79ee1169aac5fadf808 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 24 Sep 2014 19:36:33 -0700 Subject: [PATCH 44/69] USB: f_rndis: fix compile error Change-Id: Ied5dd8ef905bdf84d176a5e560b09e292b68fbc5 Signed-off-by: Badhri Jagan Sridharan --- drivers/usb/gadget/f_rndis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 187014885687..5c274f1d7b7a 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -473,7 +473,7 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req) rndis->port.multi_pkt_xfer = 1; else rndis->port.multi_pkt_xfer = 0; - DBG(cdev, "%s: MaxTransferSize: %d : Multi_pkt_txr: %s\n", + pr_info("%s: MaxTransferSize: %d : Multi_pkt_txr: %s\n", __func__, buf->MaxTransferSize, rndis->port.multi_pkt_xfer ? "enabled" : "disabled"); From abbfed9ed1a78701ef3db74f5287958feb897035 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 4 Sep 2014 14:54:29 +0100 Subject: [PATCH 45/69] arm64: ptrace: add PTRACE_SET_SYSCALL Note: This patch is from v6 of Takahiro's proposed "arm64: add seccomp support" patchset (leecam@google.com) To allow tracer to be able to change/skip a system call by re-writing a syscall number, there are several approaches: (1) modify x8 register with ptrace(PTRACE_SETREGSET), and handle this case later on in syscall_trace_enter(), or (2) support ptrace(PTRACE_SET_SYSCALL) as on arm Thinking of the fact that user_pt_regs doesn't expose 'syscallno' to tracer as well as that secure_computing() expects a changed syscall number to be visible, especially case of -1, before this function returns in syscall_trace_enter(), we'd better take (2). Signed-off-by: AKASHI Takahiro linaro.org> --- arch/arm64/include/uapi/asm/ptrace.h | 1 + arch/arm64/kernel/ptrace.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 6913643bbe54..49c61746297d 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -23,6 +23,7 @@ #include +#define PTRACE_SET_SYSCALL 23 /* * PSR bits diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index ee856d9f6f64..56a62ebf2baf 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1064,7 +1064,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { - return ptrace_request(child, request, addr, data); + int ret; + + switch (request) { + case PTRACE_SET_SYSCALL: + task_pt_regs(child)->syscallno = data; + ret = 0; + break; + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; } enum ptrace_syscall_dir { From feb28436457d33fef9f264635291432df4b74122 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 4 Sep 2014 15:20:53 +0100 Subject: [PATCH 46/69] arm64: ptrace: allow tracer to skip a system call Note: This patch is from v6 of Takahiro's proposed "arm64: add seccomp support" patchset (leecam@google.com) If tracer specifies -1 as a syscall number, this traced system call should be skipped with a value in x0 used as a return value. This patch enables this semantics, but there is a restriction here: when syscall(-1) is issued by user, tracer cannot skip this system call and modify a return value at syscall entry. In order to ease this flavor, we need to treat whatever value in x0 as a return value, but this might result in a bogus value being returned, especially when tracer doesn't do anything at this syscall. So we always return ENOSYS instead, while we have another chance to change a return value at syscall exit. Please also note: * syscall entry tracing and syscall exit tracing (ftrace tracepoint and audit) are always executed, if enabled, even when skipping a system call (that is, -1). In this way, we can avoid a potential bug where audit_syscall_entry() might be called without audit_syscall_exit() at the previous system call being called, that would cause OOPs in audit_syscall_entry(). * syscallno may also be set to -1 if a fatal signal (SIGKILL) is detected in tracehook_report_syscall_entry(), but since a value set to x0 (ENOSYS) is not used in this case, we may neglect the case. Signed-off-by: AKASHI Takahiro linaro.org> Conflicts: arch/arm64/kernel/entry.S Change-Id: Ifcdcdbcb7c8cf97e5b5f1086a1ea4107e1d4f9a8 --- arch/arm64/include/asm/ptrace.h | 8 ++++++++ arch/arm64/kernel/entry.S | 4 ++++ arch/arm64/kernel/ptrace.c | 20 ++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index dbe3b00d1eb7..c0ebe6fade63 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -60,6 +60,14 @@ #define COMPAT_PT_TEXT_ADDR 0x10000 #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 + +/* + * used to skip a system call when tracer changes its number to -1 + * with ptrace(PTRACE_SET_SYSCALL) + */ +#define RET_SKIP_SYSCALL -1 +#define IS_SKIP_SYSCALL(no) ((int)(no & 0xffffffff) == -1) + #ifndef __ASSEMBLY__ /* sizeof(struct user) for AArch32 */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3c93e1a9ea8d..a30fab9ea2a3 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -662,6 +663,8 @@ __sys_trace: mov x0, sp bl syscall_trace_enter adr lr, __sys_trace_return // return address + cmp w0, #RET_SKIP_SYSCALL // skip syscall? + b.eq __sys_trace_return_skipped uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs cmp scno, sc_nr // check upper syscall limit @@ -675,6 +678,7 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 +__sys_trace_return_skipped: // x0 already in regs[0] mov x0, sp bl syscall_trace_exit b ret_to_user diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 56a62ebf2baf..2a6edfb0079b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1108,9 +1108,29 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { + unsigned int saved_syscallno = regs->syscallno; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + if (IS_SKIP_SYSCALL(regs->syscallno)) { + /* + * RESTRICTION: we can't modify a return value of user + * issued syscall(-1) here. In order to ease this flavor, + * we need to treat whatever value in x0 as a return value, + * but this might result in a bogus value being returned. + */ + /* + * NOTE: syscallno may also be set to -1 if fatal signal is + * detected in tracehook_report_syscall_entry(), but since + * a value set to x0 here is not used in this case, we may + * neglect the case. + */ + if (!test_thread_flag(TIF_SYSCALL_TRACE) || + (IS_SKIP_SYSCALL(saved_syscallno))) + regs->regs[0] = -ENOSYS; + } + audit_syscall_entry(syscall_get_arch(), regs->syscallno, regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); From dab10731da65a0deba46402ca9fadf6974676cc8 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 4 Sep 2014 15:34:14 +0100 Subject: [PATCH 47/69] asm-generic: add generic seccomp.h for secure computing mode 1 Note: This patch is from v6 of Takahiro's proposed "arm64: add seccomp support" patchset (leecam@google.com) Those values (__NR_seccomp_*) are used solely in secure_computing() to identify mode 1 system calls. If compat system calls have different syscall numbers, asm/seccomp.h may override them. Acked-by: Arnd Bergmann arndb.de> Signed-off-by: AKASHI Takahiro linaro.org> --- include/asm-generic/seccomp.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/asm-generic/seccomp.h diff --git a/include/asm-generic/seccomp.h b/include/asm-generic/seccomp.h new file mode 100644 index 000000000000..663ac3dc02ea --- /dev/null +++ b/include/asm-generic/seccomp.h @@ -0,0 +1,29 @@ +/* + * include/asm-generic/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_GENERIC_SECCOMP_H +#define _ASM_GENERIC_SECCOMP_H + +#include + +#if defined(CONFIG_COMPAT) && !defined(__NR_seccomp_read_32) +#define __NR_seccomp_read_32 __NR_read +#define __NR_seccomp_write_32 __NR_write +#define __NR_seccomp_exit_32 __NR_exit +#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn +#endif /* CONFIG_COMPAT && ! already defined */ + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_rt_sigreturn + +#endif /* _ASM_GENERIC_SECCOMP_H */ + From 4f12b53f28a751406a27ef7501a22f9e32a9c30b Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 4 Sep 2014 15:39:13 +0100 Subject: [PATCH 48/69] add seccomp syscall for compat task Note: This patch is from v6 of Takahiro's proposed "arm64: add seccomp support" patchset (leecam@google.com) This patch allows compat task to issue seccomp() system call. Signed-off-by: AKASHI Takahiro linaro.org> Conflicts: arch/arm64/include/asm/unistd32.h Change-Id: I63d38f68da72b3333327256b4cacba2c3ddb39fc --- arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index a8f8f6992987..53f67c64a6af 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -40,7 +40,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 378 +#define __NR_compat_syscalls 384 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index dd336c150f3f..63513ae2b59e 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -781,3 +781,11 @@ __SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev) __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 379 __SYSCALL(__NR_finit_module, sys_finit_module) +/* #define __NR_sched_setattr 380 */ +__SYSCALL(380, sys_ni_syscall) +/* #define __NR_sched_getattr 381 */ +__SYSCALL(381, sys_ni_syscall) +/* #define __NR_renameat2 382 */ +__SYSCALL(382, sys_ni_syscall) +#define __NR_seccomp 383 +__SYSCALL(__NR_seccomp, sys_ni_syscall) From 77227239d20ac6381fb1aee7b7cc902f0d14cd85 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 4 Sep 2014 15:48:01 +0100 Subject: [PATCH 49/69] arm64: add SIGSYS siginfo for compat task Note: This patch is from v6 of Takahiro's proposed "arm64: add seccomp support" patchset (leecam@google.com) SIGSYS is primarily used in secure computing to notify tracer. This patch allows signal handler on compat task to get correct information with SA_SYSINFO specified when this signal is delivered. Signed-off-by: AKASHI Takahiro linaro.org> --- arch/arm64/include/asm/compat.h | 7 +++++++ arch/arm64/kernel/signal32.c | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index ae0004fe6c23..a79da08994f2 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -191,6 +191,13 @@ typedef struct compat_siginfo { compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + compat_uptr_t _call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index b4692837e7ab..e1b8b9fb274e 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -211,6 +211,14 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); break; +#ifdef __ARCH_SIGSYS + case __SI_SYS: + err |= __put_user((compat_uptr_t)(unsigned long) + from->si_call_addr, &to->si_call_addr); + err |= __put_user(from->si_syscall, &to->si_syscall); + err |= __put_user(from->si_arch, &to->si_arch); + break; +#endif default: /* this is just in case for now ... */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); From 9499cd23f9d05ba159fac6d55dc35a7f49f9ce76 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Mar 2014 12:48:43 -0400 Subject: [PATCH 50/69] syscall_get_arch: remove useless function arguments Every caller of syscall_get_arch() uses current for the task and no implementors of the function need args. So just get rid of both of those things. Admittedly, since these are inline functions we aren't wasting stack space, but it just makes the prototypes better. Signed-off-by: Eric Paris Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@linux-mips.org Cc: linux390@de.ibm.com Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-arch@vger.kernel.org Conflicts: arch/mips/include/asm/syscall.h arch/mips/kernel/ptrace.c --- arch/arm/include/asm/syscall.h | 3 +-- arch/s390/include/asm/syscall.h | 5 ++--- arch/x86/include/asm/syscall.h | 8 +++----- include/asm-generic/syscall.h | 4 +--- kernel/seccomp.c | 4 ++-- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index f1d96d4e8092..6db3caacb31a 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -97,8 +97,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->ARM_r0 + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { /* ARM tasks don't change audit architectures on the fly. */ return AUDIT_ARCH_ARM; diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index cd29d2f4e4f3..bebc0bd8abc2 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -89,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->orig_gpr2 = args[0]; } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { #ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_31BIT)) + if (test_tsk_thread_flag(current, TIF_31BIT)) return AUDIT_ARCH_S390; #endif return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 2e188d68397c..f106908a12ec 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -90,8 +90,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { return AUDIT_ARCH_I386; } @@ -220,8 +219,7 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_arch(void) { #ifdef CONFIG_IA32_EMULATION /* @@ -233,7 +231,7 @@ static inline int syscall_get_arch(struct task_struct *task, * * x32 tasks should be considered AUDIT_ARCH_X86_64. */ - if (task_thread_info(task)->status & TS_COMPAT) + if (task_thread_info(current)->status & TS_COMPAT) return AUDIT_ARCH_I386; #endif /* Both x32 and x86_64 are considered "64-bit". */ diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 5b09392db673..d401e5463fb0 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -144,8 +144,6 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, /** * syscall_get_arch - return the AUDIT_ARCH for the current system call - * @task: task of interest, must be in system call entry tracing - * @regs: task_pt_regs() of @task * * Returns the AUDIT_ARCH_* based on the system call convention in use. * @@ -155,5 +153,5 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, * Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must * provide an implementation of this. */ -int syscall_get_arch(struct task_struct *task, struct pt_regs *regs); +int syscall_get_arch(void); #endif /* _ASM_SYSCALL_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b7a10048a32c..eda2da3df822 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -95,7 +95,7 @@ u32 seccomp_bpf_load(int off) if (off == BPF_DATA(nr)) return syscall_get_nr(current, regs); if (off == BPF_DATA(arch)) - return syscall_get_arch(current, regs); + return syscall_get_arch(); if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) { unsigned long value; int arg = (off - BPF_DATA(args[0])) / sizeof(u64); @@ -351,7 +351,7 @@ static void seccomp_send_sigsys(int syscall, int reason) info.si_code = SYS_SECCOMP; info.si_call_addr = (void __user *)KSTK_EIP(current); info.si_errno = reason; - info.si_arch = syscall_get_arch(current, task_pt_regs(current)); + info.si_arch = syscall_get_arch(); info.si_syscall = syscall; force_sig_info(SIGSYS, &info, current); } From 210957c2bb3b4d111963bb296e2c42beb8721929 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 4 Sep 2014 16:01:08 +0100 Subject: [PATCH 51/69] arm64: add seccomp support Note: This patch is from v6 of Takahiro's proposed "arm64: add seccomp support" patchset (leecam@google.com) secure_computing() is called first in syscall_trace_enter() so that a system call will be aborted quickly without doing succeeding syscall tracing, contrary to other cases, if seccomp rules deny that system call. On compat task, syscall numbers for system calls allowed in seccomp mode 1 are different from those on normal tasks, and so _NR_seccomp_xxx_32's need to be redefined. Signed-off-by: AKASHI Takahiro linaro.org> Conflicts: arch/arm64/Kconfig arch/arm64/kernel/entry.S Change-Id: I5ec44507d7e536df7ec9d62d30a418c26ef15100 --- arch/arm64/Kconfig | 15 +++++++++++++++ arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/include/asm/seccomp.h | 25 +++++++++++++++++++++++++ arch/arm64/include/asm/unistd.h | 3 +++ arch/arm64/kernel/entry.S | 2 ++ arch/arm64/kernel/ptrace.c | 5 +++++ 6 files changed, 51 insertions(+) create mode 100644 arch/arm64/include/asm/seccomp.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2221396c2a6c..43f1a2ee307c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -20,6 +20,7 @@ config ARM64 select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK @@ -232,6 +233,20 @@ config ARMV7_COMPAT_CPUINFO source "mm/Kconfig" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + endmenu menu "Boot options" diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index c0ebe6fade63..58768774c7f4 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -66,6 +66,7 @@ * with ptrace(PTRACE_SET_SYSCALL) */ #define RET_SKIP_SYSCALL -1 +#define RET_SKIP_SYSCALL_TRACE -2 #define IS_SKIP_SYSCALL(no) ((int)(no & 0xffffffff) == -1) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h new file mode 100644 index 000000000000..bec3a43f7b17 --- /dev/null +++ b/arch/arm64/include/asm/seccomp.h @@ -0,0 +1,25 @@ +/* + * arch/arm64/include/asm/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include + +#ifdef CONFIG_COMPAT +#define __NR_seccomp_read_32 __NR_compat_read +#define __NR_seccomp_write_32 __NR_compat_write +#define __NR_seccomp_exit_32 __NR_compat_exit +#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#endif /* CONFIG_COMPAT */ + +#include + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 53f67c64a6af..2a3957faa702 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -30,6 +30,9 @@ * Compat syscall numbers used by the AArch64 kernel. */ #define __NR_compat_restart_syscall 0 +#define __NR_compat_exit 1 +#define __NR_compat_read 3 +#define __NR_compat_write 4 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a30fab9ea2a3..8927e52be32f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -663,6 +663,8 @@ __sys_trace: mov x0, sp bl syscall_trace_enter adr lr, __sys_trace_return // return address + cmp w0, #RET_SKIP_SYSCALL_TRACE // skip syscall and tracing? + b.eq ret_to_user cmp w0, #RET_SKIP_SYSCALL // skip syscall? b.eq __sys_trace_return_skipped uxtw scno, w0 // syscall number (possibly new) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2a6edfb0079b..f47e70fed6af 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1110,6 +1111,10 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) { unsigned int saved_syscallno = regs->syscallno; + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing(regs->syscallno) == -1) + return RET_SKIP_SYSCALL_TRACE; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); From a03a2426ea9f1d9dada33cf4a824f63e8f916c9d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Nov 2013 14:57:36 +0100 Subject: [PATCH 52/69] arch: Introduce smp_load_acquire(), smp_store_release() A number of situations currently require the heavyweight smp_mb(), even though there is no need to order prior stores against later loads. Many architectures have much cheaper ways to handle these situations, but the Linux kernel currently has no portable way to make use of them. This commit therefore supplies smp_load_acquire() and smp_store_release() to remedy this situation. The new smp_load_acquire() primitive orders the specified load against any subsequent reads or writes, while the new smp_store_release() primitive orders the specifed store against any prior reads or writes. These primitives allow array-based circular FIFOs to be implemented without an smp_mb(), and also allow a theoretical hole in rcu_assign_pointer() to be closed at no additional expense on most architectures. In addition, the RCU experience transitioning from explicit smp_read_barrier_depends() and smp_wmb() to rcu_dereference() and rcu_assign_pointer(), respectively resulted in substantial improvements in readability. It therefore seems likely that replacing other explicit barriers with smp_load_acquire() and smp_store_release() will provide similar benefits. It appears that roughly half of the explicit barriers in core kernel code might be so replaced. [Changelog by PaulMck] (cherry picked from commit 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63) Reviewed-by: "Paul E. McKenney" Signed-off-by: Peter Zijlstra Acked-by: Will Deacon Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: Russell King Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Victor Kaplansky Cc: Tony Luck Cc: Oleg Nesterov Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/barrier.h | 15 +++++++++ arch/arm64/include/asm/barrier.h | 50 +++++++++++++++++++++++++++++ arch/ia64/include/asm/barrier.h | 23 +++++++++++++ arch/metag/include/asm/barrier.h | 15 +++++++++ arch/mips/include/asm/barrier.h | 15 +++++++++ arch/powerpc/include/asm/barrier.h | 21 +++++++++++- arch/s390/include/asm/barrier.h | 15 +++++++++ arch/sparc/include/asm/barrier_64.h | 15 +++++++++ arch/x86/include/asm/barrier.h | 43 ++++++++++++++++++++++++- include/asm-generic/barrier.h | 15 +++++++++ include/linux/compiler.h | 9 ++++++ 11 files changed, 234 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 8dcd9c702d90..b00ef075bc2e 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -59,6 +59,21 @@ #define smp_wmb() dmb() #endif +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index d4a63338a53c..78e20ba8806b 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -35,10 +35,60 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #else + #define smp_mb() asm volatile("dmb ish" : : : "memory") #define smp_rmb() asm volatile("dmb ishld" : : : "memory") #define smp_wmb() asm volatile("dmb ishst" : : : "memory") + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) : "r" (v) : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) : "r" (v) : "memory"); \ + break; \ + } \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (___p1) : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (___p1) : "Q" (*p) : "memory"); \ + break; \ + } \ + ___p1; \ +}) + #endif #define read_barrier_depends() do { } while(0) diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index 60576e06b6fb..d0a69aa35e27 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -45,13 +45,36 @@ # define smp_rmb() rmb() # define smp_wmb() wmb() # define smp_read_barrier_depends() read_barrier_depends() + #else + # define smp_mb() barrier() # define smp_rmb() barrier() # define smp_wmb() barrier() # define smp_read_barrier_depends() do { } while(0) + #endif +/* + * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no + * need for asm trickery! + */ + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + /* * XXX check on this ---I suspect what Linus really wants here is * acquire vs release semantics but we can't discuss this stuff with diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index c90bfc6bf648..5d6b4b407dda 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -82,4 +82,19 @@ static inline void fence(void) #define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #endif /* _ASM_METAG_BARRIER_H */ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 314ab5532019..52c5b61d7aba 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -180,4 +180,19 @@ #define nudge_writes() mb() #endif +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #endif /* __ASM_BARRIER_H */ diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index ae782254e731..f89da808ce31 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -45,11 +45,15 @@ # define SMPWMB eieio #endif +#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") + #define smp_mb() mb() -#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") +#define smp_rmb() __lwsync() #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") #define smp_read_barrier_depends() read_barrier_depends() #else +#define __lwsync() barrier() + #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() @@ -65,4 +69,19 @@ #define data_barrier(x) \ asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory"); +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + __lwsync(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + __lwsync(); \ + ___p1; \ +}) + #endif /* _ASM_POWERPC_BARRIER_H */ diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 16760eeb79b0..578680f6207a 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -32,4 +32,19 @@ #define set_mb(var, value) do { var = value; mb(); } while (0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* __ASM_BARRIER_H */ diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 95d45986f908..b5aad964558e 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define smp_read_barrier_depends() do { } while(0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* !(__SPARC64_BARRIER_H) */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index c6cd358a1eec..04a48903b2eb 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -92,12 +92,53 @@ #endif #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else +#else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() #define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif /* SMP */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +/* + * For either of these options x86 doesn't have a strong TSO memory + * model and we should fall back to full barriers. + */ + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + +#else /* regular x86 TSO memory ordering */ + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 639d7a4d033b..01613b382b0e 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -46,5 +46,20 @@ #define read_barrier_depends() do {} while (0) #define smp_read_barrier_depends() do {} while (0) +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 92669cd182a6..fe7a686dfd8d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Is this type a native word size -- useful for atomic operations */ +#ifndef __native_word +# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) +#endif + /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 @@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define compiletime_assert(condition, msg) \ _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) +#define compiletime_assert_atomic_type(t) \ + compiletime_assert(__native_word(t), \ + "Need native word sized stores/loads for atomicity.") + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), From 987a0f1102321853565c4bfecde6a5a58ac6db11 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 21 Jan 2014 15:49:56 -0800 Subject: [PATCH 53/69] introduce for_each_thread() to replace the buggy while_each_thread() while_each_thread() and next_thread() should die, almost every lockless usage is wrong. 1. Unless g == current, the lockless while_each_thread() is not safe. while_each_thread(g, t) can loop forever if g exits, next_thread() can't reach the unhashed thread in this case. Note that this can happen even if g is the group leader, it can exec. 2. Even if while_each_thread() itself was correct, people often use it wrongly. It was never safe to just take rcu_read_lock() and loop unless you verify that pid_alive(g) == T, even the first next_thread() can point to the already freed/reused memory. This patch adds signal_struct->thread_head and task->thread_node to create the normal rcu-safe list with the stable head. The new for_each_thread(g, t) helper is always safe under rcu_read_lock() as long as this task_struct can't go away. Note: of course it is ugly to have both task_struct->thread_node and the old task_struct->thread_group, we will kill it later, after we change the users of while_each_thread() to use for_each_thread(). Perhaps we can kill it even before we convert all users, we can reimplement next_thread(t) using the new thread_head/thread_node. But we can't do this right now because this will lead to subtle behavioural changes. For example, do/while_each_thread() always sees at least one task, while for_each_thread() can do nothing if the whole thread group has died. Or thread_group_empty(), currently its semantics is not clear unless thread_group_leader(p) and we need to audit the callers before we can change it. So this patch adds the new interface which has to coexist with the old one for some time, hopefully the next changes will be more or less straightforward and the old one will go away soon. Bug 200004307 Signed-off-by: Oleg Nesterov Reviewed-by: Sergey Dyasly Tested-by: Sergey Dyasly Reviewed-by: Sameer Nanda Acked-by: David Rientjes Cc: "Eric W. Biederman" Cc: Frederic Weisbecker Cc: Mandeep Singh Baines Cc: "Ma, Xindong" Cc: Michal Hocko Cc: "Tu, Xiaobing" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 0c740d0afc3bff0a097ad03a1c8df92757516f5c) Signed-off-by: Sri Krishna chowdary Change-Id: Id689cb1383ceba2561b66188d88258619b68f5c6 Reviewed-on: http://git-master/r/419041 Reviewed-by: Bharat Nihalani --- include/linux/init_task.h | 2 ++ include/linux/sched.h | 12 ++++++++++++ kernel/exit.c | 1 + kernel/fork.c | 8 ++++++++ 4 files changed, 23 insertions(+) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5cd0f0949927..998f4dfedecf 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -40,6 +40,7 @@ extern struct fs_struct init_fs; #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ + .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ @@ -213,6 +214,7 @@ extern struct task_group root_task_group; [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ + .thread_node = LIST_HEAD_INIT(init_signals.thread_head), \ INIT_IDS \ INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 7ccfeb1e4067..036d74cf457f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -476,6 +476,7 @@ struct signal_struct { atomic_t sigcnt; atomic_t live; int nr_threads; + struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ @@ -1156,6 +1157,7 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; + struct list_head thread_node; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -2166,6 +2168,16 @@ extern bool current_is_single_threaded(void); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; diff --git a/kernel/exit.c b/kernel/exit.c index 6a057750ebbb..8e2166363b4b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) __this_cpu_dec(process_counts); } list_del_rcu(&p->thread_group); + list_del_rcu(&p->thread_node); } /* diff --git a/kernel/fork.c b/kernel/fork.c index 41671a5d637d..32a78ff21949 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1061,6 +1061,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nr_threads = 1; atomic_set(&sig->live, 1); atomic_set(&sig->sigcnt, 1); + + /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ + sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); + tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); + init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); @@ -1487,6 +1492,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); + } else { + list_add_tail_rcu(&p->thread_node, + &p->signal->thread_head); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; From 2a30a4386e4a7e1283157c4cf4cfcc0306b22ac8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2014 15:02:11 -0700 Subject: [PATCH 54/69] seccomp: create internal mode-setting function In preparation for having other callers of the seccomp mode setting logic, split the prctl entry point away from the core logic that performs seccomp mode setting. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- kernel/seccomp.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index eda2da3df822..d445b9c24d27 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -467,7 +467,7 @@ long prctl_get_seccomp(void) } /** - * prctl_set_seccomp: configures current->seccomp.mode + * seccomp_set_mode: internal function for setting seccomp mode * @seccomp_mode: requested mode to use * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER * @@ -480,7 +480,7 @@ long prctl_get_seccomp(void) * * Returns 0 on success or -EINVAL on failure. */ -long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) { long ret = -EINVAL; @@ -511,3 +511,15 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) out: return ret; } + +/** + * prctl_set_seccomp: configures current->seccomp.mode + * @seccomp_mode: requested mode to use + * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER + * + * Returns 0 on success or -EINVAL on failure. + */ +long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +{ + return seccomp_set_mode(seccomp_mode, filter); +} From b8a9cff6dbe9cfddbb4d17e2dea496e523544687 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 15:38:02 -0700 Subject: [PATCH 55/69] seccomp: extract check/assign mode helpers To support splitting mode 1 from mode 2, extract the mode checking and assignment logic into common functions. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- kernel/seccomp.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d445b9c24d27..b5f2538755bf 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -219,7 +219,23 @@ static u32 seccomp_run_filters(int syscall) } return ret; } +#endif /* CONFIG_SECCOMP_FILTER */ +static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) +{ + if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) + return false; + + return true; +} + +static inline void seccomp_assign_mode(unsigned long seccomp_mode) +{ + current->seccomp.mode = seccomp_mode; + set_tsk_thread_flag(current, TIF_SECCOMP); +} + +#ifdef CONFIG_SECCOMP_FILTER /** * seccomp_attach_filter: Attaches a seccomp filter to current. * @fprog: BPF program to install @@ -484,8 +500,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) { long ret = -EINVAL; - if (current->seccomp.mode && - current->seccomp.mode != seccomp_mode) + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; switch (seccomp_mode) { @@ -506,8 +521,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) goto out; } - current->seccomp.mode = seccomp_mode; - set_thread_flag(TIF_SECCOMP); + seccomp_assign_mode(seccomp_mode); out: return ret; } From 8908dde5a7fdca974374b0dbe6dfb10f69df7216 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 15:55:25 -0700 Subject: [PATCH 56/69] seccomp: split mode setting routines Separates the two mode setting paths to make things more readable with fewer #ifdefs within function bodies. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- kernel/seccomp.c | 71 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b5f2538755bf..dab81904040f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -483,48 +483,66 @@ long prctl_get_seccomp(void) } /** - * seccomp_set_mode: internal function for setting seccomp mode - * @seccomp_mode: requested mode to use - * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER - * - * This function may be called repeatedly with a @seccomp_mode of - * SECCOMP_MODE_FILTER to install additional filters. Every filter - * successfully installed will be evaluated (in reverse order) for each system - * call the task makes. + * seccomp_set_mode_strict: internal function for setting strict seccomp * * Once current->seccomp.mode is non-zero, it may not be changed. * * Returns 0 on success or -EINVAL on failure. */ -static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) +static long seccomp_set_mode_strict(void) { + const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; long ret = -EINVAL; if (!seccomp_may_assign_mode(seccomp_mode)) goto out; - switch (seccomp_mode) { - case SECCOMP_MODE_STRICT: - ret = 0; #ifdef TIF_NOTSC - disable_TSC(); + disable_TSC(); #endif - break; + seccomp_assign_mode(seccomp_mode); + ret = 0; + +out: + + return ret; +} + #ifdef CONFIG_SECCOMP_FILTER - case SECCOMP_MODE_FILTER: - ret = seccomp_attach_user_filter(filter); - if (ret) - goto out; - break; -#endif - default: +/** + * seccomp_set_mode_filter: internal function for setting seccomp filter + * @filter: struct sock_fprog containing filter + * + * This function may be called repeatedly to install additional filters. + * Every filter successfully installed will be evaluated (in reverse order) + * for each system call the task makes. + * + * Once current->seccomp.mode is non-zero, it may not be changed. + * + * Returns 0 on success or -EINVAL on failure. + */ +static long seccomp_set_mode_filter(char __user *filter) +{ + const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; + long ret = -EINVAL; + + if (!seccomp_may_assign_mode(seccomp_mode)) + goto out; + + ret = seccomp_attach_user_filter(filter); + if (ret) goto out; - } seccomp_assign_mode(seccomp_mode); out: return ret; } +#else +static inline long seccomp_set_mode_filter(char __user *filter) +{ + return -EINVAL; +} +#endif /** * prctl_set_seccomp: configures current->seccomp.mode @@ -535,5 +553,12 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { - return seccomp_set_mode(seccomp_mode, filter); + switch (seccomp_mode) { + case SECCOMP_MODE_STRICT: + return seccomp_set_mode_strict(); + case SECCOMP_MODE_FILTER: + return seccomp_set_mode_filter(filter); + default: + return -EINVAL; + } } From e985fd474debedb269fba27006eda50d0b6f07ef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 16:08:24 -0700 Subject: [PATCH 57/69] seccomp: add "seccomp" syscall This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). In addition to the TSYNC flag later in this patch series, there is a non-zero chance that this syscall could be used for configuring a fixed argument area for seccomp-tracer-aware processes to pass syscall arguments in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter" for this syscall. Additionally, this syscall uses operation, flags, and user pointer for arguments because strictly passing arguments via a user pointer would mean seccomp itself would be unable to trivially filter the seccomp syscall itself. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: arch/x86/syscalls/syscall_32.tbl arch/x86/syscalls/syscall_64.tbl include/uapi/asm-generic/unistd.h kernel/seccomp.c And fixup of unistd32.h to truly enable sys_secomp. Change-Id: I95bea02382c52007d22e5e9dc563c7d055c2c83f --- arch/Kconfig | 1 + arch/arm64/include/asm/unistd32.h | 2 +- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 12 ++++++- include/uapi/linux/seccomp.h | 4 +++ kernel/seccomp.c | 55 ++++++++++++++++++++++++++++--- kernel/sys_ni.c | 3 ++ 9 files changed, 74 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a4429bcd609e..84c94a89e75b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. + - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 63513ae2b59e..76d094565090 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -788,4 +788,4 @@ __SYSCALL(381, sys_ni_syscall) /* #define __NR_renameat2 382 */ __SYSCALL(382, sys_ni_syscall) #define __NR_seccomp 383 -__SYSCALL(__NR_seccomp, sys_ni_syscall) +__SYSCALL(__NR_seccomp, sys_seccomp) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index aabfb8380a1c..8605e9e0f19a 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -357,3 +357,4 @@ 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp 350 i386 finit_module sys_finit_module +351 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 38ae65dfd14f..4cb491567b85 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -320,6 +320,7 @@ 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp 313 common finit_module sys_finit_module +314 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4147d700a293..2a955dcc863c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -841,4 +841,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 0cc74c4403e4..b422ad5d238b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 __SYSCALL(__NR_finit_module, sys_finit_module) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr 274 +__SYSCALL(__NR_sched_setattr, sys_sched_setattr) + * #define __NR_sched_getattr 275 +__SYSCALL(__NR_sched_getattr, sys_sched_getattr) + * #define __NR_renameat2 276 +__SYSCALL(__NR_renameat2, sys_renameat2) + */ +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 274 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index dab81904040f..d4b61b967423 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include /* #define SECCOMP_DEBUG 1 */ @@ -309,7 +310,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) * * Returns 0 on success and non-zero otherwise. */ -long seccomp_attach_user_filter(char __user *user_filter) +static long seccomp_attach_user_filter(const char __user *user_filter) { struct sock_fprog fprog; long ret = -EFAULT; @@ -511,6 +512,7 @@ static long seccomp_set_mode_strict(void) #ifdef CONFIG_SECCOMP_FILTER /** * seccomp_set_mode_filter: internal function for setting seccomp filter + * @flags: flags to change filter behavior * @filter: struct sock_fprog containing filter * * This function may be called repeatedly to install additional filters. @@ -521,11 +523,16 @@ static long seccomp_set_mode_strict(void) * * Returns 0 on success or -EINVAL on failure. */ -static long seccomp_set_mode_filter(char __user *filter) +static long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; long ret = -EINVAL; + /* Validate flags. */ + if (flags != 0) + goto out; + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -538,12 +545,35 @@ static long seccomp_set_mode_filter(char __user *filter) return ret; } #else -static inline long seccomp_set_mode_filter(char __user *filter) +static inline long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { return -EINVAL; } #endif +/* Common entry point for both prctl and syscall. */ +static long do_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs) +{ + switch (op) { + case SECCOMP_SET_MODE_STRICT: + if (flags != 0 || uargs != NULL) + return -EINVAL; + return seccomp_set_mode_strict(); + case SECCOMP_SET_MODE_FILTER: + return seccomp_set_mode_filter(flags, uargs); + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, + const char __user *, uargs) +{ + return do_seccomp(op, flags, uargs); +} + /** * prctl_set_seccomp: configures current->seccomp.mode * @seccomp_mode: requested mode to use @@ -553,12 +583,27 @@ static inline long seccomp_set_mode_filter(char __user *filter) */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { + unsigned int op; + char __user *uargs; + switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - return seccomp_set_mode_strict(); + op = SECCOMP_SET_MODE_STRICT; + /* + * Setting strict mode through prctl always ignored filter, + * so make sure it is always NULL here to pass the internal + * check in do_seccomp(). + */ + uargs = NULL; + break; case SECCOMP_MODE_FILTER: - return seccomp_set_mode_filter(filter); + op = SECCOMP_SET_MODE_FILTER; + uargs = filter; + break; default: return -EINVAL; } + + /* prctl interface doesn't have flags, so they are always zero. */ + return do_seccomp(op, 0, uargs); } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7078052284fd..7e7fc0a082c4 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); + +/* operate on Secure Computing state */ +cond_syscall(sys_seccomp); From 9d0ff694bc22fb458acb763811a677696c60725b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2014 15:23:46 -0700 Subject: [PATCH 58/69] sched: move no_new_privs into new atomic flags Since seccomp transitions between threads requires updates to the no_new_privs flag to be atomic, the flag must be part of an atomic flag set. This moves the nnp flag into a separate task field, and introduces accessors. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: kernel/sys.c --- fs/exec.c | 4 ++-- include/linux/sched.h | 18 +++++++++++++++--- kernel/seccomp.c | 2 +- kernel/sys.c | 4 ++-- security/apparmor/domain.c | 4 ++-- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index ffd7a813ad3d..cb5fb9aa320e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1239,7 +1239,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm) * This isn't strictly necessary, but it makes it harder for LSMs to * mess up. */ - if (current->no_new_privs) + if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; n_fs = 1; @@ -1286,7 +1286,7 @@ int prepare_binprm(struct linux_binprm *bprm) bprm->cred->egid = current_egid(); if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && - !current->no_new_privs && + !task_no_new_privs(current) && kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { /* Set-uid? */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 036d74cf457f..da5fe76a069b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1118,13 +1118,12 @@ struct task_struct { * execve */ unsigned in_iowait:1; - /* task may not gain privileges */ - unsigned no_new_privs:1; - /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; + unsigned long atomic_flags; /* Flags needing atomic access. */ + pid_t pid; pid_t tgid; @@ -1689,6 +1688,19 @@ static inline void memalloc_noio_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; } +/* Per-process atomic flags. */ +#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */ + +static inline bool task_no_new_privs(struct task_struct *p) +{ + return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); +} + +static inline void task_set_no_new_privs(struct task_struct *p) +{ + set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); +} + /* * task->jobctl flags */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d4b61b967423..5390739066a6 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -264,7 +264,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) * This avoids scenarios where unprivileged tasks can affect the * behavior of privileged children. */ - if (!current->no_new_privs && + if (!task_no_new_privs(current) && security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) return -EACCES; diff --git a/kernel/sys.c b/kernel/sys.c index 65d3e55bd282..0b08c9f000f3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2427,12 +2427,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (arg2 != 1 || arg3 || arg4 || arg5) return -EINVAL; - current->no_new_privs = 1; + task_set_no_new_privs(current); break; case PR_GET_NO_NEW_PRIVS: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - return current->no_new_privs ? 1 : 0; + return task_no_new_privs(current) ? 1 : 0; case PR_SET_VMA: error = prctl_set_vma(arg2, arg3, arg4, arg5); break; diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 859abdaac1ea..9aaa4e72cc1f 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -629,7 +629,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) * There is no exception for unconfined as change_hat is not * available. */ - if (current->no_new_privs) + if (task_no_new_privs(current)) return -EPERM; /* released below */ @@ -780,7 +780,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec, * no_new_privs is set because this aways results in a reduction * of permissions. */ - if (current->no_new_privs && !unconfined(profile)) { + if (task_no_new_privs(current) && !unconfined(profile)) { put_cred(cred); return -EPERM; } From b6a12bf4dd762236c7f637b19cfe10a268304b9b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jun 2014 15:16:33 -0700 Subject: [PATCH 59/69] seccomp: split filter prep from check and apply In preparation for adding seccomp locking, move filter creation away from where it is checked and applied. This will allow for locking where no memory allocation is happening. The validation, filter attachment, and seccomp mode setting can all happen under the future locks. For extreme defensiveness, I've added a BUG_ON check for the calculated size of the buffer allocation in case BPF_MAXINSN ever changes, which shouldn't ever happen. The compiler should actually optimize out this check since the test above it makes it impossible. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: kernel/seccomp.c --- kernel/seccomp.c | 89 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 23 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5390739066a6..1afc10b05eca 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* #define SECCOMP_DEBUG 1 */ @@ -27,7 +28,6 @@ #include #include #include -#include #include #include @@ -238,12 +238,12 @@ static inline void seccomp_assign_mode(unsigned long seccomp_mode) #ifdef CONFIG_SECCOMP_FILTER /** - * seccomp_attach_filter: Attaches a seccomp filter to current. + * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install * - * Returns 0 on success or an errno on failure. + * Returns filter on success or an ERR_PTR on failure. */ -static long seccomp_attach_filter(struct sock_fprog *fprog) +static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *filter; unsigned long fp_size = fprog->len * sizeof(struct sock_filter); @@ -251,12 +251,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) long ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) - return -EINVAL; + return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); for (filter = current->seccomp.filter; filter; filter = filter->prev) total_insns += filter->len + 4; /* include a 4 instr penalty */ if (total_insns > MAX_INSNS_PER_PATH) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* * Installing a seccomp filter requires that the task have @@ -267,13 +268,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!task_no_new_privs(current) && security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) - return -EACCES; + return ERR_PTR(-EACCES); /* Allocate a new seccomp_filter */ filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, GFP_KERNEL|__GFP_NOWARN); if (!filter) - return -ENOMEM; + return ERR_PTR(-ENOMEM);; atomic_set(&filter->usage, 1); filter->len = fprog->len; @@ -292,28 +293,24 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (ret) goto fail; - /* - * If there is an existing filter, make it the prev and don't drop its - * task reference. - */ - filter->prev = current->seccomp.filter; - current->seccomp.filter = filter; - return 0; + return filter; + fail: kfree(filter); - return ret; + return ERR_PTR(ret); } /** - * seccomp_attach_user_filter - attaches a user-supplied sock_fprog + * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog * @user_filter: pointer to the user data containing a sock_fprog. * * Returns 0 on success and non-zero otherwise. */ -static long seccomp_attach_user_filter(const char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; - long ret = -EFAULT; + struct seccomp_filter *filter = ERR_PTR(-EFAULT); #ifdef CONFIG_COMPAT if (is_compat_task()) { @@ -326,9 +323,39 @@ static long seccomp_attach_user_filter(const char __user *user_filter) #endif if (copy_from_user(&fprog, user_filter, sizeof(fprog))) goto out; - ret = seccomp_attach_filter(&fprog); + filter = seccomp_prepare_filter(&fprog); out: - return ret; + return filter; +} + +/** + * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior + * @filter: seccomp filter to add to the current process + * + * Returns 0 on success, -ve on error. + */ +static long seccomp_attach_filter(unsigned int flags, + struct seccomp_filter *filter) +{ + unsigned long total_insns; + struct seccomp_filter *walker; + + /* Validate resulting filter length. */ + total_insns = filter->len; + for (walker = current->seccomp.filter; walker; walker = walker->prev) + total_insns += walker->len + 4; /* 4 instr penalty */ + if (total_insns > MAX_INSNS_PER_PATH) + return -ENOMEM; + + /* + * If there is an existing filter, make it the prev and don't drop its + * task reference. + */ + filter->prev = current->seccomp.filter; + current->seccomp.filter = filter; + + return 0; } /* get_seccomp_filter - increments the reference count of the filter on @tsk */ @@ -341,6 +368,13 @@ void get_seccomp_filter(struct task_struct *tsk) atomic_inc(&orig->usage); } +static inline void seccomp_filter_free(struct seccomp_filter *filter) +{ + if (filter) { + kfree(filter); + } +} + /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ void put_seccomp_filter(struct task_struct *tsk) { @@ -349,7 +383,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - kfree(freeme); + seccomp_filter_free(freeme); } } @@ -527,21 +561,30 @@ static long seccomp_set_mode_filter(unsigned int flags, const char __user *filter) { const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; + struct seccomp_filter *prepared = NULL; long ret = -EINVAL; /* Validate flags. */ if (flags != 0) goto out; + /* Prepare the new filter before holding any locks. */ + prepared = seccomp_prepare_user_filter(filter); + if (IS_ERR(prepared)) + return PTR_ERR(prepared); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; - ret = seccomp_attach_user_filter(filter); + ret = seccomp_attach_filter(flags, prepared); if (ret) goto out; + /* Do not free the successfully attached filter. */ + prepared = NULL; seccomp_assign_mode(seccomp_mode); out: + seccomp_filter_free(prepared); return ret; } #else From 61b6b882a0abfeb627d25a069cfa1d232b84c8eb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jun 2014 15:18:48 -0700 Subject: [PATCH 60/69] seccomp: introduce writer locking Normally, task_struct.seccomp.filter is only ever read or modified by the task that owns it (current). This property aids in fast access during system call filtering as read access is lockless. Updating the pointer from another task, however, opens up race conditions. To allow cross-thread filter pointer updates, writes to the seccomp fields are now protected by the sighand spinlock (which is shared by all threads in the thread group). Read access remains lockless because pointer updates themselves are atomic. However, writes (or cloning) often entail additional checking (like maximum instruction counts) which require locking to perform safely. In the case of cloning threads, the child is invisible to the system until it enters the task list. To make sure a child can't be cloned from a thread and left in a prior state, seccomp duplication is additionally moved under the sighand lock. Then parent and child are certain have the same seccomp state when they exit the lock. Based on patches by Will Drewry and David Drysdale. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: kernel/fork.c --- include/linux/seccomp.h | 6 ++--- kernel/fork.c | 49 ++++++++++++++++++++++++++++++++++++++++- kernel/seccomp.c | 16 +++++++++++++- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 6f19cfd1840e..9ab63a574d40 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -14,11 +14,11 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. - * @filter: The metadata and ruleset for determining what system calls - * are allowed for a task. + * @filter: must always point to a valid seccomp-filter or NULL as it is + * accessed without locking during system call entry. * * @filter must only be accessed from the context of current as there - * is no locking. + * is no read locking. */ struct seccomp { int mode; diff --git a/kernel/fork.c b/kernel/fork.c index 32a78ff21949..a0abbb536e9a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto free_ti; tsk->stack = ti; +#ifdef CONFIG_SECCOMP + /* + * We must handle setting up seccomp filters once we're under + * the sighand lock in case orig has changed between now and + * then. Until then, filter must be NULL to avoid messing up + * the usage counts on the error path calling free_task. + */ + tsk->seccomp.filter = NULL; +#endif setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); @@ -1107,6 +1116,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) p->flags = new_flags; } +static void copy_seccomp(struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + /* + * Must be called with sighand->lock held, which is common to + * all threads in the group. Holding cred_guard_mutex is not + * needed because this new task is not yet running and cannot + * be racing exec. + */ + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Ref-count the new filter user, and assign it. */ + get_seccomp_filter(current); + p->seccomp = current->seccomp; + + /* + * Explicitly enable no_new_privs here in case it got set + * between the task_struct being duplicated and holding the + * sighand lock. The seccomp state and nnp must be in sync. + */ + if (task_no_new_privs(current)) + task_set_no_new_privs(p); + + /* + * If the parent gained a seccomp mode after copying thread + * flags and between before we held the sighand lock, we have + * to manually enable the seccomp thread flag here. + */ + if (p->seccomp.mode != SECCOMP_MODE_DISABLED) + set_tsk_thread_flag(p, TIF_SECCOMP); +#endif +} + SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -1210,7 +1252,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; ftrace_graph_init_task(p); - get_seccomp_filter(p); rt_mutex_init_task(p); @@ -1452,6 +1493,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock(¤t->sighand->siglock); + /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 1afc10b05eca..421d0f87ffed 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -224,6 +224,8 @@ static u32 seccomp_run_filters(int syscall) static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) { + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) return false; @@ -232,6 +234,8 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) static inline void seccomp_assign_mode(unsigned long seccomp_mode) { + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + current->seccomp.mode = seccomp_mode; set_tsk_thread_flag(current, TIF_SECCOMP); } @@ -333,6 +337,8 @@ seccomp_prepare_user_filter(const char __user *user_filter) * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * + * Caller must be holding current->sighand->siglock lock. + * * Returns 0 on success, -ve on error. */ static long seccomp_attach_filter(unsigned int flags, @@ -341,6 +347,8 @@ static long seccomp_attach_filter(unsigned int flags, unsigned long total_insns; struct seccomp_filter *walker; + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + /* Validate resulting filter length. */ total_insns = filter->len; for (walker = current->seccomp.filter; walker; walker = walker->prev) @@ -529,6 +537,8 @@ static long seccomp_set_mode_strict(void) const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; long ret = -EINVAL; + spin_lock_irq(¤t->sighand->siglock); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -539,6 +549,7 @@ static long seccomp_set_mode_strict(void) ret = 0; out: + spin_unlock_irq(¤t->sighand->siglock); return ret; } @@ -566,13 +577,15 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Validate flags. */ if (flags != 0) - goto out; + return -EINVAL; /* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) return PTR_ERR(prepared); + spin_lock_irq(¤t->sighand->siglock); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -584,6 +597,7 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(seccomp_mode); out: + spin_unlock_irq(¤t->sighand->siglock); seccomp_filter_free(prepared); return ret; } From c852ef778224ecf5fe995d74ad96087038778bca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jun 2014 15:01:35 -0700 Subject: [PATCH 61/69] seccomp: allow mode setting across threads This changes the mode setting helper to allow threads to change the seccomp mode from another thread. We must maintain barriers to keep TIF_SECCOMP synchronized with the rest of the seccomp state. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: kernel/seccomp.c --- kernel/seccomp.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 421d0f87ffed..5f2962e4aee4 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -202,19 +202,23 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) */ static u32 seccomp_run_filters(int syscall) { - struct seccomp_filter *f; + struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ - if (WARN_ON(current->seccomp.filter == NULL)) + if (unlikely(WARN_ON(f == NULL))) return SECCOMP_RET_KILL; + /* Make sure cross-thread synced filter points somewhere sane. */ + smp_read_barrier_depends(); + /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ - for (f = current->seccomp.filter; f; f = f->prev) { + for (; f; f = f->prev) { u32 cur_ret = sk_run_filter(NULL, f->insns); + if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; } @@ -232,12 +236,18 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } -static inline void seccomp_assign_mode(unsigned long seccomp_mode) +static inline void seccomp_assign_mode(struct task_struct *task, + unsigned long seccomp_mode) { - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + BUG_ON(!spin_is_locked(&task->sighand->siglock)); - current->seccomp.mode = seccomp_mode; - set_tsk_thread_flag(current, TIF_SECCOMP); + task->seccomp.mode = seccomp_mode; + /* + * Make sure TIF_SECCOMP cannot be set before the mode (and + * filter) is set. + */ + smp_mb(); + set_tsk_thread_flag(task, TIF_SECCOMP); } #ifdef CONFIG_SECCOMP_FILTER @@ -435,12 +445,17 @@ static int mode1_syscalls_32[] = { int __secure_computing(int this_syscall) { - int mode = current->seccomp.mode; int exit_sig = 0; int *syscall; u32 ret; - switch (mode) { + /* + * Make sure that any changes to mode from another thread have + * been seen after TIF_SECCOMP was seen. + */ + rmb(); + + switch (current->seccomp.mode) { case SECCOMP_MODE_STRICT: syscall = mode1_syscalls; #ifdef CONFIG_COMPAT @@ -545,7 +560,7 @@ static long seccomp_set_mode_strict(void) #ifdef TIF_NOTSC disable_TSC(); #endif - seccomp_assign_mode(seccomp_mode); + seccomp_assign_mode(current, seccomp_mode); ret = 0; out: @@ -595,7 +610,7 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Do not free the successfully attached filter. */ prepared = NULL; - seccomp_assign_mode(seccomp_mode); + seccomp_assign_mode(current, seccomp_mode); out: spin_unlock_irq(¤t->sighand->siglock); seccomp_filter_free(prepared); From f14a5db2398afed8f416d244e6da6b23940997c6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 Jun 2014 00:23:17 -0700 Subject: [PATCH 62/69] seccomp: implement SECCOMP_FILTER_FLAG_TSYNC Applying restrictive seccomp filter programs to large or diverse codebases often requires handling threads which may be started early in the process lifetime (e.g., by code that is linked in). While it is possible to apply permissive programs prior to process start up, it is difficult to further restrict the kernel ABI to those threads after that point. This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for synchronizing thread group seccomp filters at filter installation time. When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, filter) an attempt will be made to synchronize all threads in current's threadgroup to its new seccomp filter program. This is possible iff all threads are using a filter that is an ancestor to the filter current is attempting to synchronize to. NULL filters (where the task is running as SECCOMP_MODE_NONE) are also treated as ancestors allowing threads to be transitioned into SECCOMP_MODE_FILTER. If prctrl(PR_SET_NO_NEW_PRIVS, ...) has been set on the calling thread, no_new_privs will be set for all synchronized threads too. On success, 0 is returned. On failure, the pid of one of the failing threads will be returned and no filters will have been applied. The race conditions against another thread are: - requesting TSYNC (already handled by sighand lock) - performing a clone (already handled by sighand lock) - changing its filter (already handled by sighand lock) - calling exec (handled by cred_guard_mutex) The clone case is assisted by the fact that new threads will have their seccomp state duplicated from their parent before appearing on the tasklist. Holding cred_guard_mutex means that seccomp filters cannot be assigned while in the middle of another thread's exec (potentially bypassing no_new_privs or similar). The call to de_thread() may kill threads waiting for the mutex. Changes across threads to the filter pointer includes a barrier. Based on patches by Will Drewry. Suggested-by: Julien Tinnes Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- fs/exec.c | 2 +- include/linux/seccomp.h | 2 + include/uapi/linux/seccomp.h | 3 + kernel/seccomp.c | 135 ++++++++++++++++++++++++++++++++++- 4 files changed, 140 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index cb5fb9aa320e..c568bdce6413 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against - * PTRACE_ATTACH + * PTRACE_ATTACH or seccomp thread-sync */ static int check_unsafe_exec(struct linux_binprm *bprm) { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 9ab63a574d40..9687691799ff 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,8 @@ #include +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) + #ifdef CONFIG_SECCOMP #include diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index b258878ba754..0f238a43ff1e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -14,6 +14,9 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5f2962e4aee4..ebdaaf427de2 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -26,6 +26,7 @@ #ifdef CONFIG_SECCOMP_FILTER #include #include +#include #include #include #include @@ -251,6 +252,114 @@ static inline void seccomp_assign_mode(struct task_struct *task, } #ifdef CONFIG_SECCOMP_FILTER +/* Returns 1 if the parent is an ancestor of the child. */ +static int is_ancestor(struct seccomp_filter *parent, + struct seccomp_filter *child) +{ + /* NULL is the root ancestor. */ + if (parent == NULL) + return 1; + for (; child; child = child->prev) + if (child == parent) + return 1; + return 0; +} + +/** + * seccomp_can_sync_threads: checks if all threads can be synchronized + * + * Expects sighand and cred_guard_mutex locks to be held. + * + * Returns 0 on success, -ve on error, or the pid of a thread which was + * either not in the correct seccomp mode or it did not have an ancestral + * seccomp filter. + */ +static inline pid_t seccomp_can_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Validate all threads being eligible for synchronization. */ + caller = current; + for_each_thread(caller, thread) { + pid_t failed; + + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || + (thread->seccomp.mode == SECCOMP_MODE_FILTER && + is_ancestor(thread->seccomp.filter, + caller->seccomp.filter))) + continue; + + /* Return the first thread that cannot be synchronized. */ + failed = task_pid_vnr(thread); + /* If the pid cannot be resolved, then return -ESRCH */ + if (unlikely(WARN_ON(failed == 0))) + failed = -ESRCH; + return failed; + } + + return 0; +} + +/** + * seccomp_sync_threads: sets all threads to use current's filter + * + * Expects sighand and cred_guard_mutex locks to be held, and for + * seccomp_can_sync_threads() to have returned success already + * without dropping the locks. + * + */ +static inline void seccomp_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Synchronize all threads. */ + caller = current; + for_each_thread(caller, thread) { + /* Skip current, since it needs no changes. */ + if (thread == caller) + continue; + + /* Get a task reference for the new leaf node. */ + get_seccomp_filter(caller); + /* + * Drop the task reference to the shared ancestor since + * current's path will hold a reference. (This also + * allows a put before the assignment.) + */ + put_seccomp_filter(thread); + smp_store_release(&thread->seccomp.filter, + caller->seccomp.filter); + /* + * Opt the other thread into seccomp if needed. + * As threads are considered to be trust-realm + * equivalent (see ptrace_may_access), it is safe to + * allow one thread to transition the other. + */ + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + } + } +} + /** * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install @@ -366,6 +475,15 @@ static long seccomp_attach_filter(unsigned int flags, if (total_insns > MAX_INSNS_PER_PATH) return -ENOMEM; + /* If thread sync has been requested, check that it is possible. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) { + int ret; + + ret = seccomp_can_sync_threads(); + if (ret) + return ret; + } + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -373,6 +491,10 @@ static long seccomp_attach_filter(unsigned int flags, filter->prev = current->seccomp.filter; current->seccomp.filter = filter; + /* Now that the new filter is in place, synchronize to all threads. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + seccomp_sync_threads(); + return 0; } @@ -591,7 +713,7 @@ static long seccomp_set_mode_filter(unsigned int flags, long ret = -EINVAL; /* Validate flags. */ - if (flags != 0) + if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; /* Prepare the new filter before holding any locks. */ @@ -599,6 +721,14 @@ static long seccomp_set_mode_filter(unsigned int flags, if (IS_ERR(prepared)) return PTR_ERR(prepared); + /* + * Make sure we cannot change seccomp or nnp state via TSYNC + * while another thread is in the middle of calling exec. + */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC && + mutex_lock_killable(¤t->signal->cred_guard_mutex)) + goto out_free; + spin_lock_irq(¤t->sighand->siglock); if (!seccomp_may_assign_mode(seccomp_mode)) @@ -613,6 +743,9 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(current, seccomp_mode); out: spin_unlock_irq(¤t->sighand->siglock); + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + mutex_unlock(¤t->signal->cred_guard_mutex); +out_free: seccomp_filter_free(prepared); return ret; } From 9ac860041db860a59bfd6ac82b31d6b6f76ebb52 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 10 Aug 2014 20:50:30 -0700 Subject: [PATCH 63/69] seccomp: Replace BUG(!spin_is_locked()) with assert_spin_lock Current upstream kernel hangs with mips and powerpc targets in uniprocessor mode if SECCOMP is configured. Bisect points to commit dbd952127d11 ("seccomp: introduce writer locking"). Turns out that code such as BUG_ON(!spin_is_locked(&list_lock)); can not be used in uniprocessor mode because spin_is_locked() always returns false in this configuration, and that assert_spin_locked() exists for that very purpose and must be used instead. Fixes: dbd952127d11 ("seccomp: introduce writer locking") Cc: Kees Cook Signed-off-by: Guenter Roeck Signed-off-by: Kees Cook --- kernel/fork.c | 2 +- kernel/seccomp.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index a0abbb536e9a..0ff07d94e07c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1125,7 +1125,7 @@ static void copy_seccomp(struct task_struct *p) * needed because this new task is not yet running and cannot * be racing exec. */ - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + assert_spin_locked(¤t->sighand->siglock); /* Ref-count the new filter user, and assign it. */ get_seccomp_filter(current); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ebdaaf427de2..1fbb1a2bc459 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -229,7 +229,7 @@ static u32 seccomp_run_filters(int syscall) static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) { - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + assert_spin_locked(¤t->sighand->siglock); if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) return false; @@ -240,7 +240,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode) { - BUG_ON(!spin_is_locked(&task->sighand->siglock)); + assert_spin_locked(&task->sighand->siglock); task->seccomp.mode = seccomp_mode; /* @@ -279,7 +279,7 @@ static inline pid_t seccomp_can_sync_threads(void) struct task_struct *thread, *caller; BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + assert_spin_locked(¤t->sighand->siglock); /* Validate all threads being eligible for synchronization. */ caller = current; @@ -320,7 +320,7 @@ static inline void seccomp_sync_threads(void) struct task_struct *thread, *caller; BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + assert_spin_locked(¤t->sighand->siglock); /* Synchronize all threads. */ caller = current; @@ -466,7 +466,7 @@ static long seccomp_attach_filter(unsigned int flags, unsigned long total_insns; struct seccomp_filter *walker; - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + assert_spin_locked(¤t->sighand->siglock); /* Validate resulting filter length. */ total_insns = filter->len; From 900e9fd0d5d15c596cacfb89ce007c933cea6e1c Mon Sep 17 00:00:00 2001 From: Lee Campbell Date: Wed, 8 Oct 2014 14:40:22 -0700 Subject: [PATCH 64/69] seccomp: fix syscall numbers for x86 and x86_64 Correcting syscall numbers for seccomp Signed-off-by: Lee Campbell --- arch/x86/syscalls/syscall_32.tbl | 5 ++++- arch/x86/syscalls/syscall_64.tbl | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 8605e9e0f19a..01ed50255473 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -357,4 +357,7 @@ 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp 350 i386 finit_module sys_finit_module -351 i386 seccomp sys_seccomp +# 351 i386 sched_setattr sys_sched_setattr +# 352 i386 sched_getattr sys_sched_getattr +# 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 4cb491567b85..a3c38bbd6f01 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -320,7 +320,10 @@ 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp 313 common finit_module sys_finit_module -314 common seccomp sys_seccomp +# 314 common sched_setattr sys_sched_setattr +# 315 common sched_getattr sys_sched_getattr +# 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact From a9ba4285aa5722a3b4d84888e78ba8adc0046b28 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Jun 2014 15:40:23 -0700 Subject: [PATCH 65/69] ARM: add seccomp syscall Wires up the new seccomp syscall. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Conflicts: arch/arm/include/uapi/asm/unistd.h arch/arm/kernel/calls.S Signed-off-by: Lee Campbell --- arch/arm/include/asm/unistd.h | 2 +- arch/arm/include/uapi/asm/unistd.h | 6 ++++++ arch/arm/kernel/calls.S | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 141baa3f9a72..acabef1a75df 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -15,7 +15,7 @@ #include -#define __NR_syscalls (380) +#define __NR_syscalls (384) #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) #define __ARCH_WANT_STAT64 diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index af33b44990ed..17407c92c0da 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -406,6 +406,12 @@ #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) #define __NR_kcmp (__NR_SYSCALL_BASE+378) #define __NR_finit_module (__NR_SYSCALL_BASE+379) +/* Reserve for later +#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) +#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) +#define __NR_renameat2 (__NR_SYSCALL_BASE+382) +*/ +#define __NR_seccomp (__NR_SYSCALL_BASE+383) /* * This may need to be greater than __NR_last_syscall+1 in order to diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index c6ca7e376773..725f844926ea 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -389,6 +389,11 @@ CALL(sys_process_vm_writev) CALL(sys_kcmp) CALL(sys_finit_module) +/* 380 */ CALL(sys_ni_syscall) /* reserved sys_sched_setattr */ + CALL(sys_ni_syscall) /* reserved sys_sched_getattr */ + CALL(sys_ni_syscall) /* reserved sys_renameat2 */ + CALL(sys_seccomp) + #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted From 41900903483eb96602dd72e719a798c208118aad Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 27 Jun 2014 17:01:47 +0100 Subject: [PATCH 66/69] ARM: 8087/1: ptrace: reload syscall number after secure_computing() check On the syscall tracing path, we call out to secure_computing() to allow seccomp to check the syscall number being attempted. As part of this, a SIGTRAP may be sent to the tracer and the syscall could be re-written by a subsequent SET_SYSCALL ptrace request. Unfortunately, this new syscall is ignored by the current code unless TIF_SYSCALL_TRACE is also set on the current thread. This patch slightly reworks the enter path of the syscall tracing code so that we always reload the syscall number from current_thread_info()->syscall after the potential ptrace traps. Acked-by: Kees Cook Tested-by: Kees Cook Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/ptrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 03deeffd9f6d..394424b25254 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -916,7 +916,7 @@ enum ptrace_syscall_dir { PTRACE_SYSCALL_EXIT, }; -static int tracehook_report_syscall(struct pt_regs *regs, +static void tracehook_report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) { unsigned long ip; @@ -934,7 +934,6 @@ static int tracehook_report_syscall(struct pt_regs *regs, current_thread_info()->syscall = -1; regs->ARM_ip = ip; - return current_thread_info()->syscall; } asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) @@ -946,7 +945,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) return -1; if (test_thread_flag(TIF_SYSCALL_TRACE)) - scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + scno = current_thread_info()->syscall; if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, scno); From 5ffb57932ec09ebc9956a1c74f703ca8389e3a90 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Tue, 14 Oct 2014 17:43:21 -0700 Subject: [PATCH 67/69] power: Avoids bogus error messages for the suspend aborts. Avoids printing bogus error message "tasks refusing to freeze", in cases where pending wakeup source caused the suspend abort. Signed-off-by: Ruchi Kandoi Change-Id: I913ad290f501b31cd536d039834c8d24c6f16928 --- kernel/power/process.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index fc0df8486449..d26dcb5dff87 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -82,23 +82,24 @@ static int try_to_freeze_tasks(bool user_only) do_div(elapsed_msecs64, NSEC_PER_MSEC); elapsed_msecs = elapsed_msecs64; - if (todo) { + if (wakeup) { printk("\n"); - printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", - wakeup ? "aborted" : "failed", + printk(KERN_ERR "Freezing of tasks aborted after %d.%03d seconds", + elapsed_msecs / 1000, elapsed_msecs % 1000); + } else if (todo) { + printk("\n"); + printk(KERN_ERR "Freezing of tasks failed after %d.%03d seconds" + " (%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); - if (!wakeup) { - read_lock(&tasklist_lock); - do_each_thread(g, p) { - if (p != current && !freezer_should_skip(p) - && freezing(p) && !frozen(p)) - sched_show_task(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - } + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (p != current && !freezer_should_skip(p) + && freezing(p) && !frozen(p)) + sched_show_task(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); } else { printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000); From 13224a7d4e4e4ac6ade52deec32e9f9c28533659 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Tue, 21 Oct 2014 13:55:04 -0700 Subject: [PATCH 68/69] cpufreq: Avoid using global variable total_cpus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The change is to compile on kernels where cpufreq stats are compiled as a module (CONFIG_CPU_FREQ_STAT=m), because total_cpus is not exported for module use. Reported-By: Emilio López Signed-off-by: Ruchi Kandoi Change-Id: I4f3c74f0fac5e8d9449655b26bf3b407b0fe4290 --- drivers/cpufreq/cpufreq_stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 7a2bcac3ad7f..d811f5d4b32b 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -282,19 +282,19 @@ static void cpufreq_stats_free_sysfs(unsigned int cpu) static void cpufreq_allstats_free(void) { - int i; + int cpu; struct all_cpufreq_stats *all_stat; sysfs_remove_file(cpufreq_global_kobject, &_attr_all_time_in_state.attr); - for (i = 0; i < total_cpus; i++) { - all_stat = per_cpu(all_cpufreq_stats, i); + for_each_possible_cpu(cpu) { + all_stat = per_cpu(all_cpufreq_stats, cpu); if (!all_stat) continue; kfree(all_stat->time_in_state); kfree(all_stat); - per_cpu(all_cpufreq_stats, i) = NULL; + per_cpu(all_cpufreq_stats, cpu) = NULL; } if (all_freq_table) { kfree(all_freq_table->freq_table); From 7af7a7d021416dfdbb30e6b31957297d484ebb97 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Wed, 29 Oct 2014 10:36:27 -0700 Subject: [PATCH 69/69] power: Adds functionality to log the last suspend abort reason. Extends the last_resume_reason to log suspend abort reason. The abort reasons will have "Abort:" appended at the start to distinguish itself from the resume reason. Signed-off-by: Ruchi Kandoi Change-Id: I3207f1844e3d87c706dfc298fb10e1c648814c5f --- drivers/base/power/main.c | 13 +++++++++++ drivers/base/power/wakeup.c | 16 +++++++++++++ drivers/base/syscore.c | 3 +++ include/linux/suspend.h | 2 +- include/linux/wakeup_reason.h | 4 +++- kernel/irq/pm.c | 7 +++++- kernel/power/process.c | 6 ++++- kernel/power/suspend.c | 19 +++++++++++++--- kernel/power/wakeup_reason.c | 42 ++++++++++++++++++++++++++++------- 9 files changed, 97 insertions(+), 15 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 6a33dd85c044..5131ad8ca17f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "../base.h" #include "power.h" @@ -938,6 +939,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) static int dpm_suspend_noirq(pm_message_t state) { ktime_t starttime = ktime_get(); + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; int error = 0; cpuidle_pause(); @@ -965,6 +967,9 @@ static int dpm_suspend_noirq(pm_message_t state) put_device(dev); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); error = -EBUSY; break; } @@ -1023,6 +1028,7 @@ static int device_suspend_late(struct device *dev, pm_message_t state) static int dpm_suspend_late(pm_message_t state) { ktime_t starttime = ktime_get(); + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; int error = 0; mutex_lock(&dpm_list_mtx); @@ -1048,6 +1054,9 @@ static int dpm_suspend_late(pm_message_t state) put_device(dev); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); error = -EBUSY; break; } @@ -1115,6 +1124,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) char *info = NULL; int error = 0; struct dpm_watchdog wd; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; dpm_wait_for_children(dev, async); @@ -1131,6 +1141,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); async_error = -EBUSY; goto Complete; } diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 79715e7fa43e..bea700736f24 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -659,6 +659,22 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) } EXPORT_SYMBOL_GPL(pm_wakeup_event); +void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max) +{ + struct wakeup_source *ws; + int len = 0; + rcu_read_lock(); + len += snprintf(pending_wakeup_source, max, "Pending Wakeup Sources: "); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + if (ws->active) { + len += snprintf(pending_wakeup_source + len, max, + "%s ", ws->name); + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources); + static void print_active_wakeup_sources(void) { struct wakeup_source *ws; diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index e8d11b6630ee..0ab546558c4e 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -10,6 +10,7 @@ #include #include #include +#include static LIST_HEAD(syscore_ops_list); static DEFINE_MUTEX(syscore_ops_lock); @@ -73,6 +74,8 @@ int syscore_suspend(void) return 0; err_out: + log_suspend_abort_reason("System core suspend callback %pF failed", + ops->suspend); pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend); list_for_each_entry_continue(ops, &syscore_ops_list, node) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d4e3f16d5e89..a34821358ae5 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -363,7 +363,7 @@ extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); - +extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max); static inline void lock_system_sleep(void) { current->flags |= PF_FREEZER_SKIP; diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index 7ce50f0debc4..5f095da2c977 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -18,6 +18,8 @@ #ifndef _LINUX_WAKEUP_REASON_H #define _LINUX_WAKEUP_REASON_H -void log_wakeup_reason(int irq); +#define MAX_SUSPEND_ABORT_LEN 256 +void log_wakeup_reason(int irq); +void log_suspend_abort_reason(const char *fmt, ...); #endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index fe4b09cf829c..08d0916150d5 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -10,7 +10,7 @@ #include #include #include - +#include #include "internals.h" /** @@ -100,11 +100,16 @@ EXPORT_SYMBOL_GPL(resume_device_irqs); int check_wakeup_irqs(void) { struct irq_desc *desc; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; int irq; for_each_irq_desc(irq, desc) { if (irqd_is_wakeup_set(&desc->irq_data)) { if (desc->istate & IRQS_PENDING) { + log_suspend_abort_reason("Wakeup IRQ %d %s pending", + irq, + desc->action && desc->action->name ? + desc->action->name : ""); pr_info("Wakeup IRQ %d %s pending, suspend aborted\n", irq, desc->action && desc->action->name ? diff --git a/kernel/power/process.c b/kernel/power/process.c index d26dcb5dff87..86a40fa35095 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -17,7 +17,7 @@ #include #include #include - +#include /* * Timeout for stopping processes */ @@ -34,6 +34,7 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; do_gettimeofday(&start); @@ -63,6 +64,9 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); wakeup = true; break; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 454568e6c8d2..7c53fea31cba 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "power.h" @@ -147,7 +148,7 @@ static int suspend_prepare(suspend_state_t state) error = suspend_freeze_processes(); if (!error) return 0; - + log_suspend_abort_reason("One or more tasks refusing to freeze"); suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); Finish: @@ -177,7 +178,8 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state, bool *wakeup) { - int error; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; + int error, last_dev; if (need_suspend_ops(state) && suspend_ops->prepare) { error = suspend_ops->prepare(); @@ -187,7 +189,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_end(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; printk(KERN_ERR "PM: Some devices failed to power down\n"); + log_suspend_abort_reason("%s device failed to power down", + suspend_stats.failed_devs[last_dev]); goto Platform_finish; } @@ -212,8 +218,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) } error = disable_nonboot_cpus(); - if (error || suspend_test(TEST_CPUS)) + if (error || suspend_test(TEST_CPUS)) { + log_suspend_abort_reason("Disabling non-boot cpus failed"); goto Enable_cpus; + } arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -224,6 +232,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (!(suspend_test(TEST_CORE) || *wakeup)) { error = suspend_ops->enter(state); events_check_enabled = false; + } else { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); } syscore_resume(); } @@ -271,6 +283,7 @@ int suspend_devices_and_enter(suspend_state_t state) error = dpm_suspend_start(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); + log_suspend_abort_reason("Some devices failed to suspend"); goto Recover_platform; } suspend_test_finish("suspend devices"); diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 187e4e9105fb..2aacc34ef17c 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -31,6 +31,8 @@ #define MAX_WAKEUP_REASON_IRQS 32 static int irq_list[MAX_WAKEUP_REASON_IRQS]; static int irqcount; +static bool suspend_abort; +static char abort_reason[MAX_SUSPEND_ABORT_LEN]; static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; @@ -40,14 +42,18 @@ static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribu int irq_no, buf_offset = 0; struct irq_desc *desc; spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irqcount; irq_no++) { - desc = irq_to_desc(irq_list[irq_no]); - if (desc && desc->action && desc->action->name) - buf_offset += sprintf(buf + buf_offset, "%d %s\n", - irq_list[irq_no], desc->action->name); - else - buf_offset += sprintf(buf + buf_offset, "%d\n", - irq_list[irq_no]); + if (suspend_abort) { + buf_offset = sprintf(buf, "Abort: %s", abort_reason); + } else { + for (irq_no = 0; irq_no < irqcount; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } } spin_unlock(&resume_reason_lock); return buf_offset; @@ -89,6 +95,25 @@ void log_wakeup_reason(int irq) spin_unlock(&resume_reason_lock); } +void log_suspend_abort_reason(const char *fmt, ...) +{ + va_list args; + + spin_lock(&resume_reason_lock); + + //Suspend abort reason has already been logged. + if (suspend_abort) { + spin_unlock(&resume_reason_lock); + return; + } + + suspend_abort = true; + va_start(args, fmt); + snprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + va_end(args); + spin_unlock(&resume_reason_lock); +} + /* Detects a suspend and clears all the previous wake up reasons*/ static int wakeup_reason_pm_event(struct notifier_block *notifier, unsigned long pm_event, void *unused) @@ -97,6 +122,7 @@ static int wakeup_reason_pm_event(struct notifier_block *notifier, case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); irqcount = 0; + suspend_abort = false; spin_unlock(&resume_reason_lock); break; default: