From 488ca7d72d974e3c00ae73ed9f947590680bdf00 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 21 Aug 2015 14:56:46 -0700 Subject: [PATCH 01/96] x86/cpufeatures: Enable cpuid for Intel SHA extensions Add Intel CPUID for Intel Secure Hash Algorithm Extensions. This feature provides new instructions for accelerated computation of SHA-1 and SHA-256. This allows the feature to be shown in the /proc/cpuinfo for cpus that support it. Refer to SHA extension programming guide in chapter 8.2 of the Intel Architecture Instruction Set Extensions Programming reference for definition of this feature's cpuid: CPUID.(EAX=07H, ECX=0):EBX.SHA [bit 29] = 1 https://software.intel.com/sites/default/files/managed/07/b7/319433-023.pdf Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Cc: Borislav Petkov Cc: Dave Hansen Cc: Herbert Xu Link: http://lkml.kernel.org/r/1440194206.3940.6.camel@schen9-mobl2 Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3d6606fb97d0..a94f83d4272a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -239,6 +239,7 @@ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ From 31e09b18c863718939e3e9c30eee55f9011d85ee Mon Sep 17 00:00:00 2001 From: Linda Knippers Date: Tue, 1 Sep 2015 15:41:55 -0400 Subject: [PATCH 02/96] x86/mm/srat: Print non-volatile flag in SRAT With the addition of NVDIMM support, a question came up as to whether NVDIMM ranges should be in the SRAT with this bit set. I think the consensus was no because the ranges are in the NFIT with proximity domain information there. ACPI is not clear on the meaning of this bit in the SRAT. If someone is setting it, we might want to ask them what they expect to happen with it. Right now this bit is only printed if all the ACPI debug information is turned on. Signed-off-by: Linda Knippers Acked-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150901194154.GA4939@ljkz400 Signed-off-by: Ingo Molnar --- arch/x86/mm/srat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 66338a60aa6e..c2aea63bee20 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? " hotplug" : ""); + hotpluggable ? " hotplug" : "", + ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); /* Mark hotplug range in memblock. */ if (hotpluggable && memblock_mark_hotplug(start, ma->length)) From 9642d18eee2cd169b60c6ac0f20bda745b5a3d1e Mon Sep 17 00:00:00 2001 From: Vatika Harlalka Date: Tue, 1 Sep 2015 16:50:59 +0200 Subject: [PATCH 03/96] nohz: Affine unpinned timers to housekeepers The problem addressed in this patch is about affining unpinned timers. Adaptive or Full Dynticks CPUs are currently disturbed by unnecessary jitter due to firing of such timers on them. This patch will affine timers to online CPUs which are not full dynticks in NOHZ_FULL configured systems. It should not introduce overhead in nohz full off case due to static keys. Signed-off-by: Vatika Harlalka Signed-off-by: Frederic Weisbecker Reviewed-by: Preeti U Murthy Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441119060-2230-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/tick.h | 9 +++++++++ kernel/sched/core.c | 7 +++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 48d901f83f92..e312219ff823 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -147,11 +147,20 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8b864ecee0e1..0902e4d72671 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -623,18 +623,21 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(); struct sched_domain *sd; - if (!idle_cpu(cpu)) + if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) return cpu; rcu_read_lock(); for_each_domain(cpu, sd) { for_each_cpu(i, sched_domain_span(sd)) { - if (!idle_cpu(i)) { + if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) { cpu = i; goto unlock; } } } + + if (!is_housekeeping_cpu(cpu)) + cpu = housekeeping_any_cpu(); unlock: rcu_read_unlock(); return cpu; From 7c8bb6cb95061b3143759459ed6c6b0c73bcfecb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 1 Sep 2015 16:51:00 +0200 Subject: [PATCH 04/96] nohz: Assert existing housekeepers when nohz full enabled The code ensures that when nohz full is running, at least the boot CPU serves as a housekeeper and it can't be later offlined. Let's assert this assumption to make sure that we have CPUs to handle unbound jobs like workqueues and timers while nohz full CPUs run undisturbed. Also improve the comments on housekeeper offlining prevention. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Vatika Harlalka Link: http://lkml.kernel.org/r/1441119060-2230-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3319e16f31e5..7c7ec4515983 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str) __setup("nohz_full=", tick_nohz_full_setup); static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: /* - * If we handle the timekeeping duty for full dynticks CPUs, - * we can't safely shutdown that CPU. + * The boot CPU handles housekeeping duty (unbound timers, + * workqueues, timekeeping, ...) on behalf of full dynticks + * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && tick_do_timer_cpu == cpu) return NOTIFY_BAD; @@ -370,6 +371,12 @@ void __init tick_nohz_init(void) cpu_notifier(tick_nohz_cpu_down_callback, 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); + + /* + * We need at least one CPU to handle housekeeping work such + * as timekeeping, unbound timers, workqueues, ... + */ + WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } #endif From ac0e137ab0da80e8fc0db2027598e2f7f82a5a02 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 31 Aug 2015 20:27:32 -0700 Subject: [PATCH 05/96] clk: h8s2678: Fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent cleanup removed some include files without checking if the cleaned up code still compiles. This results in the following compile error. drivers/clk/h8300/clk-h8s2678.c: In function ‘h8s2678_pll_clk_setup’: drivers/clk/h8300/clk-h8s2678.c:99:14: error: implicit declaration of function ‘kzalloc’ drivers/clk/h8300/clk-h8s2678.c:138:2: error: implicit declaration of function ‘kfree’ Cc: Yoshinori Sato Signed-off-by: Guenter Roeck Signed-off-by: Stephen Boyd --- drivers/clk/h8300/clk-h8s2678.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c index 2a38eb4a2552..6cf38dc1c929 100644 --- a/drivers/clk/h8300/clk-h8s2678.c +++ b/drivers/clk/h8300/clk-h8s2678.c @@ -8,6 +8,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(clklock); From 9f42a89da6b4dc015631e01ba990d3db2cae2a1b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 2 Sep 2015 10:57:47 +0800 Subject: [PATCH 06/96] clk: Hi6220: separately build stub clock driver The previous code, kernel builds Hi6220's common clock driver and stub clock driver together. Stub clock driver has introduced the dependency with CONFIG_MAILBOX, so kernel will not build Hi6220's common clock driver due ARM64's defconfig have not enabled CONFIG_MAILBOX by default. So separately build stub clock driver and common clock driver for Hi6220; and only let stub clock driver has the dependency with CONFIG_MAILBOX. Signed-off-by: Leo Yan Tested-by: Kevin Hilman Signed-off-by: Stephen Boyd --- drivers/clk/hisilicon/Kconfig | 8 +++++++- drivers/clk/hisilicon/Makefile | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 2c16807341dc..e43485448612 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -1,6 +1,12 @@ config COMMON_CLK_HI6220 bool "Hi6220 Clock Driver" - depends on (ARCH_HISI || COMPILE_TEST) && MAILBOX + depends on ARCH_HISI || COMPILE_TEST default ARCH_HISI help Build the Hisilicon Hi6220 clock driver based on the common clock framework. + +config STUB_CLK_HI6220 + bool "Hi6220 Stub Clock Driver" + depends on COMMON_CLK_HI6220 && MAILBOX + help + Build the Hisilicon Hi6220 stub clock driver. diff --git a/drivers/clk/hisilicon/Makefile b/drivers/clk/hisilicon/Makefile index 4a1001a11f04..74dba31590f9 100644 --- a/drivers/clk/hisilicon/Makefile +++ b/drivers/clk/hisilicon/Makefile @@ -7,4 +7,5 @@ obj-y += clk.o clkgate-separated.o clkdivider-hi6220.o obj-$(CONFIG_ARCH_HI3xxx) += clk-hi3620.o obj-$(CONFIG_ARCH_HIP04) += clk-hip04.o obj-$(CONFIG_ARCH_HIX5HD2) += clk-hix5hd2.o -obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o clk-hi6220-stub.o +obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o +obj-$(CONFIG_STUB_CLK_HI6220) += clk-hi6220-stub.o From 66c117d7fa2ae429911e60d84bf31a90b2b96189 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Sep 2015 12:34:55 +0200 Subject: [PATCH 07/96] x86/alternatives: Make optimize_nops() interrupt safe and synced Richard reported the following crash: [ 0.036000] BUG: unable to handle kernel paging request at 55501e06 [ 0.036000] IP: [] common_interrupt+0xb/0x38 [ 0.036000] Call Trace: [ 0.036000] [] ? add_nops+0x90/0xa0 [ 0.036000] [] apply_alternatives+0x274/0x630 Chuck decoded: " 0: 8d 90 90 83 04 24 lea 0x24048390(%eax),%edx 6: 80 fc 0f cmp $0xf,%ah 9: a8 0f test $0xf,%al >> b: a0 06 1e 50 55 mov 0x55501e06,%al 10: 57 push %edi 11: 56 push %esi Interrupt 0x30 occurred while the alternatives code was replacing the initial 0x90,0x90,0x90 NOPs (from the ASM_CLAC macro) with the optimized version, 0x8d,0x76,0x00. Only the first byte has been replaced so far, and it makes a mess out of the insn decoding." optimize_nops() is buggy in two aspects: - It's not disabling interrupts across the modification - It's lacking a sync_core() call Add both. Fixes: 4fd4b6e5537c 'x86/alternatives: Use optimized NOPs for padding' Reported-and-tested-by: "Richard W.M. Jones" Signed-off-by: Thomas Gleixner Cc: Richard W.M. Jones Cc: Chuck Ebbert Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509031232340.15006@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/alternative.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c42827eb86cf..25f909362b7a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -338,10 +338,15 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); From dc3a04d551b5d21f1badbb39bfe8e5bc1289b184 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Sep 2015 17:11:22 -0700 Subject: [PATCH 08/96] security/device_cgroup: Fix RCU_LOCKDEP_WARN() condition f78f5b90c4ff ("rcu: Rename rcu_lockdep_assert() to RCU_LOCKDEP_WARN()") introduced a bug by incorrectly inverting the condition when moving from rcu_lockdep_assert() to RCU_LOCKDEP_WARN(). This commit therefore fixes the inversion. Reported-by: Felipe Balbi Reported-by: Tejun Heo Signed-off-by: Paul E. McKenney Acked-by: Serge Hallyn Tested-by: Josh Boyer --- security/device_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 73455089feef..03c1652c9a1f 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -401,7 +401,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, bool match = false; RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - lockdep_is_held(&devcgroup_mutex), + !lockdep_is_held(&devcgroup_mutex), "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { From 76fc5e7b2355af167dea1a32e93c57fc37900a5b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 4 Sep 2015 17:00:43 -0700 Subject: [PATCH 09/96] x86/vm86: Block non-root vm86(old) if mmap_min_addr != 0 vm86 exposes an interesting attack surface against the entry code. Since vm86 is mostly useless anyway if mmap_min_addr != 0, just turn it off in that case. There are some reports that vbetool can work despite setting mmap_min_addr to zero. This shouldn't break that use case, as CAP_SYS_RAWIO already overrides mmap_min_addr. Suggested-by: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Arjan van de Ven Cc: Austin S Hemmelgarn Cc: Borislav Petkov Cc: Brian Gerst Cc: Josh Boyer Cc: Kees Cook Cc: Matthew Garrett Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stas Sergeev Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/vm86_32.c | 27 +++++++++++++++++++ tools/testing/selftests/x86/entry_from_vm86.c | 5 ++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index abd8b856bd2b..524619351961 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) struct pt_regs *regs = current_pt_regs(); unsigned long err = 0; + err = security_mmap_addr(0); + if (err) { + /* + * vm86 cannot virtualize the address space, so vm86 users + * need to manage the low 1MB themselves using mmap. Given + * that BIOS places important data in the first page, vm86 + * is essentially useless if mmap_min_addr != 0. DOSEMU, + * for example, won't even bother trying to use vm86 if it + * can't map a page at virtual address 0. + * + * To reduce the available kernel attack surface, simply + * disallow vm86(old) for users who cannot mmap at va 0. + * + * The implementation of security_mmap_addr will allow + * suitably privileged users to map va 0 even if + * vm.mmap_min_addr is set above 0, and we want this + * behavior for vm86 as well, as it ensures that legacy + * tools like vbetool will not fail just because of + * vm.mmap_min_addr. + */ + pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n", + current->comm, task_pid_nr(current), + from_kuid_munged(&init_user_ns, current_uid())); + return -EPERM; + } + if (!vm86) { if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) return -ENOMEM; diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 9a43a59a9bb4..421c607a8856 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -116,8 +116,9 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, v86->regs.eip = eip; ret = vm86(VM86_ENTER, v86); - if (ret == -1 && errno == ENOSYS) { - printf("[SKIP]\tvm86 not supported\n"); + if (ret == -1 && (errno == ENOSYS || errno == EPERM)) { + printf("[SKIP]\tvm86 %s\n", + errno == ENOSYS ? "not supported" : "not allowed"); return false; } From 8901c18b6cafa51f7985f1031968bbfe9dc47735 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 2 Sep 2015 16:10:34 -0600 Subject: [PATCH 10/96] asus-nb-wmi: Add wapf=4 quirk for X456UA/X456UF These laptops boot with wifi as hard-blocked, with no obvious way to enable it. Using a quirk to set wapf=4 solves the problem. Signed-off-by: Daniel Drake Acked-by: Corentin Chary Signed-off-by: Darren Hart --- drivers/platform/x86/asus-nb-wmi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index abdaed34c728..131fee2b093e 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -126,6 +126,24 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_wapf4, }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X456UA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"), + }, + .driver_data = &quirk_asus_wapf4, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X456UF", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"), + }, + .driver_data = &quirk_asus_wapf4, + }, { .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. X501U", From a077224fd35b2f7fbc93f14cf67074fc792fbac2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Sep 2015 13:24:40 +0100 Subject: [PATCH 11/96] ARM: 8429/1: disable GCC SRA optimization While working on the 32-bit ARM port of UEFI, I noticed a strange corruption in the kernel log. The following snprintf() statement (in drivers/firmware/efi/efi.c:efi_md_typeattr_format()) snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", was producing the following output in the log: | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | | | | |UC] |RUN| | | | | | | |UC] As it turns out, this is caused by incorrect code being emitted for the string() function in lib/vsprintf.c. The following code if (!(spec.flags & LEFT)) { while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } } for (i = 0; i < len; ++i) { if (buf < end) *buf = *s; ++buf; ++s; } while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } when called with len == 0, triggers an issue in the GCC SRA optimization pass (Scalar Replacement of Aggregates), which handles promotion of signed struct members incorrectly. This is a known but as yet unresolved issue. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932). In this particular case, it is causing the second while loop to be executed erroneously a single time, causing the additional space characters to be printed. So disable the optimization by passing -fno-ipa-sra. Cc: Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7451b447cc2d..2c2b28ee4811 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -54,6 +54,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes From edcd591c77a48da753456f92daf8bb50fe9bac93 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 7 Sep 2015 13:18:03 -0600 Subject: [PATCH 12/96] locking/static_keys: Fix a silly typo Commit: 412758cb2670 ("jump label, locking/static_keys: Update docs") introduced a typo that might as well get fixed. Signed-off-by: Jonathan Corbet Cc: Andrew Morton Cc: Jason Baron Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150907131803.54c027e1@lwn.net Signed-off-by: Ingo Molnar --- Documentation/static-keys.txt | 2 +- include/linux/jump_label.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index f4cb0b2d5cd7..ec911583f6c5 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -16,7 +16,7 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); static_key_likely() -statick_key_unlikely() +static_key_unlikely() 0) Abstract diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8f6690..0684bd3a48fc 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -22,7 +22,7 @@ * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * static_key_likely() - * statick_key_unlikely() + * static_key_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we From 39dc53deff30d9b239ac36cfeb0ef2022d03a449 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 7 Sep 2015 00:29:15 +0100 Subject: [PATCH 13/96] ARM: swpan: fix nwfpe for uaccess changes NWFPE needs to access userspace to check whether the next instruction is another FP instruction. Allow userspace access for this read. Signed-off-by: Russell King --- arch/arm/nwfpe/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 71df43547659..39c20afad7ed 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -95,9 +95,10 @@ emulate: reteq r4 @ no, return failure next: + uaccess_enable r3 .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @ increment PC - + uaccess_disable r3 and r2, r6, #0x0F000000 @ test for FP insns teq r2, #0x0C000000 teqne r2, #0x0D000000 From 296254f3223d201f2aa53f5f717eedfdc63f3db8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 7 Sep 2015 00:30:06 +0100 Subject: [PATCH 14/96] ARM: uaccess: remove unneeded uaccess_save_and_disable macro This macro is never referenced, remove it. Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 3ae0eda5e64f..9007c518d1d8 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -487,11 +487,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm - .macro uaccess_save_and_disable, tmp - uaccess_save \tmp - uaccess_disable \tmp - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 From af4cb25df93d2e7a97d65db2bfacaa4400988dea Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 9 Sep 2015 21:19:49 +0100 Subject: [PATCH 15/96] ARM: uaccess: fix undefined instruction on ARMv7M/noMMU The use of get_domain() in copy_thread() results in an oops on ARMv7M/noMMU systems. The thread cpu_domain value is only used when CONFIG_CPU_USE_DOMAINS is enabled, so there's no need to save the value in copy_thread() except when this is enabled, and this option will never be enabled on these platforms. Unhandled exception: IPSR = 00000006 LR = fffffff1 CPU: 0 PID: 0 Comm: swapper Not tainted 4.2.0-next-20150909-00001-gb8ec5ad #41 Hardware name: NXP LPC18xx/43xx (Device Tree) task: 2823fbe0 ti: 2823c000 task.ti: 2823c000 PC is at copy_thread+0x18/0x92 LR is at copy_thread+0x19/0x92 pc : [<2800a46e>] lr : [<2800a46f>] psr: 4100000b sp : 2823df00 ip : 00000000 fp : 287c81c0 r10: 00000000 r9 : 00800300 r8 : 287c8000 r7 : 287c8000 r6 : 2818908d r5 : 00000000 r4 : 287ca000 r3 : 00000000 r2 : 00000000 r1 : fffffff0 r0 : 287ca048 xPSR: 4100000b Reported-by: Ariel D'Alessandro Signed-off-by: Russell King --- arch/arm/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3f18098dfd08..e550a4541f48 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -233,6 +233,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); +#ifdef CONFIG_CPU_USE_DOMAINS /* * Copy the initial value of the domain access control register * from the current thread: thread->addr_limit will have been @@ -240,6 +241,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, * kernel/fork.c */ thread->cpu_domain = get_domain(); +#endif if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); From f1ab428711358fbb747ba392c3448462494e6c6a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Sep 2015 18:22:35 +1000 Subject: [PATCH 16/96] crypto: vmx - VMX crypto should depend on CONFIG_VSX This code uses FP (floating point), Altivec and VSX (Vector-Scalar Extension). It can just depend on CONFIG_VSX though, because that already depends on FP and Altivec. Otherwise we get lots of link errors such as: drivers/built-in.o: In function `.p8_aes_setkey': aes.c:(.text+0x2d325c): undefined reference to `.enable_kernel_altivec' aes.c:(.text+0x2d326c): undefined reference to `.enable_kernel_vsx' Signed-off-by: Michael Ellerman Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 07bc7aa6b224..d234719065a5 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -461,7 +461,7 @@ config CRYPTO_DEV_QCE config CRYPTO_DEV_VMX bool "Support for VMX cryptographic acceleration instructions" - depends on PPC64 + depends on PPC64 && VSX help Support for VMX cryptographic acceleration instructions. From 9da75de030bb6e49475ef37c8495d07e98cfeb33 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 9 Sep 2015 14:27:07 +0200 Subject: [PATCH 17/96] crypto: sunxi-ss - Fix a possible driver hang with ciphers The sun4i_ss_opti_poll function cipher data until the output miter have a length of 0. If the crypto API client, give more SGs than necessary this could result in an infinite loop. Fix it by checking for remaining bytes, just like sun4i_ss_cipher_poll(). Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu --- drivers/crypto/sunxi-ss/sun4i-ss-cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index e070c316e8b7..a19ee127edca 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -104,7 +104,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq) sg_miter_next(&mo); oo = 0; } - } while (mo.length > 0); + } while (oleft > 0); if (areq->info) { for (i = 0; i < 4 && i < ivsize / 4; i++) { From 85b4e4eb2f9ac29ee8ec47f1f055f251cb251a3c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 10 Sep 2015 00:08:45 +0200 Subject: [PATCH 18/96] wmi: Remove private %pUL implementation The work performed by wmi_gtoa is equivalent to simply sprintf(out, "%pUL", in), so one could replace its body by this. However, most users feed the result directly as a %s argument to some other function which also understands the %p extensions (they all ultimately use vsnprintf), so we can eliminate some stack buffers and quite a bit of code by just using %pUL directly. In wmi_dev_uevent I'm not sure whether there's room for a nul-terminator in env->buf, so I've just replaced wmi_gtoa with the equivalent sprintf call. Signed-off-by: Rasmus Villemoes Signed-off-by: Darren Hart --- drivers/platform/x86/wmi.c | 51 +++++--------------------------------- 1 file changed, 6 insertions(+), 45 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index aac47573f9ed..eb391a281833 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -194,34 +194,6 @@ static bool wmi_parse_guid(const u8 *src, u8 *dest) return true; } -/* - * Convert a raw GUID to the ACII string representation - */ -static int wmi_gtoa(const char *in, char *out) -{ - int i; - - for (i = 3; i >= 0; i--) - out += sprintf(out, "%02X", in[i] & 0xFF); - - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[5] & 0xFF); - out += sprintf(out, "%02X", in[4] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[7] & 0xFF); - out += sprintf(out, "%02X", in[6] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[8] & 0xFF); - out += sprintf(out, "%02X", in[9] & 0xFF); - out += sprintf(out, "-"); - - for (i = 10; i <= 15; i++) - out += sprintf(out, "%02X", in[i] & 0xFF); - - *out = '\0'; - return 0; -} - static bool find_guid(const char *guid_string, struct wmi_block **out) { char tmp[16], guid_input[16]; @@ -457,11 +429,7 @@ EXPORT_SYMBOL_GPL(wmi_set_block); static void wmi_dump_wdg(const struct guid_block *g) { - char guid_string[37]; - - wmi_gtoa(g->guid, guid_string); - - pr_info("%s:\n", guid_string); + pr_info("%pUL:\n", g->guid); pr_info("\tobject_id: %c%c\n", g->object_id[0], g->object_id[1]); pr_info("\tnotify_id: %02X\n", g->notify_id); pr_info("\treserved: %02X\n", g->reserved); @@ -661,7 +629,6 @@ EXPORT_SYMBOL_GPL(wmi_has_guid); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - char guid_string[37]; struct wmi_block *wblock; wblock = dev_get_drvdata(dev); @@ -670,9 +637,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, return strlen(buf); } - wmi_gtoa(wblock->gblock.guid, guid_string); - - return sprintf(buf, "wmi:%s\n", guid_string); + return sprintf(buf, "wmi:%pUL\n", wblock->gblock.guid); } static DEVICE_ATTR_RO(modalias); @@ -695,7 +660,7 @@ static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env) if (!wblock) return -ENOMEM; - wmi_gtoa(wblock->gblock.guid, guid_string); + sprintf(guid_string, "%pUL", wblock->gblock.guid); strcpy(&env->buf[env->buflen - 1], "wmi:"); memcpy(&env->buf[env->buflen - 1 + 4], guid_string, 36); @@ -721,12 +686,9 @@ static struct class wmi_class = { static int wmi_create_device(const struct guid_block *gblock, struct wmi_block *wblock, acpi_handle handle) { - char guid_string[37]; - wblock->dev.class = &wmi_class; - wmi_gtoa(gblock->guid, guid_string); - dev_set_name(&wblock->dev, "%s", guid_string); + dev_set_name(&wblock->dev, "%pUL", gblock->guid); dev_set_drvdata(&wblock->dev, wblock); @@ -877,7 +839,6 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) struct guid_block *block; struct wmi_block *wblock; struct list_head *p; - char guid_string[37]; list_for_each(p, &wmi_block_list) { wblock = list_entry(p, struct wmi_block, list); @@ -888,8 +849,8 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) if (wblock->handler) wblock->handler(event, wblock->handler_data); if (debug_event) { - wmi_gtoa(wblock->gblock.guid, guid_string); - pr_info("DEBUG Event GUID: %s\n", guid_string); + pr_info("DEBUG Event GUID: %pUL\n", + wblock->gblock.guid); } acpi_bus_generate_netlink_event( From aefc574bbbbe74bb891ba392d98f2d59a417c774 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 10 Sep 2015 10:00:55 +0200 Subject: [PATCH 19/96] Revert "twl4030_charger: correctly handle -EPROBE_DEFER from devm_usb_get_phy_by_node" Revert commit 3fc3895e4fe17ee92ae1d1bb9f04da6069e8c370, since it introduced a boot failure on some OMAP platforms. Reported-by: Kevin Hilman Signed-off-by: Sebastian Reichel --- drivers/power/twl4030_charger.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c index f4f2c1f76c32..54b0325362db 100644 --- a/drivers/power/twl4030_charger.c +++ b/drivers/power/twl4030_charger.c @@ -1057,13 +1057,9 @@ static int twl4030_bci_probe(struct platform_device *pdev) phynode = of_find_compatible_node(bci->dev->of_node->parent, NULL, "ti,twl4030-usb"); - if (phynode) { + if (phynode) bci->transceiver = devm_usb_get_phy_by_node( bci->dev, phynode, &bci->usb_nb); - if (IS_ERR(bci->transceiver) && - PTR_ERR(bci->transceiver) == -EPROBE_DEFER) - return -EPROBE_DEFER; - } } /* Enable interrupts now. */ From e11fc21e756e662e465cac3da6547d438d0b1446 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sat, 5 Sep 2015 02:32:34 +0300 Subject: [PATCH 20/96] twl4030_charger: fix another compile error When CONFIG_CHARGER_TWL4030=y and CONFIG_TWL4030_MADC=m we get a compile error: drivers/built-in.o: In function `twl4030_charger_update_current': twl4030_charger.c:(.text+0x504681): undefined reference to `twl4030_get_madc_conversion' Use IS_REACHABLE to fix it. Cc: NeilBrown Reported-by: Randy Dunlap Signed-off-by: Grazvydas Ignotas Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel --- drivers/power/twl4030_charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c index 54b0325362db..74f2d3ff1d7c 100644 --- a/drivers/power/twl4030_charger.c +++ b/drivers/power/twl4030_charger.c @@ -91,7 +91,7 @@ #define TWL4030_MSTATEC_COMPLETE1 0x0b #define TWL4030_MSTATEC_COMPLETE4 0x0e -#if IS_ENABLED(CONFIG_TWL4030_MADC) +#if IS_REACHABLE(CONFIG_TWL4030_MADC) /* * If AC (Accessory Charger) voltage exceeds 4.5V (MADC 11) * then AC is available. From bb0f73616396e7929b68d3bdea70064003599d33 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 21 Aug 2015 19:51:41 +0200 Subject: [PATCH 21/96] clk: rockchip: handle critical clocks after registering all clocks Currently the registration of critical clocks is done in the function shared between rk3066 and rk3188 clock trees. That results in them getting handled maybe before all of them are registered. Therefore move the critical clock handling down to the end of the soc- specific clock registration function, so that all clocks are registered before they're maybe handled as critical clock. Signed-off-by: Heiko Stuebner Tested-by: Michael Niewoehner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3188.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index ed02bbc7b11f..f26e3edaab2f 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -744,8 +744,6 @@ static void __init rk3188_common_clk_init(struct device_node *np) rockchip_clk_register_branches(common_clk_branches, ARRAY_SIZE(common_clk_branches)); - rockchip_clk_protect_critical(rk3188_critical_clocks, - ARRAY_SIZE(rk3188_critical_clocks)); rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), ROCKCHIP_SOFTRST_HIWORD_MASK); @@ -765,6 +763,8 @@ static void __init rk3066a_clk_init(struct device_node *np) mux_armclk_p, ARRAY_SIZE(mux_armclk_p), &rk3066_cpuclk_data, rk3066_cpuclk_rates, ARRAY_SIZE(rk3066_cpuclk_rates)); + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3066a_cru, "rockchip,rk3066a-cru", rk3066a_clk_init); @@ -801,6 +801,9 @@ static void __init rk3188a_clk_init(struct device_node *np) pr_warn("%s: missing clocks to reparent aclk_cpu_pre to gpll\n", __func__); } + + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3188a_cru, "rockchip,rk3188a-cru", rk3188a_clk_init); From 1166160ab531198f7abc773992c0e04d0f9b7600 Mon Sep 17 00:00:00 2001 From: Michael Niewoehner Date: Tue, 25 Aug 2015 22:22:07 +0200 Subject: [PATCH 22/96] clk: rockchip: add pclk_cpu to the list of rk3188 critical clocks pclk_cpu needs to keep running because it is needed for devices like the act8865 regulator but with the recent gpio clock handling this is not always the case anymore. So add it to the list of critical clocks. Signed-off-by: Michael Niewoehner Reviewed-by: Heiko Stuebner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3188.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index f26e3edaab2f..fa2f36b2abe7 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -716,6 +716,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "aclk_cpu", "aclk_peri", "hclk_peri", + "pclk_cpu", }; static void __init rk3188_common_clk_init(struct device_node *np) From 3bba75a2ec32bd5fa7024a4de3b8cf9ee113a76a Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Sun, 23 Aug 2015 11:32:37 +0200 Subject: [PATCH 23/96] clk: rockchip: Add pclk_peri to critical clocks on RK3066/RK3188 Now that the rockchip clock subsystem does clock gating with GPIO banks, these are no longer enabled once during probe and no longer stay enabled for eternity. When all these clocks are disabled, the parent clock pclk_peri might be disabled too, as no other child claims it. So, we need to add pclk_peri to the critical clocks. Signed-off-by: Romain Perier Tested-by: Michael Niewoehner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3188.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index fa2f36b2abe7..abb47608713b 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -717,6 +717,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "aclk_peri", "hclk_peri", "pclk_cpu", + "pclk_peri", }; static void __init rk3188_common_clk_init(struct device_node *np) From cf680eae34d26bd474c2ed3bd7d3aff59054aed5 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 9 Sep 2015 11:25:44 -0600 Subject: [PATCH 24/96] toshiba_acpi: Fix USB Sleep and Music always disabled Commit e1a949c1b988 ("toshiba_acpi: Refactor *{get, set} functions return value") made changes on the return type of the HCI/SCI functions, but a typo on the USB Sleep and Music code is always reporting non existent support for such feature. This patch corrects the typo, changing an assignment to a comparison, making the laptops with actual support for such feature to work again. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 6740c513919c..c434b53323ed 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -938,7 +938,7 @@ static int toshiba_usb_sleep_music_get(struct toshiba_acpi_dev *dev, u32 *state) else if (result == TOS_NOT_SUPPORTED) return -ENODEV; - return result = TOS_SUCCESS ? 0 : -EIO; + return result == TOS_SUCCESS ? 0 : -EIO; } static int toshiba_usb_sleep_music_set(struct toshiba_acpi_dev *dev, u32 state) From 53147b6cabee5e8d1997b5682fcc0c3b72ddf9c2 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 9 Sep 2015 11:25:45 -0600 Subject: [PATCH 25/96] toshiba_acpi: Fix hotkeys registration on some toshiba models Commit a2b3471b5b13 ("toshiba_acpi: Use the Hotkey Event Type function for keymap choosing") changed the *setup_keyboard function to query for the Hotkey Event Type to help choose the correct keymap, but turns out that here are certain Toshiba models out there not implementing this feature, and thus, failing to continue the input device registration and leaving such laptops without hotkey support. This patch changes such check, and instead of returning an error if the Hotkey Event Type is not present, we simply inform userspace about it, changing the message printed from err to notice, making the function responsible for registering the input device to continue. This issue was found on a Toshiba Portege Z30-B, but there might be some other models out there affected by this regression as well. Cc: # 4.1+ Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba_acpi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index c434b53323ed..f2372f400ddb 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2398,11 +2398,9 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev) if (error) return error; - error = toshiba_hotkey_event_type_get(dev, &events_type); - if (error) { - pr_err("Unable to query Hotkey Event Type\n"); - return error; - } + if (toshiba_hotkey_event_type_get(dev, &events_type)) + pr_notice("Unable to query Hotkey Event Type\n"); + dev->hotkey_event_type = events_type; dev->hotkey_dev = input_allocate_device(); From 43b3f02899f74ae9914a39547cc5492156f0027a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Sep 2015 17:25:23 +0200 Subject: [PATCH 26/96] locking/qspinlock/x86: Fix performance regression under unaccelerated VMs Dave ran into horrible performance on a VM without PARAVIRT_SPINLOCKS set and Linus noted that the test-and-set implementation was retarded. One should spin on the variable with a load, not a RMW. While there, remove 'queued' from the name, as the lock isn't queued at all, but a simple test-and-set. Suggested-by: Linus Torvalds Reported-by: Dave Chinner Tested-by: Dave Chinner Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: stable@vger.kernel.org # v4.2+ Link: http://lkml.kernel.org/r/20150904152523.GR18673@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/qspinlock.h | 16 ++++++++++++---- include/asm-generic/qspinlock.h | 4 ++-- kernel/locking/qspinlock.c | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9d51fae1cba3..8dde3bdc4a05 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,15 +39,23 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif -#define virt_queued_spin_lock virt_queued_spin_lock +#define virt_spin_lock virt_spin_lock -static inline bool virt_queued_spin_lock(struct qspinlock *lock) +static inline bool virt_spin_lock(struct qspinlock *lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) - cpu_relax(); + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); return true; } diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 83bfb87f5bf1..e2aadbc7151f 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -111,8 +111,8 @@ static inline void queued_spin_unlock_wait(struct qspinlock *lock) cpu_relax(); } -#ifndef virt_queued_spin_lock -static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifndef virt_spin_lock +static __always_inline bool virt_spin_lock(struct qspinlock *lock) { return false; } diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 337c8818541d..87e9ce6a63c5 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) if (pv_enabled()) goto queue; - if (virt_queued_spin_lock(lock)) + if (virt_spin_lock(lock)) return; /* From a6b277857fd2c990bc208ca1958d3f34d26052f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 5 Sep 2015 16:55:05 +0200 Subject: [PATCH 27/96] locking/qspinlock/x86: Only emit the test-and-set fallback when building guest support Only emit the test-and-set fallback for Hypervisors lacking PARAVIRT_SPINLOCKS support when building for guests. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 4.2 Signed-off-by: Ingo Molnar --- arch/x86/include/asm/qspinlock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 8dde3bdc4a05..eaba08076030 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,8 +39,8 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif +#ifdef CONFIG_PARAVIRT #define virt_spin_lock virt_spin_lock - static inline bool virt_spin_lock(struct qspinlock *lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -59,6 +59,7 @@ static inline bool virt_spin_lock(struct qspinlock *lock) return true; } +#endif /* CONFIG_PARAVIRT */ #include From 5473e0cc37c03c576adbda7591a6cc8e37c1bb7f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 28 Aug 2015 14:55:56 +0800 Subject: [PATCH 28/96] sched: 'Annotate' migrate_tasks() Kernel testing triggered this warning: | WARNING: CPU: 0 PID: 13 at kernel/sched/core.c:1156 do_set_cpus_allowed+0x7e/0x80() | Modules linked in: | CPU: 0 PID: 13 Comm: migration/0 Not tainted 4.2.0-rc1-00049-g25834c7 #2 | Call Trace: | dump_stack+0x4b/0x75 | warn_slowpath_common+0x8b/0xc0 | warn_slowpath_null+0x22/0x30 | do_set_cpus_allowed+0x7e/0x80 | cpuset_cpus_allowed_fallback+0x7c/0x170 | select_fallback_rq+0x221/0x280 | migration_call+0xe3/0x250 | notifier_call_chain+0x53/0x70 | __raw_notifier_call_chain+0x1e/0x30 | cpu_notify+0x28/0x50 | take_cpu_down+0x22/0x40 | multi_cpu_stop+0xd5/0x140 | cpu_stopper_thread+0xbc/0x170 | smpboot_thread_fn+0x174/0x2f0 | kthread+0xc4/0xe0 | ret_from_kernel_thread+0x21/0x30 As Peterz pointed out: | So the normal rules for changing task_struct::cpus_allowed are holding | both pi_lock and rq->lock, such that holding either stabilizes the mask. | | This is so that wakeup can happen without rq->lock and load-balance | without pi_lock. | | From this we already get the relaxation that we can omit acquiring | rq->lock if the task is not on the rq, because in that case | load-balancing will not apply to it. | | ** these are the rules currently tested in do_set_cpus_allowed() ** | | Now, since __set_cpus_allowed_ptr() uses task_rq_lock() which | unconditionally acquires both locks, we could get away with holding just | rq->lock when on_rq for modification because that'd still exclude | __set_cpus_allowed_ptr(), it would also work against | __kthread_bind_mask() because that assumes !on_rq. | | That said, this is all somewhat fragile. | | Now, I don't think dropping rq->lock is quite as disastrous as it | usually is because !cpu_active at this point, which means load-balance | will not interfere, but that too is somewhat fragile. | | So we end up with a choice of two fragile.. This patch fixes it by following the rules for changing task_struct::cpus_allowed with both pi_lock and rq->lock held. Reported-by: kernel test robot Reported-by: Sasha Levin Signed-off-by: Wanpeng Li [ Modified changelog and patch. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/BLU436-SMTP1660820490DE202E3934ED3806E0@phx.gbl Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0902e4d72671..9b786704d34b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5183,24 +5183,47 @@ static void migrate_tasks(struct rq *dead_rq) break; /* - * Ensure rq->lock covers the entire task selection - * until the migration. + * pick_next_task assumes pinned rq->lock. */ lockdep_pin_lock(&rq->lock); next = pick_next_task(rq, &fake_task); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); + /* + * Rules for changing task_struct::cpus_allowed are holding + * both pi_lock and rq->lock, such that holding either + * stabilizes the mask. + * + * Drop rq->lock is not quite as disastrous as it usually is + * because !cpu_active at this point, which means load-balance + * will not interfere. Also, stop-machine. + */ + lockdep_unpin_lock(&rq->lock); + raw_spin_unlock(&rq->lock); + raw_spin_lock(&next->pi_lock); + raw_spin_lock(&rq->lock); + + /* + * Since we're inside stop-machine, _nothing_ should have + * changed the task, WARN if weird stuff happened, because in + * that case the above rq->lock drop is a fail too. + */ + if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { + raw_spin_unlock(&next->pi_lock); + continue; + } + /* Find suitable destination for @next, with force if needed. */ dest_cpu = select_fallback_rq(dead_rq->cpu, next); - lockdep_unpin_lock(&rq->lock); rq = __migrate_task(rq, next, dest_cpu); if (rq != dead_rq) { raw_spin_unlock(&rq->lock); rq = dead_rq; raw_spin_lock(&rq->lock); } + raw_spin_unlock(&next->pi_lock); } rq->stop = stop; From 84cba178a3b88efe2668a9039f70abda072faa21 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 10 Sep 2015 13:11:55 +0300 Subject: [PATCH 29/96] crypto: testmgr - don't copy from source IV too much While the destination buffer 'iv' is MAX_IVLEN size, the source 'template[i].iv' could be smaller, thus memcpy may read read invalid memory. Use crypto_skcipher_ivsize() to get real ivsize and pass it to memcpy. Signed-off-by: Andrey Ryabinin Signed-off-by: Herbert Xu --- crypto/testmgr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 35c2de136971..fa18753f5c34 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -940,6 +940,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, char *xbuf[XBUFSIZE]; char *xoutbuf[XBUFSIZE]; int ret = -ENOMEM; + unsigned int ivsize = crypto_skcipher_ivsize(tfm); if (testmgr_alloc_buf(xbuf)) goto out_nobuf; @@ -975,7 +976,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); @@ -1051,7 +1052,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); From 4c17a6d56bb0cad3066a714e94f7185a24b40f49 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 11 Sep 2015 16:27:27 +0200 Subject: [PATCH 30/96] CIFS: fix type confusion in copy offload ioctl This might lead to local privilege escalation (code execution as kernel) for systems where the following conditions are met: - CONFIG_CIFS_SMB2 and CONFIG_CIFS_POSIX are enabled - a cifs filesystem is mounted where: - the mount option "vers" was used and set to a value >=2.0 - the attacker has write access to at least one file on the filesystem To attack this, an attacker would have to guess the target_tcon pointer (but guessing wrong doesn't cause a crash, it just returns an error code) and win a narrow race. CC: Stable Signed-off-by: Jann Horn Signed-off-by: Steve French --- fs/cifs/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index c63f5227b681..28a77bf1d559 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); From 716ff1921a86c637b8875c7bb312fc6755fa9300 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 08:17:39 +0100 Subject: [PATCH 31/96] ARM: domains: thread_info.h no longer needs asm/domains.h As of 1eef5d2f1b46 ("ARM: domains: switch to keeping domain value in register") we no longer need to include asm/domains.h into asm/thread_info.h. Remove it. Tested-by: Robert Jarzmik Signed-off-by: Russell King --- arch/arm/include/asm/thread_info.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 0a0aec410d8c..ae02e68b61fc 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -25,7 +25,6 @@ struct task_struct; #include -#include typedef unsigned long mm_segment_t; From 6e8f580d1fcc18e290713984c379cb97131c015a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 08:34:52 +0100 Subject: [PATCH 32/96] ARM: domains: add memory dependencies to get_domain/set_domain We need to have memory dependencies on get_domain/set_domain to avoid the compiler over-optimising these inline assembly instructions. Loads/stores must not be reordered across a set_domain(), so introduce a compiler barrier for that assembly. The value of get_domain() must not be cached across a set_domain(), but we still want to allow the compiler to optimise it away. Introduce a dependency on current_thread_info()->cpu_domain to avoid this; the new memory clobber in set_domain() should therefore cause the compiler to re-load this. The other advantage of using this is we should have its address in the register set already, or very soon after at most call sites. Tested-by: Robert Jarzmik Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index e878129f2fee..fc8ba1663601 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include +#include #endif /* @@ -89,7 +90,8 @@ static inline unsigned int get_domain(void) asm( "mrc p15, 0, %0, c3, c0 @ get domain" - : "=r" (domain)); + : "=r" (domain) + : "m" (current_thread_info()->cpu_domain)); return domain; } @@ -98,7 +100,7 @@ static inline void set_domain(unsigned val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" - : : "r" (val)); + : : "r" (val) : "memory"); isb(); } From 0b61f2c0f37983c98ed4207f3f5e265938371b68 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 11 Sep 2015 17:25:59 +0100 Subject: [PATCH 33/96] arm/xen: Enable user access to the kernel before issuing a privcmd call When Xen is copying data to/from the guest it will check if the kernel has the right to do the access. If not, the hypercall will return an error. After the commit a5e090acbf545c0a3b04080f8a488b17ec41fe02 "ARM: software-based privileged-no-access support", the kernel can't access any longer the user space by default. This will result to fail on every hypercall made by the userspace (i.e via privcmd). We have to enable the userspace access and then restore the correct permission every time the privcmd is used to made an hypercall. I didn't find generic helpers to do a these operations, so the change is only arm32 specific. Reported-by: Riku Voipio Signed-off-by: Julien Grall Signed-off-by: Russell King --- arch/arm/xen/hypercall.S | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index f00e08075938..10fd99c568c6 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -98,8 +98,23 @@ ENTRY(privcmd_call) mov r1, r2 mov r2, r3 ldr r3, [sp, #8] + /* + * Privcmd calls are issued by the userspace. We need to allow the + * kernel to access the userspace memory before issuing the hypercall. + */ + uaccess_enable r4 + + /* r4 is loaded now as we use it as scratch register before */ ldr r4, [sp, #4] __HVC(XEN_IMM) + + /* + * Disable userspace access from kernel. This is fine to do it + * unconditionally as no set_fs(KERNEL_DS)/set_fs(get_ds()) is + * called before. + */ + uaccess_disable r4 + ldm sp!, {r4} ret lr ENDPROC(privcmd_call); From a4a5a7379e4ca03c192b732d61e446994eb67bbc Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Fri, 11 Sep 2015 17:12:27 +0100 Subject: [PATCH 34/96] ARM: 8431/1: fix alignement of __bug_table section entries On old ARM chips, unaligned accesses to memory are not trapped and fixed. On module load, symbols are relocated, and the relocation of __bug_table symbols is done on a u32 basis. Yet the section is not aligned to a multiple of 4 address, but to a multiple of 2. This triggers an Oops on pxa architecture, where address 0xbf0021ea is the first relocation in the __bug_table section : apply_relocate(): pxa3xx_nand: section 13 reloc 0 sym '' Unable to handle kernel paging request at virtual address bf0021ea pgd = e1cd0000 [bf0021ea] *pgd=c1cce851, *pte=c1cde04f, *ppte=c1cde01f Internal error: Oops: 23 [#1] ARM Modules linked in: CPU: 0 PID: 606 Comm: insmod Not tainted 4.2.0-rc8-next-20150828-cm-x300+ #887 Hardware name: CM-X300 module task: e1c68700 ti: e1c3e000 task.ti: e1c3e000 PC is at apply_relocate+0x2f4/0x3d4 LR is at 0xbf0021ea pc : [] lr : [] psr: 80000013 sp : e1c3fe30 ip : 60000013 fp : e49e8c60 r10: e49e8fa8 r9 : 00000000 r8 : e49e7c58 r7 : e49e8c38 r6 : e49e8a58 r5 : e49e8920 r4 : e49e8918 r3 : bf0021ea r2 : bf007034 r1 : 00000000 r0 : bf000000 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 0000397f Table: c1cd0018 DAC: 00000051 Process insmod (pid: 606, stack limit = 0xe1c3e198) [] (apply_relocate) from [] (load_module+0x1248/0x1f5c) [] (load_module) from [] (SyS_init_module+0xe4/0x170) [] (SyS_init_module) from [] (ret_fast_syscall+0x0/0x38) Fix this by ensuring entries in __bug_table are all aligned to at least of multiple of 4. This transforms a module section __bug_table as : - [12] __bug_table PROGBITS 00000000 002232 000018 00 A 0 0 1 + [12] __bug_table PROGBITS 00000000 002232 000018 00 A 0 0 4 Signed-off-by: Robert Jarzmik Reviewed-by: Dave Martin Signed-off-by: Russell King --- arch/arm/include/asm/bug.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index b274bde24905..e7335a92144e 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -40,6 +40,7 @@ do { \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ ".pushsection __bug_table,\"a\"\n" \ + ".align 2\n" \ "3:\t.word 1b, 2b\n" \ "\t.hword " #__line ", 0\n" \ ".popsection"); \ From 447e9a4d27484175a84daaa8e03d35c650f443b7 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 11 Sep 2015 12:52:26 -0400 Subject: [PATCH 35/96] IB/ehca: Deprecate driver, move to staging, schedule deletion The ehca driver is only supported on IBM machines with a custom EBus. As they have opted to build their newer machines using more industry standard technology and haven't really been pushing EBus capable machines for a while, this driver can now safely be moved to the staging area and scheduled for eventual removal. This plan was brought to IBM's attention and received their sign-off. Cc: alexs@linux.vnet.ibm.com Cc: hnguyen@de.ibm.com Cc: raisch@de.ibm.com Cc: stefan.roscher@de.ibm.com Signed-off-by: Doug Ledford --- drivers/infiniband/Kconfig | 1 - drivers/infiniband/hw/Makefile | 1 - drivers/staging/rdma/Kconfig | 2 ++ drivers/staging/rdma/Makefile | 1 + drivers/{infiniband/hw => staging/rdma}/ehca/Kconfig | 3 ++- drivers/{infiniband/hw => staging/rdma}/ehca/Makefile | 0 drivers/staging/rdma/ehca/TODO | 4 ++++ drivers/{infiniband/hw => staging/rdma}/ehca/ehca_av.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_classes.h | 0 .../hw => staging/rdma}/ehca/ehca_classes_pSeries.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_cq.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_eq.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_hca.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_irq.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_irq.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_iverbs.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_main.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_mcast.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_mrmw.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_mrmw.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_pd.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_qes.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_qp.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_reqs.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_sqp.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_tools.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ehca_uverbs.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/hcp_if.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/hcp_if.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/hcp_phyp.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/hcp_phyp.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/hipz_fns.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/hipz_fns_core.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/hipz_hw.h | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ipz_pt_fn.c | 0 drivers/{infiniband/hw => staging/rdma}/ehca/ipz_pt_fn.h | 0 36 files changed, 9 insertions(+), 3 deletions(-) rename drivers/{infiniband/hw => staging/rdma}/ehca/Kconfig (69%) rename drivers/{infiniband/hw => staging/rdma}/ehca/Makefile (100%) create mode 100644 drivers/staging/rdma/ehca/TODO rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_av.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_classes.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_classes_pSeries.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_cq.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_eq.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_hca.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_irq.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_irq.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_iverbs.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_main.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_mcast.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_mrmw.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_mrmw.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_pd.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_qes.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_qp.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_reqs.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_sqp.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_tools.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ehca_uverbs.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/hcp_if.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/hcp_if.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/hcp_phyp.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/hcp_phyp.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/hipz_fns.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/hipz_fns_core.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/hipz_hw.h (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ipz_pt_fn.c (100%) rename drivers/{infiniband/hw => staging/rdma}/ehca/ipz_pt_fn.h (100%) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index da4c6979fbb8..aa26f3c3416b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -56,7 +56,6 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/ehca/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 1bdb9996d371..aded2a5cc2d5 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,6 +1,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig index cf5fe9bb87a1..d7f62359d743 100644 --- a/drivers/staging/rdma/Kconfig +++ b/drivers/staging/rdma/Kconfig @@ -24,6 +24,8 @@ if STAGING_RDMA source "drivers/staging/rdma/amso1100/Kconfig" +source "drivers/staging/rdma/ehca/Kconfig" + source "drivers/staging/rdma/hfi1/Kconfig" source "drivers/staging/rdma/ipath/Kconfig" diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile index cbd915ac7f20..139d78ef2c24 100644 --- a/drivers/staging/rdma/Makefile +++ b/drivers/staging/rdma/Makefile @@ -1,4 +1,5 @@ # Entries for RDMA_STAGING tree obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/ +obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ obj-$(CONFIG_INFINIBAND_IPATH) += ipath/ diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig similarity index 69% rename from drivers/infiniband/hw/ehca/Kconfig rename to drivers/staging/rdma/ehca/Kconfig index 59f807d8d58e..3fadd2ad6426 100644 --- a/drivers/infiniband/hw/ehca/Kconfig +++ b/drivers/staging/rdma/ehca/Kconfig @@ -2,7 +2,8 @@ config INFINIBAND_EHCA tristate "eHCA support" depends on IBMEBUS ---help--- - This driver supports the IBM pSeries eHCA InfiniBand adapter. + This driver supports the deprecated IBM pSeries eHCA InfiniBand + adapter. To compile the driver as a module, choose M here. The module will be called ib_ehca. diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile similarity index 100% rename from drivers/infiniband/hw/ehca/Makefile rename to drivers/staging/rdma/ehca/Makefile diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO new file mode 100644 index 000000000000..199a4a600142 --- /dev/null +++ b/drivers/staging/rdma/ehca/TODO @@ -0,0 +1,4 @@ +9/2015 + +The ehca driver has been deprecated and moved to drivers/staging/rdma. +It will be removed in the 4.6 merge window. diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_av.c rename to drivers/staging/rdma/ehca/ehca_av.c diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_classes.h rename to drivers/staging/rdma/ehca/ehca_classes.h diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_classes_pSeries.h rename to drivers/staging/rdma/ehca/ehca_classes_pSeries.h diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_cq.c rename to drivers/staging/rdma/ehca/ehca_cq.c diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_eq.c rename to drivers/staging/rdma/ehca/ehca_eq.c diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_hca.c rename to drivers/staging/rdma/ehca/ehca_hca.c diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_irq.c rename to drivers/staging/rdma/ehca/ehca_irq.c diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_irq.h rename to drivers/staging/rdma/ehca/ehca_irq.h diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_iverbs.h rename to drivers/staging/rdma/ehca/ehca_iverbs.h diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_main.c rename to drivers/staging/rdma/ehca/ehca_main.c diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_mcast.c rename to drivers/staging/rdma/ehca/ehca_mcast.c diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_mrmw.c rename to drivers/staging/rdma/ehca/ehca_mrmw.c diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_mrmw.h rename to drivers/staging/rdma/ehca/ehca_mrmw.h diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_pd.c rename to drivers/staging/rdma/ehca/ehca_pd.c diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_qes.h rename to drivers/staging/rdma/ehca/ehca_qes.h diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_qp.c rename to drivers/staging/rdma/ehca/ehca_qp.c diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_reqs.c rename to drivers/staging/rdma/ehca/ehca_reqs.c diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_sqp.c rename to drivers/staging/rdma/ehca/ehca_sqp.c diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_tools.h rename to drivers/staging/rdma/ehca/ehca_tools.h diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c similarity index 100% rename from drivers/infiniband/hw/ehca/ehca_uverbs.c rename to drivers/staging/rdma/ehca/ehca_uverbs.c diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c similarity index 100% rename from drivers/infiniband/hw/ehca/hcp_if.c rename to drivers/staging/rdma/ehca/hcp_if.c diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h similarity index 100% rename from drivers/infiniband/hw/ehca/hcp_if.h rename to drivers/staging/rdma/ehca/hcp_if.h diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c similarity index 100% rename from drivers/infiniband/hw/ehca/hcp_phyp.c rename to drivers/staging/rdma/ehca/hcp_phyp.c diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h similarity index 100% rename from drivers/infiniband/hw/ehca/hcp_phyp.h rename to drivers/staging/rdma/ehca/hcp_phyp.h diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h similarity index 100% rename from drivers/infiniband/hw/ehca/hipz_fns.h rename to drivers/staging/rdma/ehca/hipz_fns.h diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h similarity index 100% rename from drivers/infiniband/hw/ehca/hipz_fns_core.h rename to drivers/staging/rdma/ehca/hipz_fns_core.h diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h similarity index 100% rename from drivers/infiniband/hw/ehca/hipz_hw.h rename to drivers/staging/rdma/ehca/hipz_hw.h diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c similarity index 100% rename from drivers/infiniband/hw/ehca/ipz_pt_fn.c rename to drivers/staging/rdma/ehca/ipz_pt_fn.c diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h similarity index 100% rename from drivers/infiniband/hw/ehca/ipz_pt_fn.h rename to drivers/staging/rdma/ehca/ipz_pt_fn.h From eda2116f4ab6d79cfcffc202b5d2bbb0797ba013 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 11 Sep 2015 19:24:19 -0500 Subject: [PATCH 36/96] [CIFS] mount option sec=none not displayed properly in /proc/mounts When the user specifies "sec=none" in a cifs mount, we set sec_type as unspecified (and set a flag and the username will be null) rather than setting sectype as "none" so cifs_show_security was not properly displaying it in cifs /proc/mounts entries. Signed-off-by: Steve French Reviewed-by: Jeff Layton --- fs/cifs/cifsfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6a1119e87fbb..e739950ca084 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -325,8 +325,11 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) static void cifs_show_security(struct seq_file *s, struct cifs_ses *ses) { - if (ses->sectype == Unspecified) + if (ses->sectype == Unspecified) { + if (ses->user_name == NULL) + seq_puts(s, ",sec=none"); return; + } seq_puts(s, ",sec="); From 728d29400488d54974d3317fe8a232b45fdb42ee Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 31 Aug 2015 16:13:47 -0700 Subject: [PATCH 37/96] hwmon: (nct6775) Swap STEP_UP_TIME and STEP_DOWN_TIME registers for most chips The STEP_UP_TIME and STEP_DOWN_TIME registers are swapped for all chips but NCT6775. Reported-by: Grazvydas Ignotas Reviewed-by: Jean Delvare Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index bd1c99deac71..2aaedbe0b023 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -354,6 +354,10 @@ static const u16 NCT6775_REG_TEMP_CRIT[ARRAY_SIZE(nct6775_temp_label) - 1] /* NCT6776 specific data */ +/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ +#define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME +#define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME + static const s8 NCT6776_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ @@ -3528,8 +3532,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6776_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3600,8 +3604,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3677,8 +3681,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; From cd1faefa66425c3fa338777773c5c017edea3439 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 30 Aug 2015 19:45:19 -0700 Subject: [PATCH 38/96] hwmon: (nct6775) Add support for NCT6793D NCT6793D is register compatible with NCT6792D. Also move nct6775_sio_names[] closer to enum kinds to simplify adding new chips. Tested-by: Grazvydas Ignotas Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck --- Documentation/hwmon/nct6775 | 4 ++++ drivers/hwmon/Kconfig | 4 ++-- drivers/hwmon/nct6775.c | 48 ++++++++++++++++++++++++------------- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775 index f0dd3d2fec96..76add4c9cd68 100644 --- a/Documentation/hwmon/nct6775 +++ b/Documentation/hwmon/nct6775 @@ -32,6 +32,10 @@ Supported chips: Prefix: 'nct6792' Addresses scanned: ISA address retrieved from Super I/O registers Datasheet: Available from Nuvoton upon request + * Nuvoton NCT6793D + Prefix: 'nct6793' + Addresses scanned: ISA address retrieved from Super I/O registers + Datasheet: Available from Nuvoton upon request Authors: Guenter Roeck diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 500b262b89bb..e13c902e8966 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1140,8 +1140,8 @@ config SENSORS_NCT6775 help If you say yes here you get support for the hardware monitoring functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D, - NCT6791D, NCT6792D and compatible Super-I/O chips. This driver - replaces the w83627ehf driver for NCT6775F and NCT6776F. + NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This + driver replaces the w83627ehf driver for NCT6775F and NCT6776F. This driver can also be built as a module. If so, the module will be called nct6775. diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 2aaedbe0b023..8b4fa55e46c6 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -39,6 +39,7 @@ * nct6779d 15 5 5 2+6 0xc560 0xc1 0x5ca3 * nct6791d 15 6 6 2+6 0xc800 0xc1 0x5ca3 * nct6792d 15 6 6 2+6 0xc910 0xc1 0x5ca3 + * nct6793d 15 6 6 2+6 0xd120 0xc1 0x5ca3 * * #temp lists the number of monitored temperature sources (first value) plus * the number of directly connectable temperature sensors (second value). @@ -63,7 +64,7 @@ #define USE_ALTERNATE -enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792 }; +enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793 }; /* used to set data->name = nct6775_device_names[data->sio_kind] */ static const char * const nct6775_device_names[] = { @@ -73,6 +74,17 @@ static const char * const nct6775_device_names[] = { "nct6779", "nct6791", "nct6792", + "nct6793", +}; + +static const char * const nct6775_sio_names[] __initconst = { + "NCT6106D", + "NCT6775F", + "NCT6776D/F", + "NCT6779D", + "NCT6791D", + "NCT6792D", + "NCT6793D", }; static unsigned short force_id; @@ -104,6 +116,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal"); #define SIO_NCT6779_ID 0xc560 #define SIO_NCT6791_ID 0xc800 #define SIO_NCT6792_ID 0xc910 +#define SIO_NCT6793_ID 0xd120 #define SIO_ID_MASK 0xFFF0 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 }; @@ -537,7 +550,7 @@ static const s8 NCT6791_ALARM_BITS[] = { 4, 5, 13, -1, -1, -1, /* temp1..temp6 */ 12, 9 }; /* intrusion0, intrusion1 */ -/* NCT6792 specific data */ +/* NCT6792/NCT6793 specific data */ static const u16 NCT6792_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7d }; @@ -1060,6 +1073,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) case nct6779: case nct6791: case nct6792: + case nct6793: return reg == 0x150 || reg == 0x153 || reg == 0x155 || ((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) || reg == 0x402 || @@ -1411,6 +1425,7 @@ static void nct6775_update_pwm_limits(struct device *dev) case nct6779: case nct6791: case nct6792: + case nct6793: reg = nct6775_read_value(data, data->REG_CRITICAL_PWM_ENABLE[i]); if (reg & data->CRITICAL_PWM_ENABLE_MASK) @@ -2826,6 +2841,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr, case nct6779: case nct6791: case nct6792: + case nct6793: nct6775_write_value(data, data->REG_CRITICAL_PWM[nr], val); reg = nct6775_read_value(data, @@ -3260,7 +3276,7 @@ nct6775_check_fan_inputs(struct nct6775_data *data) pwm4pin = false; pwm5pin = false; pwm6pin = false; - } else { /* NCT6779D, NCT6791D, or NCT6792D */ + } else { /* NCT6779D, NCT6791D, NCT6792D, or NCT6793D */ regval = superio_inb(sioreg, 0x1c); fan3pin = !(regval & (1 << 5)); @@ -3273,7 +3289,8 @@ nct6775_check_fan_inputs(struct nct6775_data *data) fan4min = fan4pin; - if (data->kind == nct6791 || data->kind == nct6792) { + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) { regval = superio_inb(sioreg, 0x2d); fan6pin = (regval & (1 << 1)); pwm6pin = (regval & (1 << 0)); @@ -3647,6 +3664,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + case nct6793: data->in_num = 15; data->pwm_num = 6; data->auto_pwm_num = 4; @@ -3922,6 +3940,7 @@ static int nct6775_probe(struct platform_device *pdev) case nct6779: case nct6791: case nct6792: + case nct6793: break; } @@ -3954,6 +3973,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + case nct6793: tmp |= 0x7e; break; } @@ -4051,7 +4071,8 @@ static int __maybe_unused nct6775_resume(struct device *dev) if (reg != data->sio_reg_enable) superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable); - if (data->kind == nct6791 || data->kind == nct6792) + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) nct6791_enable_io_mapping(sioreg); superio_exit(sioreg); @@ -4110,15 +4131,6 @@ static struct platform_driver nct6775_driver = { .probe = nct6775_probe, }; -static const char * const nct6775_sio_names[] __initconst = { - "NCT6106D", - "NCT6775F", - "NCT6776D/F", - "NCT6779D", - "NCT6791D", - "NCT6792D", -}; - /* nct6775_find() looks for a '627 in the Super-I/O config space */ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) { @@ -4154,6 +4166,9 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) case SIO_NCT6792_ID: sio_data->kind = nct6792; break; + case SIO_NCT6793_ID: + sio_data->kind = nct6793; + break; default: if (val != 0xffff) pr_debug("unsupported chip ID: 0x%04x\n", val); @@ -4179,7 +4194,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) superio_outb(sioaddr, SIO_REG_ENABLE, val | 0x01); } - if (sio_data->kind == nct6791 || sio_data->kind == nct6792) + if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || + sio_data->kind == nct6793) nct6791_enable_io_mapping(sioaddr); superio_exit(sioaddr); @@ -4289,7 +4305,7 @@ static void __exit sensors_nct6775_exit(void) } MODULE_AUTHOR("Guenter Roeck "); -MODULE_DESCRIPTION("NCT6106D/NCT6775F/NCT6776F/NCT6779D/NCT6791D/NCT6792D driver"); +MODULE_DESCRIPTION("Driver for NCT6775F and compatible chips"); MODULE_LICENSE("GPL"); module_init(sensors_nct6775_init); From 7c5b190e115a2f7a51a85f261e7d7dca4b4bbe64 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 10 Sep 2015 21:55:27 +0200 Subject: [PATCH 39/96] x86/cpu: Print family/model/stepping in hex 924e101a7ab6 ("x86/debug: Dump family, model, stepping of the boot CPU") had its good intentions to dump the exact F/M/S as an aid during debugging sessions but its output can be ambiguous. Fix that: -smpboot: CPU0: Intel Core Processor (Broadwell) (fam: 06, model: 47, stepping: 02) +smpboot: CPU0: Intel Core Processor (Broadwell) (family: 0x6, model: 0x47, stepping: 0x2) Also, spell out "family". Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441914927-32037-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07ce52c22ec8..de22ea7ff82f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "%d86", c->x86); - printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); + printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); else printk(KERN_CONT ")\n"); From 2619d7e9c92d524cb155ec89fd72875321512e5b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 9 Sep 2015 16:07:30 -0700 Subject: [PATCH 40/96] time: Fix timekeeping_freqadjust()'s incorrect use of abs() instead of abs64() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal clocksteering done for fine-grained error correction uses a logarithmic approximation, so any time adjtimex() adjusts the clock steering, timekeeping_freqadjust() quickly approximates the correct clock frequency over a series of ticks. Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit: dc491596f639 ("timekeeping: Rework frequency adjustments to work better w/ nohz") used the abs() function with a s64 error value to calculate the size of the approximated adjustment to be made. Per include/linux/kernel.h: "abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()". Thus on 32-bit platforms, this resulted in the clocksteering to take a quite dampended random walk trying to converge on the proper frequency, which caused the adjustments to be made much slower then intended (most easily observed when large adjustments are made). This patch fixes the issue by using abs64() instead. Reported-by: Nuno Gonçalves Tested-by: Nuno Goncalves Signed-off-by: John Stultz Cc: # v3.17+ Cc: Linus Torvalds Cc: Miroslav Lichvar Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441840051-20244-1-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f6ee2e6b6f5d..3739ac6aa473 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs(tick_error); + tick_error = abs64(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; From 9e26b0b114adb321a9bf41520bac304ef49e77d1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 23 Aug 2015 21:11:52 +0800 Subject: [PATCH 41/96] gpio: mxc: need to check return value of irq_alloc_generic_chip Need to check return value of irq_alloc_generic_chip, because it may return NULL. 1. Change mxc_gpio_init_gc return type from void to int. 2. Add a new lable out_irqdomain_remove to remove the irq domain when mxc_gpio_init_gc fail. Signed-off-by: Peng Fan Cc: Alexandre Courbot [Manually rebased] Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mxc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index b752b560126e..8813abab9736 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -339,13 +339,15 @@ static int gpio_set_wake_irq(struct irq_data *d, u32 enable) return 0; } -static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) +static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxc", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; gc->private = port; ct = gc->chip_types; @@ -360,6 +362,8 @@ static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 0); + + return 0; } static void mxc_gpio_get_hw(struct platform_device *pdev) @@ -477,12 +481,16 @@ static int mxc_gpio_probe(struct platform_device *pdev) } /* gpio-mxc can be a generic irq chip */ - mxc_gpio_init_gc(port, irq_base); + err = mxc_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; list_add_tail(&port->node, &mxc_gpio_ports); return 0; +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); out_gpiochip_remove: From 1bbc557d976b4e5ae9a41d619bd79f09ccac9afc Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 23 Aug 2015 21:11:53 +0800 Subject: [PATCH 42/96] gpio: mxs: need to check return value of irq_alloc_generic_chip Need to check return value of irq_alloc_generic_chip, because it may return NULL. 1. Change mxs_gpio_init_gc return type from void to int. 2. Add a new lable out_irqdomain_remove to remove the irq domain when mxc_gpio_init_gc fail. Signed-off-by: Peng Fan Cc: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mxs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index b7f383eb18d9..1387385e6697 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -196,13 +196,16 @@ static int mxs_gpio_set_wake_irq(struct irq_data *d, unsigned int enable) return 0; } -static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) +static int __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxs", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; + gc->private = port; ct = gc->chip_types; @@ -216,6 +219,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 0); + + return 0; } static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset) @@ -317,7 +322,9 @@ static int mxs_gpio_probe(struct platform_device *pdev) } /* gpio-mxs can be a generic irq chip */ - mxs_gpio_init_gc(port, irq_base); + err = mxs_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; /* setup one handler for each entry */ irq_set_chained_handler_and_data(port->irq, mxs_gpio_irq_handler, @@ -343,6 +350,8 @@ static int mxs_gpio_probe(struct platform_device *pdev) out_bgpio_remove: bgpio_remove(&port->bgc); +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); return err; From aad38b75fb632200eb64282469458d21ce2cc39a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 25 Aug 2015 09:12:43 +0000 Subject: [PATCH 43/96] gpio: rcar: GPIO_RCAR doesn't relate to ARM 8cd1470("gpio: rcar: Add r8a7795 (R-Car H3) support") added GPIO support for r8a7795. r8a7795 based on CONFIG_ARM64. OTOH, GPIO_RCAR driver can be compiled fine on non-ARM. This patch removed ARM dependency for it. Signed-off-by: Kuninori Morimoto Acked-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index b4fc9e4d24c6..8949b3f6f74d 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -356,7 +356,7 @@ config GPIO_PXA config GPIO_RCAR tristate "Renesas R-Car GPIO" - depends on ARM && (ARCH_SHMOBILE || COMPILE_TEST) + depends on ARCH_SHMOBILE || COMPILE_TEST select GPIOLIB_IRQCHIP help Say yes here to support GPIO on Renesas R-Car SoCs. From e20538b82f1ffcc06e68feb117f24f211cff7a4d Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 28 Aug 2015 09:44:18 -0700 Subject: [PATCH 44/96] gpio: Propagate errors from chip->get() It's possible to have gpio chips hanging off unreliable remote buses where the get() operation will fail to acquire a readout of the current gpio state. Propagate these errors to the consumer so that they can act on, retry or ignore these failing reads, instead of treating them as the line being held high. Signed-off-by: Bjorn Andersson Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 980c1f87866a..5db3445552b1 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1174,15 +1174,16 @@ EXPORT_SYMBOL_GPL(gpiod_is_active_low); * that the GPIO was actually requested. */ -static bool _gpiod_get_raw_value(const struct gpio_desc *desc) +static int _gpiod_get_raw_value(const struct gpio_desc *desc) { struct gpio_chip *chip; - bool value; int offset; + int value; chip = desc->chip; offset = gpio_chip_hwgpio(desc); - value = chip->get ? chip->get(chip, offset) : false; + value = chip->get ? chip->get(chip, offset) : -EIO; + value = value < 0 ? value : !!value; trace_gpio_value(desc_to_gpio(desc), 1, value); return value; } @@ -1192,7 +1193,7 @@ static bool _gpiod_get_raw_value(const struct gpio_desc *desc) * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. @@ -1212,7 +1213,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. @@ -1226,6 +1227,9 @@ int gpiod_get_value(const struct gpio_desc *desc) WARN_ON(desc->chip->can_sleep); value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; @@ -1548,7 +1552,7 @@ EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ @@ -1566,7 +1570,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value_cansleep); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ @@ -1579,6 +1583,9 @@ int gpiod_get_value_cansleep(const struct gpio_desc *desc) return 0; value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; From 69de52ba321dda8dd7f632d1e480983494325ba0 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 2 Sep 2015 20:07:09 +0200 Subject: [PATCH 45/96] Documentation: gpio: board: add flags parameter to gpiod_get*() functions With commit 39b2bbe3d715 ("gpio: add flags argument to gpiod_get*() functions") the gpiod_get*() functions got a 'flags' parameter. Reflect this in the documentation, too. Signed-off-by: Dirk Behme Signed-off-by: Linus Walleij --- Documentation/gpio/board.txt | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index b80606de545a..9edd5af31b3a 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -39,11 +39,11 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. gpiod_is_active_low(power) will be true). @@ -142,13 +142,14 @@ The driver controlling "foo.0" will then be able to obtain its GPIOs as follows: struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); - gpiod_direction_output(power, 1); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); -Since the "power" GPIO is mapped as active-low, its actual signal will be 0 -after this code. Contrary to the legacy integer GPIO interface, the active-low -property is handled during mapping and is thus transparent to GPIO consumers. +Since the "led" GPIOs are mapped as active-high, this example will switch their +signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped +as active-low, its actual signal will be 0 after this code. Contrary to the legacy +integer GPIO interface, the active-low property is handled during mapping and is +thus transparent to GPIO consumers. From 87e77e46c61a9333227ab41cdefb1875758a4f13 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 2 Sep 2015 20:07:10 +0200 Subject: [PATCH 46/96] Documentation: gpio: board: describe the con_id parameter The con_id parameter has to match the GPIO description and is automatically extended by the GPIO suffix if not NULL. I had to look into the code to understand this and properly find the GPIO I've been looking for, so document this. Signed-off-by: Dirk Behme Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij --- Documentation/gpio/board.txt | 9 +++++++++ Documentation/gpio/consumer.txt | 3 +++ 2 files changed, 12 insertions(+) diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index 9edd5af31b3a..5fa069a80171 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -48,6 +48,15 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. gpiod_is_active_low(power) will be true). +The second parameter of the gpiod_get() functions, the con_id string, has to be +the -prefix of the GPIO suffixes ("gpios" or "gpio", automatically +looked up by the gpiod functions internally) used in the device tree. With above +"led-gpios" example, use the prefix without the "-" as con_id parameter: "led". + +Internally, the GPIO subsystem prefixes the GPIO suffix ("gpios" or "gpio") +with the string passed in con_id to get the resulting string +(snprintf(... "%s-%s", con_id, gpio_suffixes[]). + ACPI ---- ACPI also supports function names for GPIOs in a similar fashion to DT. diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt index a206639454ab..e000502fde20 100644 --- a/Documentation/gpio/consumer.txt +++ b/Documentation/gpio/consumer.txt @@ -39,6 +39,9 @@ device that displays digits), an additional index argument can be specified: const char *con_id, unsigned int idx, enum gpiod_flags flags) +For a more detailed description of the con_id parameter in the DeviceTree case +see Documentation/gpio/board.txt + The flags parameter is used to optionally specify a direction and initial value for the GPIO. Values can be: From e799f35c32ea940222b568abf38d6abcab7fa8c1 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 30 Aug 2015 23:58:44 +0200 Subject: [PATCH 47/96] gpio: sx150x: Remove unnecessary MODULE_ALIAS() The driver has a I2C device id table that is used to create the module aliases and also "sx150x" isn't a supported I2C id, so it's never used. Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij --- drivers/gpio/gpio-sx150x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpio-sx150x.c b/drivers/gpio/gpio-sx150x.c index 458d9d7952b8..9c6b96707c9f 100644 --- a/drivers/gpio/gpio-sx150x.c +++ b/drivers/gpio/gpio-sx150x.c @@ -706,4 +706,3 @@ module_exit(sx150x_exit); MODULE_AUTHOR("Gregory Bean "); MODULE_DESCRIPTION("Driver for Semtech SX150X I2C GPIO Expanders"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("i2c:sx150x"); From 5e606abef57a89b3ca25f5d97a953c6cdad7cbac Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 28 Aug 2015 11:44:49 -0700 Subject: [PATCH 48/96] gpio: omap: Fix gpiochip_add() handling for deferred probe Currently we gpio-omap breaks if gpiochip_add() returns -EPROBE_DEFER: [ 0.570000] gpiochip_add: GPIOs 0..31 (gpio) failed to register [ 0.570000] omap_gpio 48310000.gpio: Could not register gpio chip -517 ... [ 3.670000] omap_gpio 48310000.gpio: Unbalanced pm_runtime_enable! Let's fix the issue by adding the missing pm_runtime_put() on error. Cc: Grygorii Strashko Cc: Javier Martinez Canillas Cc: Kevin Hilman Cc: Santosh Shilimkar Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 2ae0d47e9554..892a9d1192fd 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1253,8 +1253,11 @@ static int omap_gpio_probe(struct platform_device *pdev) omap_gpio_mod_init(bank); ret = omap_gpio_chip_init(bank, irqc); - if (ret) + if (ret) { + pm_runtime_put_sync(bank->dev); + pm_runtime_disable(bank->dev); return ret; + } omap_gpio_show_rev(bank); From ae80d64ee8c88b77c58254bcdc5c0981faab672d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 1 Sep 2015 10:46:15 +0200 Subject: [PATCH 49/96] Documentation: gpio: Explain that -gpio is also supported The GPIO documentation mentions that GPIOs are mapped by defining a -gpios property in the consumer device's node but a -gpio sufix is also supported after commit: dd34c37aa3e8 ("gpio: of: Allow -gpio suffix for property names") Update the documentation to match the implementation. Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij --- Documentation/gpio/board.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index 5fa069a80171..f59c43b6411b 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -21,8 +21,8 @@ exact way to do it depends on the GPIO controller providing the GPIOs, see the device tree bindings for your controller. GPIOs mappings are defined in the consumer device's node, in a property named --gpios, where is the function the driver will request -through gpiod_get(). For example: +either -gpios or -gpio, where is the function +the driver will request through gpiod_get(). For example: foo_device { compatible = "acme,foo"; @@ -31,7 +31,7 @@ through gpiod_get(). For example: <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */ <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */ - power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + power-gpio = <&gpio 1 GPIO_ACTIVE_LOW>; }; This property will make GPIOs 15, 16 and 17 available to the driver under the From 46d4f7c25e1bb59b1663878b843a7ec06eaf5806 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Sep 2015 10:31:27 -0700 Subject: [PATCH 50/96] gpio: omap: Fix GPIO numbering for deferred probe If gpio-omap probe fails with -EPROBE_DEFER, the GPIO numbering keeps increasing. Only increase the gpio count if gpiochip_add() was successful as otherwise the numbers will increase for each probe attempt. Cc: Javier Martinez Canillas Cc: Kevin Hilman Cc: Santosh Shilimkar Reviewed-by: Grygorii Strashko Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 892a9d1192fd..072af5239bc1 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1098,7 +1098,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) } else { bank->chip.label = "gpio"; bank->chip.base = gpio; - gpio += bank->width; } bank->chip.ngpio = bank->width; @@ -1108,6 +1107,9 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) return ret; } + if (!bank->is_mpuio) + gpio += bank->width; + #ifdef CONFIG_ARCH_OMAP1 /* * REVISIT: Once we have OMAP1 supporting SPARSE_IRQ, we can drop From d259ec26a6c541a5437e9ed0a1e1891342af3cff Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 24 Aug 2015 23:12:26 +0200 Subject: [PATCH 51/96] pinctrl: qcom: ssbi: convert null test to IS_ERR test Since commit 323de9efdf3e ("pinctrl: make pinctrl_register() return proper error code"), pinctrl_register returns an error code rather than NULL on failure. Update some drivers that were introduced more recently. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,e1,e2; @@ e = pinctrl_register(...) ... when != e = e1 if ( - e == NULL + IS_ERR(e) ) { ... return - e2 + PTR_ERR(e) ; } // Signed-off-by: Julia Lawall Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 4 ++-- drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index c978b311031b..e1a3721bc8e5 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -723,9 +723,9 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx gpio driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_gpio_template; diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 2d1b69f171be..6652b8d7f707 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -814,9 +814,9 @@ static int pm8xxx_mpp_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx mpp driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_mpp_template; From 5a99233e9bcacc7ea23e173a75bbb7301abd3e6f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 24 Aug 2015 23:12:27 +0200 Subject: [PATCH 52/96] pinctrl: digicolor: convert null test to IS_ERR test Since commit 323de9efdf3e ("pinctrl: make pinctrl_register() return proper error code"), pinctrl_register returns an error code rather than NULL on failure. Update a driver that was introduced more recently. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,e1,e2; @@ e = pinctrl_register(...) ... when != e = e1 if ( - e == NULL + IS_ERR(e) ) { ... return - e2 + PTR_ERR(e) ; } // Signed-off-by: Julia Lawall Acked-by: Baruch Siach Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-digicolor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c index 461fffc4c62a..11f8b835d3b6 100644 --- a/drivers/pinctrl/pinctrl-digicolor.c +++ b/drivers/pinctrl/pinctrl-digicolor.c @@ -337,9 +337,9 @@ static int dc_pinctrl_probe(struct platform_device *pdev) pmap->dev = &pdev->dev; pmap->pctl = pinctrl_register(pctl_desc, &pdev->dev, pmap); - if (!pmap->pctl) { + if (IS_ERR(pmap->pctl)) { dev_err(&pdev->dev, "pinctrl driver registration failed\n"); - return -EINVAL; + return PTR_ERR(pmap->pctl); } ret = dc_gpiochip_add(pmap, pdev->dev.of_node); From 163dc9f39a26b41fc49319fce4145b35f9705789 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 1 Aug 2015 13:22:38 +0900 Subject: [PATCH 53/96] pinctrl: join lines that can be a single line within 80 columns There is no reason to break a line shorter than 80 columns. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/pinmux.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 67e08cb315c4..29984b36926a 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -313,8 +313,7 @@ static int pinmux_func_name_to_selector(struct pinctrl_dev *pctldev, /* See if this pctldev has this function */ while (selector < nfuncs) { - const char *fname = ops->get_function_name(pctldev, - selector); + const char *fname = ops->get_function_name(pctldev, selector); if (!strcmp(function, fname)) return selector; From 942cde724075f840ded89390b10dce1a47a4d712 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Sep 2015 10:34:30 -0700 Subject: [PATCH 54/96] pinctrl: core: Warn about NULL gpio_chip in pinctrl_ready_for_gpio_range() If the gpio driver is confused about the numbers for gpio-ranges, pinctrl_ready_for_gpio_range() may get called with invalid GPIO causing a NULL pointer exception. Let's instead provide a warning that allows fixing the problem and return with error. Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 69723e07036b..9638a00c67c2 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -349,6 +349,9 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) struct pinctrl_gpio_range *range = NULL; struct gpio_chip *chip = gpio_to_chip(gpio); + if (WARN(!chip, "no gpio_chip for gpio%i?", gpio)) + return false; + mutex_lock(&pinctrldev_list_mutex); /* Loop over the pin controllers */ From fa84b52cb681b27e6b5e003457562e25a239b9c4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 8 Sep 2015 16:46:40 +0200 Subject: [PATCH 55/96] pinctrl: samsung: s3c24xx: fix syntax error ?SYNTAX ERROR irq_desc_get_irq_chip() does not exist. It should be irq_desc_get_chip(). Tested by compiling s3c2410_defconfig. Cc: Thomas Gleixner Reported-by: Paul Gortmaker Signed-off-by: Linus Walleij --- drivers/pinctrl/samsung/pinctrl-s3c24xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 019844d479bb..d168b39dd7fd 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -361,7 +361,7 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc, u32 offset, u32 range) { struct s3c24xx_eint_data *data = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_irq_chip(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); struct samsung_pinctrl_drv_data *d = data->drvdata; unsigned int pend, mask; From 1e6428124fe22906be0de1622c8fed8e50e5de05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 5 Sep 2015 08:58:10 +0200 Subject: [PATCH 56/96] x86/vm86: Fix the misleading CONFIG_VM86 Kconfig help text The CONFIG_VM86 Kconfig help text is actively misleading, so fix it: - Don't mark it 'obsolete' in the text as we'll support the ABI as long as CPUs support it. - Qualify the part about software emulation and mention that for some apps you want a real vm86 mode. - Don't scare users away from the option, instead explain what it does. Reported-by: Stas Sergeev Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Austin S Hemmelgarn Cc: Borislav Petkov Cc: Brian Gerst Cc: Josh Boyer Cc: Kees Cook Cc: Linus Torvalds Cc: Matthew Garrett Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 48f7433dac6f..d28815325ef0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1004,7 +1004,7 @@ config X86_THERMAL_VECTOR depends on X86_MCE_INTEL config X86_LEGACY_VM86 - bool "Legacy VM86 support (obsolete)" + bool "Legacy VM86 support" default n depends on X86_32 ---help--- @@ -1016,19 +1016,20 @@ config X86_LEGACY_VM86 available to accelerate real mode DOS programs. However, any recent version of DOSEMU, X, or vbetool should be fully functional even without kernel VM86 support, as they will all - fall back to (pretty well performing) software emulation. + fall back to software emulation. Nevertheless, if you are using + a 16-bit DOS program where 16-bit performance matters, vm86 + mode might be faster than emulation and you might want to + enable this option. - Anything that works on a 64-bit kernel is unlikely to need - this option, as 64-bit kernels don't, and can't, support V8086 - mode. This option is also unrelated to 16-bit protected mode - and is not needed to run most 16-bit programs under Wine. + Note that any app that works on a 64-bit kernel is unlikely to + need this option, as 64-bit kernels don't, and can't, support + V8086 mode. This option is also unrelated to 16-bit protected + mode and is not needed to run most 16-bit programs under Wine. - Enabling this option adds considerable attack surface to the - kernel and slows down system calls and exception handling. + Enabling this option increases the complexity of the kernel + and slows down exception handling a tiny bit. - Unless you use very old userspace or need the last drop of - performance in your real mode DOS games and can't use KVM, - say N here. + If unsure, say N here. config VM86 bool From eef7635a22f6b144206b5ca2f1398f637acffc4d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 11 Sep 2015 09:34:26 +0530 Subject: [PATCH 57/96] clockevents: Remove unused set_mode() callback All users are migrated to the per-state callbacks, get rid of the unused interface and the core support code. Signed-off-by: Viresh Kumar Signed-off-by: Thomas Gleixner Cc: linaro-kernel@lists.linaro.org Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/fd60de14cf6d125489c031207567bb255ad946f6.1441943991.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 29 ++++---------------- kernel/time/clockevents.c | 42 +---------------------------- kernel/time/tick-common.c | 1 - kernel/time/timer_list.c | 54 +++++++++++++++++--------------------- 4 files changed, 30 insertions(+), 96 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 31ce435981fe..bdcf358dfce2 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -18,15 +18,6 @@ struct clock_event_device; struct module; -/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ -enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED, - CLOCK_EVT_MODE_SHUTDOWN, - CLOCK_EVT_MODE_PERIODIC, - CLOCK_EVT_MODE_ONESHOT, - CLOCK_EVT_MODE_RESUME, -}; - /* * Possible states of a clock event device. * @@ -86,16 +77,14 @@ enum clock_event_state { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries - * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. - * @set_state_periodic: switch state to periodic, if !set_mode - * @set_state_oneshot: switch state to oneshot, if !set_mode - * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode - * @set_state_shutdown: switch state to shutdown, if !set_mode - * @tick_resume: resume clkevt device, if !set_mode + * @set_state_periodic: switch state to periodic + * @set_state_oneshot: switch state to oneshot + * @set_state_oneshot_stopped: switch state to oneshot_stopped + * @set_state_shutdown: switch state to shutdown + * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -116,18 +105,10 @@ struct clock_event_device { u64 min_delta_ns; u32 mult; u32 shift; - enum clock_event_mode mode; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; - /* - * State transition callback(s): Only one of the two groups should be - * defined: - * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). - */ - void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 50eb107f1198..a9b76a40319e 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns); static int __clockevents_switch_state(struct clock_event_device *dev, enum clock_event_state state) { - /* Transition with legacy set_mode() callback */ - if (dev->set_mode) { - /* Legacy callback doesn't support new modes */ - if (state > CLOCK_EVT_STATE_ONESHOT) - return -ENOSYS; - /* - * 'clock_event_state' and 'clock_event_mode' have 1-to-1 - * mapping until *_ONESHOT, and so a simple cast will work. - */ - dev->set_mode((enum clock_event_mode)state, dev); - dev->mode = (enum clock_event_mode)state; - return 0; - } - if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; @@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; - if (dev->set_mode) { - dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); - dev->mode = CLOCK_EVT_MODE_RESUME; - } else if (dev->tick_resume) { + if (dev->tick_resume) ret = dev->tick_resume(dev); - } return ret; } @@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind_device); -/* Sanity check of state transition callbacks */ -static int clockevents_sanity_check(struct clock_event_device *dev) -{ - /* Legacy set_mode() callback */ - if (dev->set_mode) { - /* We shouldn't be supporting new modes now */ - WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || - dev->set_state_shutdown || dev->tick_resume || - dev->set_state_oneshot_stopped); - - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); - return 0; - } - - if (dev->features & CLOCK_EVT_FEAT_DUMMY) - return 0; - - return 0; -} - /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(clockevents_sanity_check(dev)); - /* Initialize state to DETACHED */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d11c55b6ab7d..4fcd99e12aa0 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu) * the set mode function! */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); - dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 129c96033e46..f75e35b60149 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) (unsigned long long) dev->min_delta_ns); SEQ_printf(m, " mult: %u\n", dev->mult); SEQ_printf(m, " shift: %u\n", dev->shift); - SEQ_printf(m, " mode: %d\n", dev->mode); + SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev)); SEQ_printf(m, " next_event: %Ld nsecs\n", (unsigned long long) ktime_to_ns(dev->next_event)); @@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - if (dev->set_mode) { - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); + if (dev->set_state_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_state_shutdown); SEQ_printf(m, "\n"); - } else { - if (dev->set_state_shutdown) { - SEQ_printf(m, " shutdown: "); - print_name_offset(m, dev->set_state_shutdown); - SEQ_printf(m, "\n"); - } + } - if (dev->set_state_periodic) { - SEQ_printf(m, " periodic: "); - print_name_offset(m, dev->set_state_periodic); - SEQ_printf(m, "\n"); - } + if (dev->set_state_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_state_periodic); + SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot) { - SEQ_printf(m, " oneshot: "); - print_name_offset(m, dev->set_state_oneshot); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_state_oneshot); + SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot_stopped) { - SEQ_printf(m, " oneshot stopped: "); - print_name_offset(m, dev->set_state_oneshot_stopped); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot_stopped) { + SEQ_printf(m, " oneshot stopped: "); + print_name_offset(m, dev->set_state_oneshot_stopped); + SEQ_printf(m, "\n"); + } - if (dev->tick_resume) { - SEQ_printf(m, " resume: "); - print_name_offset(m, dev->tick_resume); - SEQ_printf(m, "\n"); - } + if (dev->tick_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, dev->tick_resume); + SEQ_printf(m, "\n"); } SEQ_printf(m, " event_handler: "); From f454b478861325f067fd58ba7ee9f1b5c4a9d6a0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 Sep 2015 09:45:58 -0600 Subject: [PATCH 58/96] x86/ldt: Fix small LDT allocation for Xen While the following commit: 37868fe113 ("x86/ldt: Make modify_ldt synchronous") added a nice comment explaining that Xen needs page-aligned whole page chunks for guest descriptor tables, it then nevertheless used kzalloc() on the small size path. As I'm unaware of guarantees for kmalloc(PAGE_SIZE, ) to return page-aligned memory blocks, I believe this needs to be switched back to __get_free_page() (or better get_zeroed_page()). Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/55E735D6020000780009F1E6@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 2bcc0525f1c1..6acc9dd91f36 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) if (alloc_size > PAGE_SIZE) new_ldt->entries = vzalloc(alloc_size); else - new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); if (!new_ldt->entries) { kfree(new_ldt); @@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(ldt->entries); else - kfree(ldt->entries); + free_page((unsigned long)ldt->entries); kfree(ldt); } From ba9cc453c400049f632d4eb2f2835e2f96654ddc Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 11 Sep 2015 08:49:47 +0100 Subject: [PATCH 59/96] arm64: dma-mapping: check whether cma area is initialized or not If CMA is turned on and CMA size is set to zero, kernel should behave as if CMA was not enabled at compile time. Every dma allocation should check existence of cma area before requesting memory. Arm has done this by commit e464ef16c4f0 ("arm: dma-mapping: add checking cma area initialized"), also do this for arm64. Acked-by: Catalin Marinas Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0bcc4bc94b4a..99224dcebdc5 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; - if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { + if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) { struct page *page; void *addr; From b847415ce96efef819534b230d84695b1bc6d36b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 Sep 2015 18:22:00 +0100 Subject: [PATCH 60/96] arm64: Fix the pte_hw_dirty() check when AF/DBM is enabled Commit 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") introduced support for handling hardware updates of the access flag and dirty status. The PTE is automatically dirtied in hardware (if supported) by clearing the PTE_RDONLY bit when the PTE_DBM/PTE_WRITE bit is set. The pte_hw_dirty() macro was added to detect a hardware dirtied pte. The pte_dirty() macro checks for both software PTE_DIRTY and pte_hw_dirty(). Functions like pte_modify() clear the PTE_RDONLY bit since it is meant to be set in set_pte_at() when written to memory. In such cases, pte_hw_dirty() would return true even though such pte is clean. This patch changes pte_hw_dirty() to test the PTE_DBM/PTE_WRITE bit together with PTE_RDONLY. Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Reported-by: Julien Grall Tested-by: Julien Grall Tested-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6900b2d95371..69207f016891 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -146,7 +146,7 @@ extern struct page *empty_zero_page; #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #ifdef CONFIG_ARM64_HW_AFDBM -#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else #define pte_hw_dirty(pte) (0) #endif @@ -238,7 +238,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * - * PTE_DIRTY || !PTE_RDONLY + * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) From 62d96c71d248834af2891293dc23cc344ae2ec36 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 Sep 2015 18:22:01 +0100 Subject: [PATCH 61/96] arm64: Fix pte_modify() to preserve the hardware dirty information The pte_modify() function with hardware AF/DBM enabled must transfer the hardware dirty information to the software PTE_DIRTY bit. However, it was setting this bit in newprot and the mask does not cover such bit. This patch sets PTE_DIRTY on the original pte which will be preserved in the returned value. Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Cc: Julien Grall Tested-by: Julien Grall Tested-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 69207f016891..31df98adf005 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -503,7 +503,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - newprot |= PTE_DIRTY; + pte = pte_mkdirty(pte); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } From bf950040a53da35522e38066d9eb6ab7a1c9d136 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Sep 2015 18:22:02 +0100 Subject: [PATCH 62/96] arm64: pgtable: use a single bit for PTE_WRITE regardless of DBM Depending on CONFIG_ARM64_HW_AFDBM, we use either bit 57 or 51 of the pte to represent PTE_WRITE. Given that bit 51 is reserved prior to ARMv8.1, we can just use that bit regardless of the config option. That also matches what happens if a kernel configured with ARM64_HW_AFDBM=y is run on a CPU without the DBM functionality. Cc: Julien Grall Tested-by: Julien Grall Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 31df98adf005..b0329be95cb1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -26,13 +26,9 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) +#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#ifdef CONFIG_ARM64_HW_AFDBM -#define PTE_WRITE (PTE_DBM) /* same as DBM */ -#else -#define PTE_WRITE (_AT(pteval_t, 1) << 57) -#endif #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* From cda34fc774d114afe98515a21c2063a803f922bc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 14 Sep 2015 12:42:23 +0200 Subject: [PATCH 63/96] x86/paravirt: Remove the unused pv_time_ops::get_tsc_khz method It's not used anywhere. Signed-off-by: Juergen Gross Acked-by: Rusty Russell Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Cc: chrisw@sous-sol.org Cc: jeremy@goop.org Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1442227343-403-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt_types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce029e4fa7c6..31247b5bff7c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -97,7 +97,6 @@ struct pv_lazy_ops { struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); - unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { From 4857c91f0d195f05908fff296ba1ec5fca87066c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 12:00:55 +0200 Subject: [PATCH 64/96] x86/ioapic: Force affinity setting in setup_ioapic_dest() The recent ioapic cleanups changed the affinity setting in setup_ioapic_dest() from a direct write to the hardware to the delayed affinity setup via irq_set_affinity(). That results in a warning from chained_irq_exit(): WARNING: CPU: 0 PID: 5 at kernel/irq/migration.c:32 irq_move_masked_irq [] irq_move_masked_irq+0xb8/0xc0 [] ioapic_ack_level+0x111/0x130 [] intel_gpio_irq_handler+0x148/0x1c0 The reason is that irq_set_affinity() does not write directly to the hardware. It marks the affinity setting as pending and executes it from the next interrupt. The chained handler infrastructure does not take the irq descriptor lock for performance reasons because such a chained interrupt is not visible to any interfaces. So the delayed affinity setting triggers the warning in irq_move_masked_irq(). Restore the old behaviour by calling the set_affinity function of the ioapic chip in setup_ioapic_dest(). This is safe as none of the interrupts can be on the fly at this point. Fixes: aa5cb97f14a2 'x86/irq: Remove x86_io_apic_ops.set_affinity and related interfaces' Reported-and-tested-by: Mika Westerberg Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: jarkko.nikula@linux.intel.com --- arch/x86/kernel/apic/io_apic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 38a76f826530..5c60bb162622 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; const struct cpumask *mask; struct irq_data *idata; + struct irq_chip *chip; if (skip_ioapic_setup == 1) return; @@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - irq_set_affinity(irq, mask); + chip = irq_data_get_irq_chip(idata); + chip->irq_set_affinity(idata, mask, false); } - } #endif From 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 30 Jul 2015 16:24:43 -0700 Subject: [PATCH 65/96] x86/apic: Serialize LVTT and TSC_DEADLINE writes The APIC LVTT register is MMIO mapped but the TSC_DEADLINE register is an MSR. The write to the TSC_DEADLINE MSR is not serializing, so it's not guaranteed that the write to LVTT has reached the APIC before the TSC_DEADLINE MSR is written. In such a case the write to the MSR is ignored and as a consequence the local timer interrupt never fires. The SDM decribes this issue for xAPIC and x2APIC modes. The serialization methods recommended by the SDM differ. xAPIC: "1. Memory-mapped write to LVT Timer Register, setting bits 18:17 to 10b. 2. WRMSR to the IA32_TSC_DEADLINE MSR a value much larger than current time-stamp counter. 3. If RDMSR of the IA32_TSC_DEADLINE MSR returns zero, go to step 2. 4. WRMSR to the IA32_TSC_DEADLINE MSR the desired deadline." x2APIC: "To allow for efficient access to the APIC registers in x2APIC mode, the serializing semantics of WRMSR are relaxed when writing to the APIC registers. Thus, system software should not use 'WRMSR to APIC registers in x2APIC mode' as a serializing instruction. Read and write accesses to the APIC registers will occur in program order. A WRMSR to an APIC register may complete before all preceding stores are globally visible; software can prevent this by inserting a serializing instruction, an SFENCE, or an MFENCE before the WRMSR." The xAPIC method is to just wait for the memory mapped write to hit the LVTT by checking whether the MSR write has reached the hardware. There is no reason why a proper MFENCE after the memory mapped write would not do the same. Andi Kleen confirmed that MFENCE is sufficient for the xAPIC case as well. Issue MFENCE before writing to the TSC_DEADLINE MSR. This can be done unconditionally as all CPUs which have TSC_DEADLINE also have MFENCE support. [ tglx: Massaged the changelog ] Signed-off-by: Shaohua Li Reviewed-by: Ingo Molnar Cc: Cc: Cc: Cc: Andi Kleen Cc: H. Peter Anvin Cc: stable@vger.kernel.org #v3.7+ Link: http://lkml.kernel.org/r/20150909041352.GA2059853@devbig257.prn2.facebook.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/apic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3ca3e46aa405..24e94ce454e2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } From 1975dbc276c6ab62230cf4f9df5ddc9ff0e0e473 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 14 Sep 2015 17:11:05 -0600 Subject: [PATCH 66/96] locking/static_keys: Fix up the static keys documentation Fix a few small mistakes in the static key documentation and delete an unneeded sentence. Suggested-by: Jason Baron Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150914171105.511e1e21@lwn.net Signed-off-by: Ingo Molnar --- Documentation/static-keys.txt | 4 ++-- include/linux/jump_label.h | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index ec911583f6c5..477927becacb 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -15,8 +15,8 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); -static_key_likely() -static_key_unlikely() +static_branch_likely() +static_branch_unlikely() 0) Abstract diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0684bd3a48fc..f1094238ab2a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,8 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); - * static_key_likely() - * static_key_unlikely() + * static_branch_likely() + * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we @@ -45,12 +45,10 @@ * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * - * In addtion to static_branch_{enable,disable}, we can also reference count + * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and - * static_branch_dec() as a 'make more false'. The inc()/dec() - * interface is meant to be used exclusively from the inc()/dec() for a given - * key. + * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). From 2314ee4d444e28d4670ff80c84df68c25887decb Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 21 Aug 2015 04:40:22 +0100 Subject: [PATCH 67/96] arm64: enable generic idle loop Enable generic idle loop for ARM64, so can support for hlt/nohlt command line options to override default idle loop behavior. Acked-by: Catalin Marinas Signed-off-by: Leo Yan Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7d95663c0160..8b6e378992d2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -32,6 +32,7 @@ config ARM64 select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL From d10bcd473301888f957ec4b6b12aa3621be78d59 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 2 Sep 2015 18:49:28 +0100 Subject: [PATCH 68/96] arm64: head.S: initialise mdcr_el2 in el2_setup When entering the kernel at EL2, we fail to initialise the MDCR_EL2 register which controls debug access and PMU capabilities at EL1. This patch ensures that the register is initialised so that all traps are disabled and all the PMU counters are available to the host. When a guest is scheduled, KVM takes care to configure trapping appropriately. Cc: Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a055be6125cf..90d09eddd5b2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -523,6 +523,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr From 09a77a885233e2a20dac2635a79c83ccf50a26a1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 15 Sep 2015 16:03:36 +0100 Subject: [PATCH 69/96] modsign: Fix GPL/OpenSSL licence incompatibility The GPL does not permit us to link against the OpenSSL library. Use LGPL for sign-file and extract-file instead. [ The whole "openssl isn't compatible with gpl" is really just fear-mongering, but there's no reason not to make modsign LGPL, so nobody cares. - Linus ] Reported-by: Julian Andres Klode Signed-off-by: David Woodhouse Signed-off-by: David Howells Reviewed-by: Julian Andres Klode Signed-off-by: Linus Torvalds --- scripts/extract-cert.c | 10 +++++----- scripts/sign-file.c | 13 ++++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 10d23ca9f617..6ce5945a0b89 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -1,15 +1,15 @@ /* Extract X.509 certificate in DER form from PKCS#11 or PEM. * - * Copyright © 2014 Red Hat, Inc. All Rights Reserved. - * Copyright © 2015 Intel Corporation. + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. * * Authors: David Howells * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. */ #define _GNU_SOURCE #include diff --git a/scripts/sign-file.c b/scripts/sign-file.c index 058bba3103e2..c3899ca4811c 100755 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -1,12 +1,15 @@ /* Sign a module file using the given key. * - * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. + * + * Authors: David Howells + * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. */ #define _GNU_SOURCE #include From 865ca084fdc68cd9b658da4b098008278da8fed3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 15 Sep 2015 13:50:18 -0700 Subject: [PATCH 70/96] ia64: Enable userfaultfd and membarrier system calls Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/unistd.h | 2 +- arch/ia64/include/uapi/asm/unistd.h | 2 ++ arch/ia64/kernel/entry.S | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 95c39b95e97e..99c96a5e6016 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 319 /* length of syscall table */ +#define NR_syscalls 321 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 461079560c78..98e94e19a5a0 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -332,5 +332,7 @@ #define __NR_memfd_create 1340 #define __NR_bpf 1341 #define __NR_execveat 1342 +#define __NR_userfaultfd 1343 +#define __NR_membarrier 1344 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ae0de7bf5525..37cc7a65cd3e 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1768,5 +1768,7 @@ sys_call_table: data8 sys_memfd_create // 1340 data8 sys_bpf data8 sys_execveat + data8 sys_userfaultfd + data8 sys_membarrier .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls From 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 16 Sep 2015 14:10:03 +0100 Subject: [PATCH 71/96] x86/platform: Fix Geode LX timekeeping in the generic x86 build In 2007, commit 07190a08eef36 ("Mark TSC on GeodeLX reliable") bypassed verification of the TSC on Geode LX. However, this code (now in the check_system_tsc_reliable() function in arch/x86/kernel/tsc.c) was only present if CONFIG_MGEODE_LX was set. OpenWRT has recently started building its generic Geode target for Geode GX, not LX, to include support for additional platforms. This broke the timekeeping on LX-based devices, because the TSC wasn't marked as reliable: https://dev.openwrt.org/ticket/20531 By adding a runtime check on is_geode_lx(), we can also include the fix if CONFIG_MGEODEGX1 or CONFIG_X86_GENERIC are set, thus fixing the problem. Signed-off-by: David Woodhouse Cc: Andres Salomon Cc: Linus Torvalds Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1442409003.131189.87.camel@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 79055cf2c497..51e62d6afd9a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1015,15 +1016,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; From 02d8dabc50f94353075f2f62b1047c1306e8bf92 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 3 Sep 2015 15:23:40 +0200 Subject: [PATCH 72/96] perf stat: Fix per-pkg event reporting bug Per-pkg events need to be captured once per processor socket. The code in check_per_pkg() ensures only one value per processor package is used. However there is a problem with this function in case the first CPU of the package does not measure anything for the per-pkg event, but other CPUs do. Consider the following: $ create cgroup FOO; echo $$ >FOO/tasks; taskset -c 1 noploop & $ perf stat -a -I 1000 -e intel_cqm/llc_occupancy/ -G FOO sleep 100 1.00000 Bytes intel_cqm/llc_occupancy/ FOO The reason for this is that CPU0 in the cgroup has nothing running on it. Yet check_per_plg() will mark socket0 as processed and no other event value will be considered for the socket. This patch fixes the problem by having check_per_pkg() only consider events which actually ran. Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441286620-10117-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 415c359de465..2d065d065b67 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -196,7 +196,8 @@ static void zero_per_pkg(struct perf_evsel *counter) memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); } -static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +static int check_per_pkg(struct perf_evsel *counter, + struct perf_counts_values *vals, int cpu, bool *skip) { unsigned long *mask = counter->per_pkg_mask; struct cpu_map *cpus = perf_evsel__cpus(counter); @@ -218,6 +219,17 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) counter->per_pkg_mask = mask; } + /* + * we do not consider an event that has not run as a good + * instance to mark a package as used (skip=1). Otherwise + * we may run into a situation where the first CPU in a package + * is not running anything, yet the second is, and this function + * would mark the package as used after the first CPU and would + * not read the values from the second CPU. + */ + if (!(vals->run && vals->ena)) + return 0; + s = cpu_map__get_socket(cpus, cpu); if (s < 0) return -1; @@ -235,7 +247,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel static struct perf_counts_values zero; bool skip = false; - if (check_per_pkg(evsel, cpu, &skip)) { + if (check_per_pkg(evsel, count, cpu, &skip)) { pr_err("failed to read per-pkg counter\n"); return -1; } From e56d82a116176f7af9d642b560abbbd3a2b68013 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Sep 2015 15:31:24 +0100 Subject: [PATCH 73/96] arm64: cpu hotplug: ensure we mask out CPU_TASKS_FROZEN in notifiers We have a couple of CPU hotplug notifiers for resetting the CPU debug state to a sane value when a CPU comes online. This patch ensures that we mask out CPU_TASKS_FROZEN so that we don't miss any online events occuring due to suspend/resume. Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/hw_breakpoint.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9b3b62ac9c24..cebf78661a55 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { int cpu = (unsigned long)data; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, clear_os_lock, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index c97040ecf838..bba85c8f8037 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, void *hcpu) { int cpu = (long)hcpu; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } From bdec97a855ef1e239f130f7a11584721c9a1bf04 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Sep 2015 12:07:06 +0100 Subject: [PATCH 74/96] arm64: compat: fix vfp save/restore across signal handlers in big-endian When saving/restoring the VFP registers from a compat (AArch32) signal frame, we rely on the compat registers forming a prefix of the native register file and therefore make use of copy_{to,from}_user to transfer between the native fpsimd_state and the compat_vfp_sigframe. Unfortunately, this doesn't work so well in a big-endian environment. Our fpsimd save/restore code operates directly on 128-bit quantities (Q registers) whereas the compat_vfp_sigframe represents the registers as an array of 64-bit (D) registers. The architecture packs the compat D registers into the Q registers, with the least significant bytes holding the lower register. Consequently, we need to swap the 64-bit halves when converting between these two representations on a big-endian machine. This patch replaces the __copy_{to,from}_user invocations in our compat VFP signal handling code with explicit __put_user loops that operate on 64-bit values and swap them accordingly. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/signal32.c | 47 +++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 948f0ad2de23..71ef6dc89ae5 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); From df057cc7b4fa59e9b55f07ffdb6c62bf02e99a00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Mar 2015 12:15:02 +0000 Subject: [PATCH 75/96] arm64: errata: add module build workaround for erratum #843419 Cortex-A53 processors <= r0p4 are affected by erratum #843419 which can lead to a memory access using an incorrect address in certain sequences headed by an ADRP instruction. There is a linker fix to generate veneers for ADRP instructions, but this doesn't work for kernel modules which are built as unlinked ELF objects. This patch adds a new config option for the erratum which, when enabled, builds kernel modules with the mcmodel=large flag. This uses absolute addressing for all kernel symbols, thereby removing the use of ADRP as a PC-relative form of addressing. The ADRP relocs are removed from the module loader so that we fail to load any potentially affected modules. Cc: Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 16 ++++++++++++++++ arch/arm64/Makefile | 4 ++++ arch/arm64/kernel/module.c | 2 ++ 3 files changed, 22 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8b6e378992d2..07d1811aa03f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -332,6 +332,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 15ff5b4156fd..f9914d7c1bb0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,6 +41,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf4107f6ef..876eb8df50bf 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; From 179f36dde3cec0f9f05a757b68f6a58e4edbcc95 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Sep 2015 11:30:20 -0300 Subject: [PATCH 76/96] Revert "perf symbols: Fix mismatched declarations for elf_getphdrnum" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f785f2357673d520a0b7b468973cdd197f336494. We have a test to check if elf_getphdrnum() is present, so, if it fails, we'll get: [acme@rhel5 linux]$ cat /tmp/build/perf/feature/test-libelf-getphdrnum.make.output cc1: warnings being treated as errors test-libelf-getphdrnum.c: In function ‘main’: test-libelf-getphdrnum.c:7: warning: implicit declaration of function ‘elf_getphdrnum’ [acme@rhel5 linux]$ And this block will not be compiled: #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT static int elf_getphdrnum(Elf *elf, size_t *dst) ... #endif So, if elf_getphdrnum() is being defined somewhere, there is a problem with the test that is not detecting that function, go fix it. Reported-by: Vinson Lee Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Naveen N. Rao" Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Stephane Eranian Cc: Victor Kamensky Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-qn459fal6acvcvm50i8zxx9k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 53bb5f59ec58..f78ea3dc4c08 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, #endif #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT -int elf_getphdrnum(Elf *elf, size_t *dst) +static int elf_getphdrnum(Elf *elf, size_t *dst) { GElf_Ehdr gehdr; GElf_Ehdr *ehdr; From f8ac8606fd3cd72183de8eec2b151ff05040c70f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Sep 2015 12:20:28 -0300 Subject: [PATCH 77/96] tools build: Add test for presence of numa_num_possible_cpus() in libnuma The existing numa test checks only if numa.h and numa_available() are present, but that can be satisfied with an old libnuma that is not enough for the 'perf bench numa' entry, so add a test to check for that: [acme@rhel5 linux]$ make NO_AUXTRACE=1 NO_LIBPERL=1 -C tools/perf O=/tmp/build/perf install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j2' parallel build Auto-detecting system features: ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ on ] config/Makefile:577: Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8 INSTALL binaries This fixes the build on old systems such as RHEL/CentOS 5.11. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Naveen N. Rao" Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Stephane Eranian Cc: Victor Kamensky Cc: Vinson Lee Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-zqriqkezppi2de2iyjin1tnc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 2 ++ tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-numa_num_possible_cpus.c | 6 ++++++ tools/perf/config/Makefile | 11 ++++++++--- 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 tools/build/feature/test-numa_num_possible_cpus.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 2975632d51e2..970242098a7c 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -41,6 +41,7 @@ FEATURE_TESTS ?= \ libelf-getphdrnum \ libelf-mmap \ libnuma \ + numa_num_possible_cpus \ libperl \ libpython \ libpython-version \ @@ -61,6 +62,7 @@ FEATURE_DISPLAY ?= \ libbfd \ libelf \ libnuma \ + numa_num_possible_cpus \ libperl \ libpython \ libslang \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 74ca42093d70..e13a42bd0274 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -19,6 +19,7 @@ FILES= \ test-libelf-getphdrnum.bin \ test-libelf-mmap.bin \ test-libnuma.bin \ + test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ test-libpython-version.bin \ @@ -87,6 +88,9 @@ test-libelf-getphdrnum.bin: test-libnuma.bin: $(BUILD) -lnuma +test-numa_num_possible_cpus.bin: + $(BUILD) -lnuma + test-libunwind.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 84689a67814a..7a8cdbad3e1b 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -77,6 +77,10 @@ # include "test-libnuma.c" #undef main +#define main main_test_numa_num_possible_cpus +# include "test-numa_num_possible_cpus.c" +#undef main + #define main main_test_timerfd # include "test-timerfd.c" #undef main @@ -136,6 +140,7 @@ int main(int argc, char *argv[]) main_test_libbfd(); main_test_backtrace(); main_test_libnuma(); + main_test_numa_num_possible_cpus(); main_test_timerfd(); main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); diff --git a/tools/build/feature/test-numa_num_possible_cpus.c b/tools/build/feature/test-numa_num_possible_cpus.c new file mode 100644 index 000000000000..2606e94b0659 --- /dev/null +++ b/tools/build/feature/test-numa_num_possible_cpus.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return numa_num_possible_cpus(); +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 827557fc7511..053e65b04dc3 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -573,9 +573,14 @@ ifndef NO_LIBNUMA msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); NO_LIBNUMA := 1 else - CFLAGS += -DHAVE_LIBNUMA_SUPPORT - EXTLIBS += -lnuma - $(call detected,CONFIG_NUMA) + ifeq ($(feature-numa_num_possible_cpus), 0) + msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8); + NO_LIBNUMA := 1 + else + CFLAGS += -DHAVE_LIBNUMA_SUPPORT + EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) + endif endif endif From b0063dbfb031a7c728ed0d9533257e8329292cf1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Sep 2015 12:54:30 -0300 Subject: [PATCH 78/96] tools build: Add test for presence of __get_cpuid() gcc builtin The auxtrace code needed by Intel PT uses the __get_cpuid() gcc builtin, that is not present in old systems, breaking the build. Add a test to check for that builtin and disable AUXTRACE in those systems. [acme@rhel5 linux]$ make NO_LIBPERL=1 -C tools/perf O=/tmp/build/perf install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j2' parallel build Auto-detecting system features: ... lzma: [ on ] ... get_cpuid: [ OFF ] config/Makefile:630: Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc MKDIR /tmp/build/perf/util/ This fixes the build on old systems such as RHEL/CentOS 5.11. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Naveen N. Rao" Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Stephane Eranian Cc: Victor Kamensky Cc: Vinson Lee Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d4puslul0jltoodzpx9r4sje@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 6 ++++-- tools/build/feature/Makefile | 6 +++++- tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-get_cpuid.c | 7 +++++++ tools/perf/config/Makefile | 9 +++++++-- 5 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 tools/build/feature/test-get_cpuid.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 970242098a7c..c8fe6d177119 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -52,7 +52,8 @@ FEATURE_TESTS ?= \ timerfd \ libdw-dwarf-unwind \ zlib \ - lzma + lzma \ + get_cpuid FEATURE_DISPLAY ?= \ dwarf \ @@ -69,7 +70,8 @@ FEATURE_DISPLAY ?= \ libunwind \ libdw-dwarf-unwind \ zlib \ - lzma + lzma \ + get_cpuid # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index e13a42bd0274..e43a2971bf56 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -35,7 +35,8 @@ FILES= \ test-compile-x32.bin \ test-zlib.bin \ test-lzma.bin \ - test-bpf.bin + test-bpf.bin \ + test-get_cpuid.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config @@ -166,6 +167,9 @@ test-zlib.bin: test-lzma.bin: $(BUILD) -llzma +test-get_cpuid.bin: + $(BUILD) + test-bpf.bin: $(BUILD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 7a8cdbad3e1b..33cf6f20bd4e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -121,6 +121,10 @@ # include "test-lzma.c" #undef main +#define main main_test_get_cpuid +# include "test-get_cpuid.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -148,6 +152,7 @@ int main(int argc, char *argv[]) main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_lzma(); + main_test_get_cpuid(); return 0; } diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c new file mode 100644 index 000000000000..d7a2c407130d --- /dev/null +++ b/tools/build/feature/test-get_cpuid.c @@ -0,0 +1,7 @@ +#include + +int main(void) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 053e65b04dc3..38a08539f4bf 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -626,8 +626,13 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - $(call detected,CONFIG_AUXTRACE) - CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-get_cpuid), 0) + msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); + NO_AUXTRACE := 1 + else + $(call detected,CONFIG_AUXTRACE) + CFLAGS += -DHAVE_AUXTRACE_SUPPORT + endif endif # Among the variables below, these: From 7f61f545657281a3a1b0faf68993165ebdecc51b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 16:01:05 +0300 Subject: [PATCH 79/96] libceph: don't access invalid memory in keepalive2 path This struct ceph_timespec ceph_ts; ... con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); wraps ceph_ts into a kvec and adds it to con->out_kvec array, yet ceph_ts becomes invalid on return from prepare_write_keepalive(). As a result, we send out bogus keepalive2 stamps. Fix this by encoding into a ceph_timespec member, similar to how acks are read and written. Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng --- include/linux/ceph/messenger.h | 4 +++- net/ceph/messenger.c | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 7e1252e97a30..b2371d9b51fa 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -238,6 +238,8 @@ struct ceph_connection { bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ + struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 + stamp */ /* message in temps */ struct ceph_msg_header in_hdr; @@ -248,7 +250,7 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ - struct timespec last_keepalive_ack; + struct timespec last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 525f454f7531..b9b0e3b5da49 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1353,11 +1353,12 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec ts = CURRENT_TIME; - struct ceph_timespec ceph_ts; - ceph_encode_timespec(&ceph_ts, &ts); + struct timespec now = CURRENT_TIME; + con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); - con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); + ceph_encode_timespec(&con->out_temp_keepalive2, &now); + con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), + &con->out_temp_keepalive2); } else { con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); } From 335c25858218e76ef47f92ecb9d22e919d36140d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 12:44:22 +0300 Subject: [PATCH 80/96] libceph: advertise support for keepalive2 We are the client, but advertise keepalive2 anyway - for consistency, if nothing else. In the future the server might want to know whether its clients support keepalive2. Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng --- include/linux/ceph/ceph_features.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 4763ad64e832..f89b31d45cc8 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -107,6 +107,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ + CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_CRUSH_V4) #define CEPH_FEATURES_REQUIRED_DEFAULT \ From bf6445631c6f00882b25516a174d5073ce0c6f81 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Thu, 17 Sep 2015 12:08:53 +0200 Subject: [PATCH 81/96] perf tools: Bool functions shouldn't return -1 Returning a negative value for a boolean function seem to have the undesired effect of returning true. Replace -1 by false in a bool-returning function. The diff of the .s file before and after the change (for x86_64): 3907c3907 < movl $1, %ebx --- > xorl %ebx, %ebx while if -1 is replaced by true, the diff is empty. This issue was found by the following Coccinelle semantic patch: @@ identifier f; constant C; typedef bool; @@ bool f (...){ <+... * return -C; ...+> } Signed-off-by: Peter Senna Tschudin Cc: Jiri Olsa Cc: Kan Liang Cc: Matt Fleming Cc: Milos Vyletel Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1442484533-19742-1-git-send-email-peter.senna@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7acafb3c5592..c2cd9bf2348b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -709,7 +709,7 @@ bool find_process(const char *name) dir = opendir(procfs__mountpoint()); if (!dir) - return -1; + return false; /* Walk through the directory. */ while (ret && (d = readdir(dir)) != NULL) { From 8a1513b49321e503fd6c8b6793e3b1f9a8a3285b Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Fri, 11 Sep 2015 10:40:17 -0500 Subject: [PATCH 82/96] hp-wmi: limit hotkey enable Do not write initialize magic on systems that do not have feature query 0xb. Fixes Bug #82451. Redefine FEATURE_QUERY to align with 0xb and FEATURE2 with 0xd for code clearity. Add a new test function, hp_wmi_bios_2008_later() & simplify hp_wmi_bios_2009_later(), which fixes a bug in cases where an improper value is returned. Probably also fixes Bug #69131. Add missing __init tag. Signed-off-by: Kyle Evans Cc: stable@vger.kernel.org Signed-off-by: Darren Hart --- drivers/platform/x86/hp-wmi.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 06697315a088..fb4dd7b3ee71 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -54,8 +54,9 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HARDWARE_QUERY 0x4 #define HPWMI_WIRELESS_QUERY 0x5 #define HPWMI_BIOS_QUERY 0x9 +#define HPWMI_FEATURE_QUERY 0xb #define HPWMI_HOTKEY_QUERY 0xc -#define HPWMI_FEATURE_QUERY 0xd +#define HPWMI_FEATURE2_QUERY 0xd #define HPWMI_WIRELESS2_QUERY 0x1b #define HPWMI_POSTCODEERROR_QUERY 0x2a @@ -295,25 +296,33 @@ static int hp_wmi_tablet_state(void) return (state & 0x4) ? 1 : 0; } -static int __init hp_wmi_bios_2009_later(void) +static int __init hp_wmi_bios_2008_later(void) { int state = 0; int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, 0, &state, sizeof(state), sizeof(state)); - if (ret) - return ret; + if (!ret) + return 1; - return (state & 0x10) ? 1 : 0; + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; } -static int hp_wmi_enable_hotkeys(void) +static int __init hp_wmi_bios_2009_later(void) { - int ret; - int query = 0x6e; + int state = 0; + int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, 0, &state, + sizeof(state), sizeof(state)); + if (!ret) + return 1; - ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query), - 0); + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; +} +static int __init hp_wmi_enable_hotkeys(void) +{ + int value = 0x6e; + int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &value, + sizeof(value), 0); if (ret) return -EINVAL; return 0; @@ -663,7 +672,7 @@ static int __init hp_wmi_input_setup(void) hp_wmi_tablet_state()); input_sync(hp_wmi_input_dev); - if (hp_wmi_bios_2009_later() == 4) + if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later()) hp_wmi_enable_hotkeys(); status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL); From 3aaf14da807a4e9931a37f21e4251abb8a67021b Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 17 Sep 2015 16:01:40 -0700 Subject: [PATCH 83/96] zram: fix possible use after free in zcomp_create() zcomp_create() verifies the success of zcomp_strm_{multi,single}_create() through comp->stream, which can potentially be pointing to memory that was freed if these functions returned an error. While at it, replace a 'ERR_PTR(-ENOMEM)' by a more generic 'ERR_PTR(error)' as in the future zcomp_strm_{multi,siggle}_create() could return other error codes. Function documentation updated accordingly. Fixes: beca3ec71fe5 ("zram: add multi stream functionality") Signed-off-by: Luis Henriques Acked-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zcomp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 965d1afb0eaa..5cb13ca3a3ac 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -330,12 +330,14 @@ void zcomp_destroy(struct zcomp *comp) * allocate new zcomp and initialize it. return compressing * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in - * case of allocation error. + * case of allocation error, or any other error potentially + * returned by functions zcomp_strm_{multi,single}_create. */ struct zcomp *zcomp_create(const char *compress, int max_strm) { struct zcomp *comp; struct zcomp_backend *backend; + int error; backend = find_backend(compress); if (!backend) @@ -347,12 +349,12 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) comp->backend = backend; if (max_strm > 1) - zcomp_strm_multi_create(comp, max_strm); + error = zcomp_strm_multi_create(comp, max_strm); else - zcomp_strm_single_create(comp); - if (!comp->stream) { + error = zcomp_strm_single_create(comp); + if (error) { kfree(comp); - return ERR_PTR(-ENOMEM); + return ERR_PTR(error); } return comp; } From 8d77a6d18ae9ccfd5eee1cc551ee4ac27fd41464 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Thu, 17 Sep 2015 16:01:43 -0700 Subject: [PATCH 84/96] kasan: fix last shadow judgement in memory_is_poisoned_16() The shadow which correspond 16 bytes memory may span 2 or 3 bytes. If the memory is aligned on 8, then the shadow takes only 2 bytes. So we check "shadow_first_bytes" is enough, and need not to call "memory_is_poisoned_1(addr + 15);". But the code "if (likely(!last_byte))" is wrong judgement. e.g. addr=0, so last_byte = 15 & KASAN_SHADOW_MASK = 7, then the code will continue to call "memory_is_poisoned_1(addr + 15);" Signed-off-by: Xishi Qiu Acked-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Rusty Russell Cc: Michal Marek Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 7b28e9cdf1c7..8da211411b57 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr) if (unlikely(*shadow_addr)) { u16 shadow_first_bytes = *(u16 *)shadow_addr; - s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; if (unlikely(shadow_first_bytes)) return true; - if (likely(!last_byte)) + if (likely(IS_ALIGNED(addr, 8))) return false; return memory_is_poisoned_1(addr + 15); From 969560d2afca76823cf97ec4f5c0fb7833e18553 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 17 Sep 2015 16:01:46 -0700 Subject: [PATCH 85/96] alpha: io: define ioremap_uc ioremap_uc was not defined and as a result while building with allmodconfig were getting build error of: implicit declaration of function 'ioremap_uc'. Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/io.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index f05bdb4b1cb9..ff4049155c84 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -297,7 +297,9 @@ static inline void __iomem * ioremap_nocache(unsigned long offset, unsigned long size) { return ioremap(offset, size); -} +} + +#define ioremap_uc ioremap_nocache static inline void iounmap(volatile void __iomem *addr) { From 14b97deddf8ddecce9f35165b667c55c73e14638 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 17 Sep 2015 16:01:49 -0700 Subject: [PATCH 86/96] alpha: lib: export __delay __delay was not exported as a result while building with allmodconfig we were getting build error of undefined symbol. __delay is being used by: drivers/net/phy/mdio-octeon.c Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/lib/udelay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/lib/udelay.c b/arch/alpha/lib/udelay.c index 69d52aa37bae..f2d81ff38aa6 100644 --- a/arch/alpha/lib/udelay.c +++ b/arch/alpha/lib/udelay.c @@ -30,6 +30,7 @@ __delay(int loops) " bgt %0,1b" : "=&r" (tmp), "=r" (loops) : "1"(loops)); } +EXPORT_SYMBOL(__delay); #ifdef CONFIG_SMP #define LPJ cpu_data[smp_processor_id()].loops_per_jiffy From 62bef58a55dfa8ada2a22b2496c6340468ecd98a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 17 Sep 2015 16:01:51 -0700 Subject: [PATCH 87/96] lib/string_helpers.c: fix infinite loop in string_get_size() Some string_get_size() calls (e.g.: string_get_size(1, 512, STRING_UNITS_10, ..., ...) string_get_size(15, 64, STRING_UNITS_10, ..., ...) ) result in an infinite loop. The problem is that if size is equal to divisor[units]/blk_size and is smaller than divisor[units] we'll end up with size == 0 when we start doing sf_cap calculations: For string_get_size(1, 512, STRING_UNITS_10, ..., ...) case: ... remainder = do_div(size, divisor[units]); -> size is 0, remainder is 1 remainder *= blk_size; -> remainder is 512 ... size *= blk_size; -> size is still 0 size += remainder / divisor[units]; -> size is still 0 The caller causing the issue is sd_read_capacity(), the problem was noticed on Hyper-V, such weird size was reported by host when scanning collides with device removal. This is probably a separate issue worth fixing, this patch is intended to prevent the library routine from infinite looping. Signed-off-by: Vitaly Kuznetsov Acked-by: James Bottomley Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: "K. Y. Srinivasan" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/string_helpers.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 54036ce2e2dd..5939f63d90cd 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -59,7 +59,11 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, } exp = divisor[units] / (u32)blk_size; - if (size >= exp) { + /* + * size must be strictly greater than exp here to ensure that remainder + * is greater than divisor[units] coming out of the if below. + */ + if (size > exp) { remainder = do_div(size, divisor[units]); remainder *= blk_size; i++; From c03e946fdd653c4a23e242aca83da7e9838f5b00 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 17 Sep 2015 16:01:54 -0700 Subject: [PATCH 88/96] userfaultfd: add missing mmput() in error path This fixes a memleak if anon_inode_getfile() fails in userfaultfd(). Signed-off-by: Eric Biggers Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 634e676072cb..f9aeb40a7197 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1287,8 +1287,10 @@ static struct file *userfaultfd_file_create(int flags) file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); - if (IS_ERR(file)) + if (IS_ERR(file)) { + mmput(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); + } out: return file; } From 0526109a24eb07984f9e79852767300c8b8144de Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Thu, 17 Sep 2015 16:01:57 -0700 Subject: [PATCH 89/96] MAINTAINERS: update LTP mailing list [akpm@linux-foundation.org: Wanlong Gao has moved] Signed-off-by: Cyril Hrubis Cc: Jan Stancek Cc: Stanislav Kholmanskikh Cc: Alexey Kodanev Cc: Wanlong Gao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7ba7ab749c85..274f85405584 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6452,11 +6452,11 @@ F: drivers/hwmon/ltc4261.c LTP (Linux Test Project) M: Mike Frysinger M: Cyril Hrubis -M: Wanlong Gao +M: Wanlong Gao M: Jan Stancek M: Stanislav Kholmanskikh M: Alexey Kodanev -L: ltp-list@lists.sourceforge.net (subscribers-only) +L: ltp@lists.linux.it (subscribers-only) W: http://linux-test-project.github.io/ T: git git://github.com/linux-test-project/ltp.git S: Maintained From 28c553d0aa0acf02e18f9e008661491a4b996595 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 17 Sep 2015 16:02:00 -0700 Subject: [PATCH 90/96] revert "mm: make sure all file VMAs have ->vm_ops set" Revert commit 6dc296e7df4c "mm: make sure all file VMAs have ->vm_ops set". Will Deacon reports that it "causes some mmap regressions in LTP, which appears to use a MAP_PRIVATE mmap of /dev/zero as a way to get anonymous pages in some of its tests (specifically mmap10 [1])". William Shuman reports Oracle crashes. So revert the patch while we work out what to do. Reported-by: William Shuman Reported-by: Will Deacon Cc: Kirill A. Shutemov Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 971dd2cb77d2..c739d6db7193 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { - WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops"); - /* Update tracking information for the gap following the new vma. */ if (vma->vm_next) vma_gap_update(vma->vm_next); @@ -1638,12 +1636,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ WARN_ON_ONCE(addr != vma->vm_start); - /* All file mapping must have ->vm_ops set */ - if (!vma->vm_ops) { - static const struct vm_operations_struct dummy_ops = {}; - vma->vm_ops = &dummy_ops; - } - addr = vma->vm_start; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { From 5e176213a6b2bc5146820c79542d37290434a3c4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 14 Sep 2015 14:47:02 -0700 Subject: [PATCH 91/96] perf/x86/intel: Make the CYCLE_ACTIVITY.* constraint on Broadwell more specific The counter constraint for CYCLE_ACTIVITY.* on Broadwell covered all CYCLE_ACTIVITY.* sub events, and forced them on counter 2. But actually only one sub event (umask 8) needs to be on counter 2, all others do not have any constraint. Only force that subevent. This fixes groups with multiple CYCLE_ACTIVITY.* events, for example: % perf stat -x, -e '{cpu/event=0xa3,umask=0x6,cmask=6/,\ cpu/event=0xa2,umask=0x8/,\ cpu/event=0xa3,umask=0x4,cmask=4/,cpu/event=0xb1,umask=0x1,cmask=1/}' true 122150,,cpu/event=0xa3,umask=0x6,cmask=6/,846486,100.00 16483,,cpu/event=0xa2,umask=0x8/,846486,100.00 252280,,cpu/event=0xa3,umask=0x4,cmask=4/,846486,100.00 233604,,cpu/event=0xb1,umask=0x1,cmask=1/,846486,100.00 % Without this patch the third result would be Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1442267222-16464-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3fefebfbdf4b..1d84b41ba932 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -250,7 +250,7 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ + INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ EVENT_CONSTRAINT_END }; From d0dc8494cd6904f8ad035d9ad97f313948f35d0c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 9 Sep 2015 14:53:59 -0700 Subject: [PATCH 92/96] perf/x86/intel/pebs: Add PEBS frontend profiling for Skylake Skylake has a new FRONTEND_LATENCY PEBS event to accurately profile frontend problems (like ITLB or decoding issues). The new event is configured through a separate MSR, which selects a range of sub events. Define the extra MSR as a extra reg and export support for it through sysfs. To avoid duplicating the existing tables use a new function to add new entries to existing tables. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1435707205-6676-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c1c0a1c14344..54390bc140dd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -141,6 +141,8 @@ #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define MSR_PEBS_FRONTEND 0x000003f7 + #define MSR_IA32_POWER_CTL 0x000001fc #define MSR_IA32_MC0_CTL 0x00000400 diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 5edf6d868fc1..165be83a7fa4 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -47,6 +47,7 @@ enum extra_reg_type { EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ EXTRA_REG_MAX /* number of entries needed */ }; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1d84b41ba932..ef74c9d05f82 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -205,6 +205,7 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x3fff17, FE), EVENT_EXTRA_END }; @@ -2891,6 +2892,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); +PMU_FORMAT_ATTR(frontend, "config1:0-23"); + static struct attribute *intel_arch3_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -2907,6 +2910,11 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static struct attribute *skl_format_attr[] = { + &format_attr_frontend.attr, + NULL, +}; + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -3516,7 +3524,8 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, + skl_format_attr); WARN_ON(!x86_pmu.format_attrs); x86_pmu.cpu_events = hsw_events_attrs; pr_cont("Skylake events, "); From dfe1f3cb312624928052413928d88b0ee3492216 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 9 Sep 2015 14:54:00 -0700 Subject: [PATCH 93/96] perf/x86/intel: Fix Skylake FRONTEND MSR extrareg mask Stephane pointed out that the extrareg mask was one bit too short. The bubble width field was truncated by one bit. Fix that here. Also add some extra comments on the reserved bits inside the event select code. Reported-by: Stephane Eranian Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441835640-21347-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ef74c9d05f82..f63360be2238 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -205,7 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), - INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x3fff17, FE), + /* + * Note the low 8 bits eventsel code is not a continuous field, containing + * some #GPing bits. These are masked out. + */ + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), EVENT_EXTRA_END }; From f55fc2a57cc9ca3b1bb4fb8eb25b6e1989e5b993 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Sep 2015 19:06:33 +0200 Subject: [PATCH 94/96] perf: Restructure perf syscall point of no return The exclusive_event_installable() stuff only works because its exclusive with the grouping bits. Rework the code such that there is a sane place to error out before we go do things we cannot undo. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 49 +++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f548f69c4299..39679f749500 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8297,13 +8297,30 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) { gctx = group_leader->ctx; + mutex_lock_double(&gctx->mutex, &ctx->mutex); + } else { + mutex_lock(&ctx->mutex); + } + /* + * Must be under the same ctx::mutex as perf_install_in_context(), + * because we need to serialize with concurrent event creation. + */ + if (!exclusive_event_installable(event, ctx)) { + /* exclusive and group stuff are assumed mutually exclusive */ + WARN_ON_ONCE(move_group); + + err = -EBUSY; + goto err_locked; + } + + WARN_ON_ONCE(ctx->parent_ctx); + + if (move_group) { /* * See perf_event_ctx_lock() for comments on the details * of swizzling perf_event::ctx. */ - mutex_lock_double(&gctx->mutex, &ctx->mutex); - perf_remove_from_context(group_leader, false); list_for_each_entry(sibling, &group_leader->sibling_list, @@ -8311,13 +8328,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_remove_from_context(sibling, false); put_ctx(gctx); } - } else { - mutex_lock(&ctx->mutex); - } - WARN_ON_ONCE(ctx->parent_ctx); - - if (move_group) { /* * Wait for everybody to stop referencing the events through * the old lists, before installing it on new lists. @@ -8349,22 +8360,20 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(group_leader); perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); - } - if (!exclusive_event_installable(event, ctx)) { - err = -EBUSY; - mutex_unlock(&ctx->mutex); - fput(event_file); - goto err_context; + /* + * Now that all events are installed in @ctx, nothing + * references @gctx anymore, so drop the last reference we have + * on it. + */ + put_ctx(gctx); } perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); - if (move_group) { + if (move_group) mutex_unlock(&gctx->mutex); - put_ctx(gctx); - } mutex_unlock(&ctx->mutex); put_online_cpus(); @@ -8391,6 +8400,12 @@ SYSCALL_DEFINE5(perf_event_open, fd_install(event_fd, event_file); return event_fd; +err_locked: + if (move_group) + mutex_unlock(&gctx->mutex); + mutex_unlock(&ctx->mutex); +/* err_file: */ + fput(event_file); err_context: perf_unpin_context(ctx); put_ctx(ctx); From a723968c0ed36db676478c3d26078f13484fe01c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Sep 2015 19:06:33 +0200 Subject: [PATCH 95/96] perf: Fix u16 overflows Vince reported that its possible to overflow the various size fields and get weird stuff if you stick too many events in a group. Put a lid on this by requiring the fixed record size not exceed 16k. This is still a fair amount of events (silly amount really) and leaves plenty room for callchains and stack dwarves while also avoiding overflowing the u16 variables. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 50 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 39679f749500..dbb5329b6a3a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -/* - * Called at perf_event creation and when events are attached/detached from a - * group. - */ -static void perf_event__read_size(struct perf_event *event) +static void __perf_event_read_size(struct perf_event *event, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; @@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event) entry += sizeof(u64); if (event->attr.read_format & PERF_FORMAT_GROUP) { - nr += event->group_leader->nr_siblings; + nr += nr_siblings; size += sizeof(u64); } @@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event) event->read_size = size; } -static void perf_event__header_size(struct perf_event *event) +static void __perf_event_header_size(struct perf_event *event, u64 sample_type) { struct perf_sample_data *data; - u64 sample_type = event->attr.sample_type; u16 size = 0; - perf_event__read_size(event); - if (sample_type & PERF_SAMPLE_IP) size += sizeof(data->ip); @@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event) event->header_size = size; } +/* + * Called at perf_event creation and when events are attached/detached from a + * group. + */ +static void perf_event__header_size(struct perf_event *event) +{ + __perf_event_read_size(event, + event->group_leader->nr_siblings); + __perf_event_header_size(event, event->attr.sample_type); +} + static void perf_event__id_header_size(struct perf_event *event) { struct perf_sample_data *data; @@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +static bool perf_event_validate_size(struct perf_event *event) +{ + /* + * The values computed here will be over-written when we actually + * attach the event. + */ + __perf_event_read_size(event, event->group_leader->nr_siblings + 1); + __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); + perf_event__id_header_size(event); + + /* + * Sum the lot; should not exceed the 64k limit we have on records. + * Conservative limit to allow for callchains and other variable fields. + */ + if (event->read_size + event->header_size + + event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + return false; + + return true; +} + static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; @@ -8302,6 +8327,11 @@ SYSCALL_DEFINE5(perf_event_open, mutex_lock(&ctx->mutex); } + if (!perf_event_validate_size(event)) { + err = -E2BIG; + goto err_locked; + } + /* * Must be under the same ctx::mutex as perf_install_in_context(), * because we need to serialize with concurrent event creation. From f73e22ab450140830005581c2c7ec389791a1b8d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Sep 2015 20:48:22 +0200 Subject: [PATCH 96/96] perf: Fix races in computing the header sizes There are two races with the current code: - Another event can join the group and compute a larger header_size concurrently, if the smaller store wins we'll have an incorrect header_size set. - We compute the header_size after the event becomes active, therefore its possible to use the size before its computed. Remedy the first by moving the computation inside the ctx::mutex lock, and the second by placing it _before_ perf_install_in_context(). Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index dbb5329b6a3a..b11756f9b6dc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8399,6 +8399,15 @@ SYSCALL_DEFINE5(perf_event_open, put_ctx(gctx); } + /* + * Precalculate sample_data sizes; do while holding ctx::mutex such + * that we're serialized against further additions and before + * perf_install_in_context() which is the point the event is active and + * can use these values. + */ + perf_event__header_size(event); + perf_event__id_header_size(event); + perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); @@ -8414,12 +8423,6 @@ SYSCALL_DEFINE5(perf_event_open, list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); - /* - * Precalculate sample_data sizes - */ - perf_event__header_size(event); - perf_event__id_header_size(event); - /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction