From 493c65aab4cfbdec1b065d409fd8b2c9b907d8ec Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Mon, 2 Dec 2013 11:47:16 +0000 Subject: [PATCH 01/80] ARM: vexpress/TC2: Match mainline cache disabling sequence in tc2_pm_down When the TC2 pm code was finally upstreamed [1] the cache disbling sequence had been modified to avoid some potential race conditions. So lets backport these changes. [1] Commit 11b277eabe70 ARM: vexpress/TC2: basic PM support Signed-off-by: Jon Medhurst --- arch/arm/mach-vexpress/tc2_pm.c | 64 +++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index 2b519eee84d8..b4b090ccc61f 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -135,20 +135,40 @@ static void tc2_pm_down(u64 residency) if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&tc2_pm_lock); - set_cr(get_cr() & ~CR_C); - flush_cache_all(); - asm volatile ("clrex"); - set_auxcr(get_auxcr() & ~(1 << 6)); - - cci_disable_port_by_cpu(mpidr); + if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) { + /* + * On the Cortex-A15 we need to disable + * L2 prefetching before flushing the cache. + */ + asm volatile( + "mcr p15, 1, %0, c15, c0, 3 \n\t" + "isb \n\t" + "dsb " + : : "r" (0x400) ); + } /* - * Ensure that both C & I bits are disabled in the SCTLR - * before disabling ACE snoops. This ensures that no - * coherency traffic will originate from this cpu after - * ACE snoops are turned off. + * We need to disable and flush the whole (L1 and L2) cache. + * Let's do it in the safest possible way i.e. with + * no memory access within the following sequence + * including the stack. */ - cpu_proc_fin(); + asm volatile( + "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" + "bic r0, r0, #"__stringify(CR_C)" \n\t" + "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" + "isb \n\t" + "bl v7_flush_dcache_all \n\t" + "clrex \n\t" + "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" + "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" + "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" + "isb \n\t" + "dsb " + : : : "r0","r1","r2","r3","r4","r5","r6","r7", + "r9","r10","r11","lr","memory"); + + cci_disable_port_by_cpu(mpidr); __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); } else { @@ -162,10 +182,24 @@ static void tc2_pm_down(u64 residency) arch_spin_unlock(&tc2_pm_lock); - set_cr(get_cr() & ~CR_C); - flush_cache_louis(); - asm volatile ("clrex"); - set_auxcr(get_auxcr() & ~(1 << 6)); + /* + * We need to disable and flush only the L1 cache. + * Let's do it in the safest possible way as above. + */ + asm volatile( + "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" + "bic r0, r0, #"__stringify(CR_C)" \n\t" + "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" + "isb \n\t" + "bl v7_flush_dcache_louis \n\t" + "clrex \n\t" + "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" + "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" + "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" + "isb \n\t" + "dsb " + : : : "r0","r1","r2","r3","r4","r5","r6","r7", + "r9","r10","r11","lr","memory"); } __mcpm_cpu_down(cpu, cluster); From 4c35ee5ab39fb4ab30662edbfc7b6ea5c88943f2 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 16 Jul 2013 20:59:53 -0400 Subject: [PATCH 02/80] ARM: vexpress/dcscb: fix cache disabling sequences Commit e8f9bb1bd6bb93fff773345cc54c42585e0e3ece upstream Unlike real A15/A7's, the RTSM simulation doesn't appear to hit the cache when the CTRL.C bit is cleared. Let's ensure there is no memory access within the disable and flush cache sequence, including to the stack. Signed-off-by: Nicolas Pitre Signed-off-by: Jon Medhurst --- arch/arm/mach-vexpress/dcscb.c | 58 ++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 948aec0890f5..df593951cbf4 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -137,14 +137,29 @@ static void dcscb_power_down(void) /* * Flush all cache levels for this cluster. * - * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need - * a preliminary flush here for those CPUs. At least, that's - * the theory -- without the extra flush, Linux explodes on - * RTSM (to be investigated). + * To do so we do: + * - Clear the SCTLR.C bit to prevent further cache allocations + * - Flush the whole cache + * - Clear the ACTLR "SMP" bit to disable local coherency + * + * Let's do it in the safest possible way i.e. with + * no memory access within the following sequence + * including to the stack. */ - flush_cache_all(); - set_cr(get_cr() & ~CR_C); - flush_cache_all(); + asm volatile( + "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" + "bic r0, r0, #"__stringify(CR_C)" \n\t" + "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" + "isb \n\t" + "bl v7_flush_dcache_all \n\t" + "clrex \n\t" + "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" + "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" + "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" + "isb \n\t" + "dsb " + : : : "r0","r1","r2","r3","r4","r5","r6","r7", + "r9","r10","r11","lr","memory"); /* * This is a harmless no-op. On platforms with a real @@ -153,9 +168,6 @@ static void dcscb_power_down(void) */ outer_flush_all(); - /* Disable local coherency by clearing the ACTLR "SMP" bit: */ - set_auxcr(get_auxcr() & ~(1 << 6)); - /* * Disable cluster-level coherency by masking * incoming snoops and DVM messages: @@ -168,18 +180,22 @@ static void dcscb_power_down(void) /* * Flush the local CPU cache. - * - * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need - * a preliminary flush here for those CPUs. At least, that's - * the theory -- without the extra flush, Linux explodes on - * RTSM (to be investigated). + * Let's do it in the safest possible way as above. */ - flush_cache_louis(); - set_cr(get_cr() & ~CR_C); - flush_cache_louis(); - - /* Disable local coherency by clearing the ACTLR "SMP" bit: */ - set_auxcr(get_auxcr() & ~(1 << 6)); + asm volatile( + "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" + "bic r0, r0, #"__stringify(CR_C)" \n\t" + "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" + "isb \n\t" + "bl v7_flush_dcache_louis \n\t" + "clrex \n\t" + "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" + "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" + "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" + "isb \n\t" + "dsb " + : : : "r0","r1","r2","r3","r4","r5","r6","r7", + "r9","r10","r11","lr","memory"); } __mcpm_cpu_down(cpu, cluster); From 9902091e1e4aa3ff94e525c2a9ed3632698cd28f Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 14 Aug 2013 10:25:14 -0400 Subject: [PATCH 03/80] ARM: vexpress/MCPM: fix cache disable sequence when CONFIG_FRAME_POINTER=y Commit fac2e57742d9aa3dbe41860280352efda9d5566e upstream If CONFIG_FRAME_POINTER=y we get the following error: arch/arm/mach-vexpress/tc2_pm.c: In function 'tc2_pm_down': arch/arm/mach-vexpress/tc2_pm.c:200:1: error: fp cannot be used in asm here Let's fix that by explicitly preserving r11 on the stack and removing it from the clobber list. Reported-by: Russell King Reviewed-by: Dave Martin Signed-off-by: Nicolas Pitre Signed-off-by: Olof Johansson Signed-off-by: Jon Medhurst --- arch/arm/mach-vexpress/dcscb.c | 16 ++++++++++++---- arch/arm/mach-vexpress/tc2_pm.c | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index df593951cbf4..19a7d08102be 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -145,8 +145,13 @@ static void dcscb_power_down(void) * Let's do it in the safest possible way i.e. with * no memory access within the following sequence * including to the stack. + * + * Note: fp is preserved to the stack explicitly prior doing + * this since adding it to the clobber list is incompatible + * with having CONFIG_FRAME_POINTER=y. */ asm volatile( + "str fp, [sp, #-4]! \n\t" "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" "bic r0, r0, #"__stringify(CR_C)" \n\t" "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" @@ -157,9 +162,10 @@ static void dcscb_power_down(void) "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" "isb \n\t" - "dsb " + "dsb \n\t" + "ldr fp, [sp], #4" : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","r11","lr","memory"); + "r9","r10","lr","memory"); /* * This is a harmless no-op. On platforms with a real @@ -183,6 +189,7 @@ static void dcscb_power_down(void) * Let's do it in the safest possible way as above. */ asm volatile( + "str fp, [sp, #-4]! \n\t" "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" "bic r0, r0, #"__stringify(CR_C)" \n\t" "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" @@ -193,9 +200,10 @@ static void dcscb_power_down(void) "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" "isb \n\t" - "dsb " + "dsb \n\t" + "ldr fp, [sp], #4" : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","r11","lr","memory"); + "r9","r10","lr","memory"); } __mcpm_cpu_down(cpu, cluster); diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index b4b090ccc61f..9221903af53c 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -152,8 +152,13 @@ static void tc2_pm_down(u64 residency) * Let's do it in the safest possible way i.e. with * no memory access within the following sequence * including the stack. + * + * Note: fp is preserved to the stack explicitly prior doing + * this since adding it to the clobber list is incompatible + * with having CONFIG_FRAME_POINTER=y. */ asm volatile( + "str fp, [sp, #-4]! \n\t" "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" "bic r0, r0, #"__stringify(CR_C)" \n\t" "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" @@ -164,9 +169,10 @@ static void tc2_pm_down(u64 residency) "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" "isb \n\t" - "dsb " + "dsb \n\t" + "ldr fp, [sp], #4" : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","r11","lr","memory"); + "r9","r10","lr","memory"); cci_disable_port_by_cpu(mpidr); @@ -187,6 +193,7 @@ static void tc2_pm_down(u64 residency) * Let's do it in the safest possible way as above. */ asm volatile( + "str fp, [sp, #-4]! \n\t" "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" "bic r0, r0, #"__stringify(CR_C)" \n\t" "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" @@ -197,9 +204,10 @@ static void tc2_pm_down(u64 residency) "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" "isb \n\t" - "dsb " + "dsb \n\t" + "ldr fp, [sp], #4" : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","r11","lr","memory"); + "r9","r10","lr","memory"); } __mcpm_cpu_down(cpu, cluster); From 27d55bacd5002422f8dab3ee6f941a2e23eb38c5 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 18 Oct 2013 22:06:03 +0100 Subject: [PATCH 04/80] ARM: 7861/1: cacheflush: consolidate single-CPU ARMv7 cache disabling code Commit 39792c7cf3111d69dc4aa0923859d8b929e9039f upstream This code is becoming duplicated in many places. So let's consolidate it into a handy macro that is known to be right and available for reuse. Signed-off-by: Nicolas Pitre Acked-by: Dave Martin Signed-off-by: Russell King Signed-off-by: Jon Medhurst --- arch/arm/include/asm/cacheflush.h | 46 +++++++++++++++++++++++++ arch/arm/mach-vexpress/dcscb.c | 56 +++---------------------------- arch/arm/mach-vexpress/tc2_pm.c | 48 ++------------------------ 3 files changed, 52 insertions(+), 98 deletions(-) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 17d0ae8672fa..beb870221783 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -436,4 +436,50 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) #define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr)) #define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr)) +/* + * Disabling cache access for one CPU in an ARMv7 SMP system is tricky. + * To do so we must: + * + * - Clear the SCTLR.C bit to prevent further cache allocations + * - Flush the desired level of cache + * - Clear the ACTLR "SMP" bit to disable local coherency + * + * ... and so without any intervening memory access in between those steps, + * not even to the stack. + * + * WARNING -- After this has been called: + * + * - No ldrex/strex (and similar) instructions must be used. + * - The CPU is obviously no longer coherent with the other CPUs. + * - This is unlikely to work as expected if Linux is running non-secure. + * + * Note: + * + * - This is known to apply to several ARMv7 processor implementations, + * however some exceptions may exist. Caveat emptor. + * + * - The clobber list is dictated by the call to v7_flush_dcache_*. + * fp is preserved to the stack explicitly prior disabling the cache + * since adding it to the clobber list is incompatible with having + * CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering + * trampoline are inserted by the linker and to keep sp 64-bit aligned. + */ +#define v7_exit_coherency_flush(level) \ + asm volatile( \ + "stmfd sp!, {fp, ip} \n\t" \ + "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \ + "bic r0, r0, #"__stringify(CR_C)" \n\t" \ + "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \ + "isb \n\t" \ + "bl v7_flush_dcache_"__stringify(level)" \n\t" \ + "clrex \n\t" \ + "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \ + "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \ + "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \ + "isb \n\t" \ + "dsb \n\t" \ + "ldmfd sp!, {fp, ip}" \ + : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ + "r9","r10","lr","memory" ) + #endif diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 19a7d08102be..b35700f8e01f 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -134,38 +134,8 @@ static void dcscb_power_down(void) if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&dcscb_lock); - /* - * Flush all cache levels for this cluster. - * - * To do so we do: - * - Clear the SCTLR.C bit to prevent further cache allocations - * - Flush the whole cache - * - Clear the ACTLR "SMP" bit to disable local coherency - * - * Let's do it in the safest possible way i.e. with - * no memory access within the following sequence - * including to the stack. - * - * Note: fp is preserved to the stack explicitly prior doing - * this since adding it to the clobber list is incompatible - * with having CONFIG_FRAME_POINTER=y. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_all \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + /* Flush all cache levels for this cluster. */ + v7_exit_coherency_flush(all); /* * This is a harmless no-op. On platforms with a real @@ -184,26 +154,8 @@ static void dcscb_power_down(void) } else { arch_spin_unlock(&dcscb_lock); - /* - * Flush the local CPU cache. - * Let's do it in the safest possible way as above. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_louis \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + /* Disable and flush the local CPU cache. */ + v7_exit_coherency_flush(louis); } __mcpm_cpu_down(cpu, cluster); diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index 9221903af53c..9fc264a3bade 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -147,32 +147,7 @@ static void tc2_pm_down(u64 residency) : : "r" (0x400) ); } - /* - * We need to disable and flush the whole (L1 and L2) cache. - * Let's do it in the safest possible way i.e. with - * no memory access within the following sequence - * including the stack. - * - * Note: fp is preserved to the stack explicitly prior doing - * this since adding it to the clobber list is incompatible - * with having CONFIG_FRAME_POINTER=y. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_all \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + v7_exit_coherency_flush(all); cci_disable_port_by_cpu(mpidr); @@ -188,26 +163,7 @@ static void tc2_pm_down(u64 residency) arch_spin_unlock(&tc2_pm_lock); - /* - * We need to disable and flush only the L1 cache. - * Let's do it in the safest possible way as above. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_louis \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + v7_exit_coherency_flush(louis); } __mcpm_cpu_down(cpu, cluster); From c04e0d85bf2565725091fd652c0b444b993fd993 Mon Sep 17 00:00:00 2001 From: Mark Hambleton Date: Mon, 2 Dec 2013 18:10:44 +0000 Subject: [PATCH 05/80] ARM64: cmpxchg: update macros to prevent warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the value we are going to return is referenced in order to avoid warnings from newer GCCs such as: arch/arm64/include/asm/cmpxchg.h:162:3: warning: value computed is not used [-Wunused-value] ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ ^ net/netfilter/nf_conntrack_core.c:674:2: note: in expansion of macro ‘cmpxchg’ cmpxchg(&nf_conntrack_hash_rnd, 0, rand); Signed-off-by: Mark Hambleton Signed-off-by: Mark Brown --- arch/arm64/include/asm/cmpxchg.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 8a8ce0e73a38..8551fbf5d46d 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -158,17 +158,23 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, return ret; } -#define cmpxchg(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) -#define cmpxchg_local(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) From 682a3d0201261152f489138d5f034742bccbfe9a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 4 Dec 2013 18:23:13 -0600 Subject: [PATCH 06/80] cpufreq: enable ARM drivers on arm64 Enable cpufreq and power kconfig menus on arm64 along with arm cpufreq drivers. The power menu is needed for OPP support. At least on Calxeda systems, the same cpufreq driver is used for arm and arm64 based systems. [Submitted upstream by Rob, still in review at the current time] Signed-off-by: Rob Herring Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 8 ++++++++ drivers/cpufreq/Kconfig | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..c2b4a765ff4a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -229,6 +229,14 @@ config SYSVIPC_COMPAT endmenu +menu "Power management options" + +source "kernel/power/Kconfig" + +source "drivers/cpufreq/Kconfig" + +endmenu + source "net/Kconfig" source "drivers/Kconfig" diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 534fcb825153..a9c1324843eb 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -201,7 +201,7 @@ source "drivers/cpufreq/Kconfig.x86" endmenu menu "ARM CPU frequency scaling drivers" -depends on ARM +depends on ARM || ARM64 source "drivers/cpufreq/Kconfig.arm" endmenu From d2f31cbcd06833bf301a112efd91ee7232dbe2be Mon Sep 17 00:00:00 2001 From: Mark Hambleton Date: Mon, 2 Dec 2013 14:45:37 +0000 Subject: [PATCH 07/80] arm64: dts: Add a virtio disk to the RTSM motherboard Describe the virtio device so we can mount disk images in the simulator. [Reduced the size of the region based on feedback from review -- broonie] Signed-off-by: Mark Hambleton Signed-off-by: Mark Brown Acked-by: Will Deacon --- arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi index b45e5f39f577..2f2ecd217363 100644 --- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi +++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi @@ -183,6 +183,12 @@ clcd@1f0000 { clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>; clock-names = "clcdclk", "apb_pclk"; }; + + virtio_block@0130000 { + compatible = "virtio,mmio"; + reg = <0x130000 0x200>; + interrupts = <42>; + }; }; v2m_fixed_3v3: fixedregulator@0 { From 023353c890a0a27fe05b7d8bcfc1fec37ec06cb1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 2 Dec 2013 14:26:13 +0000 Subject: [PATCH 08/80] arm64: dts: Add a devicetree for the ARMv8 4xA53 4xA57 FVP Add a dts file for ARMv8 fixed virtual platform which models a big.LITTLE configuration for the architecture. This is based on the DT shipped by ARM but has been modified for mainline, removing features not present in mainline and updating the CPU bindings for mainline. Signed-off-by: Mark Brown --- arch/arm64/boot/dts/Makefile | 3 +- arch/arm64/boot/dts/fvp-base-gicv2-psci.dts | 233 ++++++++++++++++++++ 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/boot/dts/fvp-base-gicv2-psci.dts diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 68457e9e0975..24be4296c14c 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -1,4 +1,5 @@ -dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb +dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb \ + fvp-base-gicv2-psci.dtb targets += dtbs targets += $(dtb-y) diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts new file mode 100644 index 000000000000..736f546123bb --- /dev/null +++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2013, ARM Limited. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of ARM nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/dts-v1/; + +/memreserve/ 0x80000000 0x00010000; + +/ { +}; + +/ { + model = "FVP Base"; + compatible = "arm,vfp-base", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + psci { + compatible = "arm,psci"; + method = "smc"; + cpu_suspend = <0xc4000001>; + cpu_off = <0x84000002>; + cpu_on = <0xc4000003>; + }; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + big0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x0>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + big1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x1>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + big2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x2>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + big3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0 0x3>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little0: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x100>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little1: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x101>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little2: cpu@102 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x102>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + little3: cpu@103 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x103>; + enable-method = "psci"; + clock-frequency = <1000000>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x00000000 0x80000000 0 0x80000000>, + <0x00000008 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2f000000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x2f000000 0 0x10000>, + <0x0 0x2c000000 0 0x2000>, + <0x0 0x2c010000 0 0x2000>, + <0x0 0x2c02F000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 13 0xff01>, + <1 14 0xff01>, + <1 11 0xff01>, + <1 10 0xff01>; + clock-frequency = <100000000>; + }; + + timer@2a810000 { + compatible = "arm,armv7-timer-mem"; + reg = <0x0 0x2a810000 0x0 0x10000>; + clock-frequency = <100000000>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + frame@2a820000 { + frame-number = <0>; + interrupts = <0 25 4>; + reg = <0x0 0x2a820000 0x0 0x10000>; + }; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = <0 60 4>, + <0 61 4>, + <0 62 4>, + <0 63 4>; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; From d09f1f75c5e8f3b930cc3803af50c694eb903849 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 Dec 2013 12:10:15 +0000 Subject: [PATCH 09/80] arm64: dts: Add CPU topology properties for ARMv8 4xA53 4xA57 FVP This provides for discovery of the clusters using the CPU topology bindings, it is added as a separate commit to aid testing of the code for systems without explicit topology bindings. Signed-off-by: Mark Brown --- arch/arm64/boot/dts/fvp-base-gicv2-psci.dts | 31 +++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts index 736f546123bb..452ba22f3c6e 100644 --- a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts +++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts @@ -119,6 +119,37 @@ little3: cpu@103 { enable-method = "psci"; clock-frequency = <1000000>; }; + + cpu-map { + cluster0 { + core0 { + cpu = <&big0>; + }; + core1 { + cpu = <&big1>; + }; + core2 { + cpu = <&big2>; + }; + core3 { + cpu = <&big3>; + }; + }; + cluster1 { + core0 { + cpu = <&little0>; + }; + core1 { + cpu = <&little1>; + }; + core2 { + cpu = <&little2>; + }; + core3 { + cpu = <&little3>; + }; + }; + }; }; memory@80000000 { From 94eb302681ca87d0078ffb88dfc6d0e4a9b1141d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 Dec 2013 12:49:15 +0000 Subject: [PATCH 10/80] arm64: dts: Add panel for ARMv8 4xA53 4xA57 FVP This is not in mainline since the panel bindings have not yet been merged into mainline. Signed-off-by: Mark Brown --- arch/arm64/boot/dts/fvp-base-gicv2-psci.dts | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts index 452ba22f3c6e..79ddd464c7d0 100644 --- a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts +++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts @@ -261,4 +261,27 @@ smb { /include/ "rtsm_ve-motherboard.dtsi" }; + + panels { + panel@0 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; }; From 20aab0887ba6b06584fea3a8b46268b5662a1149 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:14 +0100 Subject: [PATCH 11/80] arm64: unify smp_psci.c and psci.c The functions in psci.c are only used from smp_psci.c, and smp_psci cannot function without psci.c. Additionally psci.c is built when !SMP, where it's expected that cpu_suspend may be useful. This patch unifies the two files, removing pointless duplication and paving the way for PSCI support in UP systems. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 00ef54bb97389bfe8894272f48496f4191aae946) Signed-off-by: Mark Brown --- arch/arm64/include/asm/psci.h | 19 ------------ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/psci.c | 54 ++++++++++++++++++++++++++++++++++- arch/arm64/kernel/smp_psci.c | 53 ---------------------------------- 4 files changed, 54 insertions(+), 74 deletions(-) delete mode 100644 arch/arm64/kernel/smp_psci.c diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index 0604237ecd99..e5312ea0ec1a 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,25 +14,6 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); -}; - -extern struct psci_operations psci_ops; - int psci_init(void); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d4..75a90c15b30b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,7 +14,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 14f73c445ff5..368b78788cb5 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -17,12 +17,31 @@ #include #include +#include #include #include #include +#include -struct psci_operations psci_ops; +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +struct psci_power_state { + u16 id; + u8 type; + u8 affinity_level; +}; + +struct psci_operations { + int (*cpu_suspend)(struct psci_power_state state, + unsigned long entry_point); + int (*cpu_off)(struct psci_power_state state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); +}; + +static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); @@ -209,3 +228,36 @@ int __init psci_init(void) of_node_put(np); return err; } + +#ifdef CONFIG_SMP + +static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) +{ + return 0; +} + +static int __init smp_psci_prepare_cpu(int cpu) +{ + int err; + + if (!psci_ops.cpu_on) { + pr_err("no cpu_on method, not booting CPU%d\n", cpu); + return -ENODEV; + } + + err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); + if (err) { + pr_err("failed to boot CPU%d (%d)\n", cpu, err); + return err; + } + + return 0; +} + +const struct smp_enable_ops smp_psci_ops __initconst = { + .name = "psci", + .init_cpu = smp_psci_init_cpu, + .prepare_cpu = smp_psci_prepare_cpu, +}; + +#endif diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c deleted file mode 100644 index 0c533301be77..000000000000 --- a/arch/arm64/kernel/smp_psci.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PSCI SMP initialisation - * - * Copyright (C) 2013 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include - -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) -{ - return 0; -} - -static int __init smp_psci_prepare_cpu(int cpu) -{ - int err; - - if (!psci_ops.cpu_on) { - pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu); - return -ENODEV; - } - - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - - return 0; -} - -const struct smp_enable_ops smp_psci_ops __initconst = { - .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, -}; From eee896b4ad47791576e0becaffe6a7b5e73b1bab Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:15 +0100 Subject: [PATCH 12/80] arm64: reorganise smp_enable_ops For hotplug support, we're going to want a place to store operations that do more than bring CPUs online, and it makes sense to group these with our current smp_enable_ops. For cpuidle support, we'll want to group additional functions, and we may want them even for UP kernels. This patch renames smp_enable_ops to the more general cpu_operations, and pulls the definitions out of smp code such that they can be used in UP kernels. While we're at it, fix up instances of the cpu parameter to be an unsigned int, drop the init markings and rename the *_cpu functions to cpu_* to reduce future churn when cpu_operations is extended. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit cd1aebf5277a3a154a9e4c0ea4b3acabb62e5cab) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cpu_ops.h | 33 +++++++++++++++++++++ arch/arm64/include/asm/smp.h | 11 ------- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/cpu_ops.c | 47 ++++++++++++++++++++++++++++++ arch/arm64/kernel/psci.c | 11 +++---- arch/arm64/kernel/smp.c | 39 ++++++------------------- arch/arm64/kernel/smp_spin_table.c | 11 +++---- 7 files changed, 102 insertions(+), 52 deletions(-) create mode 100644 arch/arm64/include/asm/cpu_ops.h create mode 100644 arch/arm64/kernel/cpu_ops.c diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h new file mode 100644 index 000000000000..3c60c8d1d928 --- /dev/null +++ b/arch/arm64/include/asm/cpu_ops.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_CPU_OPS_H +#define __ASM_CPU_OPS_H + +#include +#include + +struct device_node; + +struct cpu_operations { + const char *name; + int (*cpu_init)(struct device_node *, unsigned int); + int (*cpu_prepare)(unsigned int); +}; + +extern const struct cpu_operations *cpu_ops[NR_CPUS]; +extern const struct cpu_operations * __init cpu_get_ops(const char *name); + +#endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 4b8023c5d146..7e34295f78e3 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -66,15 +66,4 @@ extern volatile unsigned long secondary_holding_pen_release; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -struct device_node; - -struct smp_enable_ops { - const char *name; - int (*init_cpu)(struct device_node *, int); - int (*prepare_cpu)(int); -}; - -extern const struct smp_enable_ops smp_spin_table_ops; -extern const struct smp_enable_ops smp_psci_ops; - #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 75a90c15b30b..5ba2fd43a75b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -9,7 +9,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o + hyp-stub.o psci.o cpu_ops.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 000000000000..e2652aadd9ea --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,47 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP + &smp_spin_table_ops, + &cpu_psci_ops, +#endif + NULL, +}; + +const struct cpu_operations * __init cpu_get_ops(const char *name) +{ + const struct cpu_operations **ops = supported_cpu_ops; + + while (*ops) { + if (!strcmp(name, (*ops)->name)) + return *ops; + + ops++; + } + + return NULL; +} diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 368b78788cb5..ccec2ca67755 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -231,12 +232,12 @@ int __init psci_init(void) #ifdef CONFIG_SMP -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) { return 0; } -static int __init smp_psci_prepare_cpu(int cpu) +static int __init cpu_psci_cpu_prepare(unsigned int cpu) { int err; @@ -254,10 +255,10 @@ static int __init smp_psci_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_psci_ops __initconst = { +const struct cpu_operations cpu_psci_ops = { .name = "psci", - .init_cpu = smp_psci_init_cpu, - .prepare_cpu = smp_psci_prepare_cpu, + .cpu_init = cpu_psci_cpu_init, + .cpu_prepare = cpu_psci_cpu_prepare, }; #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5d54e3717bf8..96b14a1d5b02 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -235,28 +236,6 @@ void __init smp_prepare_boot_cpu(void) static void (*smp_cross_call)(const struct cpumask *, unsigned int); -static const struct smp_enable_ops *enable_ops[] __initconst = { - &smp_spin_table_ops, - &smp_psci_ops, - NULL, -}; - -static const struct smp_enable_ops *smp_enable_ops[NR_CPUS]; - -static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) -{ - const struct smp_enable_ops **ops = enable_ops; - - while (*ops) { - if (!strcmp(name, (*ops)->name)) - return *ops; - - ops++; - } - - return NULL; -} - /* * Enumerate the possible CPU set from the device tree and build the * cpu logical map array containing MPIDR values related to logical @@ -266,7 +245,7 @@ void __init smp_init_cpus(void) { const char *enable_method; struct device_node *dn = NULL; - int i, cpu = 1; + unsigned int i, cpu = 1; bool bootcpu_valid = false; while ((dn = of_find_node_by_type(dn, "cpu"))) { @@ -345,15 +324,15 @@ void __init smp_init_cpus(void) goto next; } - smp_enable_ops[cpu] = smp_get_enable_ops(enable_method); + cpu_ops[cpu] = cpu_get_ops(enable_method); - if (!smp_enable_ops[cpu]) { + if (!cpu_ops[cpu]) { pr_err("%s: invalid enable-method property: %s\n", dn->full_name, enable_method); goto next; } - if (smp_enable_ops[cpu]->init_cpu(dn, cpu)) + if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; pr_debug("cpu logical map 0x%llx\n", hwid); @@ -383,8 +362,8 @@ void __init smp_init_cpus(void) void __init smp_prepare_cpus(unsigned int max_cpus) { - int cpu, err; - unsigned int ncores = num_possible_cpus(); + int err; + unsigned int cpu, ncores = num_possible_cpus(); /* * are we trying to boot more cores than exist? @@ -411,10 +390,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (cpu == smp_processor_id()) continue; - if (!smp_enable_ops[cpu]) + if (!cpu_ops[cpu]) continue; - err = smp_enable_ops[cpu]->prepare_cpu(cpu); + err = cpu_ops[cpu]->cpu_prepare(cpu); if (err) continue; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7c35fa682f76..a8b76e4eccff 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -21,10 +21,11 @@ #include #include +#include static phys_addr_t cpu_release_addr[NR_CPUS]; -static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { /* * Determine the address from which the CPU is polling. @@ -40,7 +41,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) return 0; } -static int __init smp_spin_table_prepare_cpu(int cpu) +static int smp_spin_table_cpu_prepare(unsigned int cpu) { void **release_addr; @@ -59,8 +60,8 @@ static int __init smp_spin_table_prepare_cpu(int cpu) return 0; } -const struct smp_enable_ops smp_spin_table_ops __initconst = { +const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", - .init_cpu = smp_spin_table_init_cpu, - .prepare_cpu = smp_spin_table_prepare_cpu, + .cpu_init = smp_spin_table_cpu_init, + .cpu_prepare = smp_spin_table_cpu_prepare, }; From 92201a63ef9b5a692a3c273ea8964aa3ad83a8f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:16 +0100 Subject: [PATCH 13/80] arm64: factor out spin-table boot method The arm64 kernel has an internal holding pen, which is necessary for some systems where we can't bring CPUs online individually and must hold multiple CPUs in a safe area until the kernel is able to handle them. The current SMP infrastructure for arm64 is closely coupled to this holding pen, and alternative boot methods must launch CPUs into the pen, where they sit before they are launched into the kernel proper. With PSCI (and possibly other future boot methods), we can bring CPUs online individually, and need not perform the secondary_holding_pen dance. Instead, this patch factors the holding pen management code out to the spin-table boot method code, as it is the only boot method requiring the pen. A new entry point for secondaries, secondary_entry is added for other boot methods to use, which bypasses the holding pen and its associated overhead when bringing CPUs online. The smp.pen.text section is also removed, as the pen can live in head.text without problem. The cpu_operations structure is extended with two new functions, cpu_boot and cpu_postboot, for bringing a cpu into the kernel and performing any post-boot cleanup required by a bootmethod (e.g. resetting the secondary_holding_pen_release to INVALID_HWID). Documentation is added for cpu_operations. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 652af899799354049b273af897b798b8f03fdd88) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cpu_ops.h | 16 +++++++ arch/arm64/include/asm/smp.h | 3 +- arch/arm64/kernel/head.S | 12 ++++- arch/arm64/kernel/psci.c | 18 +++---- arch/arm64/kernel/smp.c | 65 ++------------------------ arch/arm64/kernel/smp_spin_table.c | 75 ++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 1 - 7 files changed, 117 insertions(+), 73 deletions(-) diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 3c60c8d1d928..67bc4fd83798 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,10 +21,26 @@ struct device_node; +/** + * struct cpu_operations - Callback operations for hotplugging CPUs. + * + * @name: Name of the property as appears in a devicetree cpu node's + * enable-method property. + * @cpu_init: Reads any data necessary for a specific enable-method from the + * devicetree, for a given cpu node and proposed logical id. + * @cpu_prepare: Early one-time preparation step for a cpu. If there is a + * mechanism for doing so, tests whether it is possible to boot + * the given CPU. + * @cpu_boot: Boots a cpu into the kernel. + * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * synchronisation. Called from the cpu being booted. + */ struct cpu_operations { const char *name; int (*cpu_init)(struct device_node *, unsigned int); int (*cpu_prepare)(unsigned int); + int (*cpu_boot)(unsigned int); + void (*cpu_postboot)(void); }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 7e34295f78e3..d64187ce69a2 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -60,8 +60,7 @@ struct secondary_data { void *stack; }; extern struct secondary_data secondary_data; -extern void secondary_holding_pen(void); -extern volatile unsigned long secondary_holding_pen_release; +extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53dcae49e729..3532ca613718 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -217,7 +217,6 @@ ENTRY(__boot_cpu_mode) .quad PAGE_OFFSET #ifdef CONFIG_SMP - .pushsection .smp.pen.text, "ax" .align 3 1: .quad . .quad secondary_holding_pen_release @@ -242,7 +241,16 @@ pen: ldr x4, [x3] wfe b pen ENDPROC(secondary_holding_pen) - .popsection + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl __calc_phys_offset // x2=phys offset + bl el2_setup // Drop to EL1 + b secondary_startup +ENDPROC(secondary_entry) ENTRY(secondary_startup) /* diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ccec2ca67755..fb56b6158344 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -239,26 +239,28 @@ static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) static int __init cpu_psci_cpu_prepare(unsigned int cpu) { - int err; - if (!psci_ops.cpu_on) { pr_err("no cpu_on method, not booting CPU%d\n", cpu); return -ENODEV; } - err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); - if (err) { - pr_err("failed to boot CPU%d (%d)\n", cpu, err); - return err; - } - return 0; } +static int cpu_psci_cpu_boot(unsigned int cpu) +{ + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + if (err) + pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); + + return err; +} + const struct cpu_operations cpu_psci_ops = { .name = "psci", .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, + .cpu_boot = cpu_psci_cpu_boot, }; #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 96b14a1d5b02..6e9779c878ad 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -55,7 +55,6 @@ * where to place its SVC stack */ struct secondary_data secondary_data; -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; enum ipi_msg_type { IPI_RESCHEDULE, @@ -64,61 +63,16 @@ enum ipi_msg_type { IPI_CPU_STOP, }; -static DEFINE_RAW_SPINLOCK(boot_lock); - -/* - * Write secondary_holding_pen_release in a way that is guaranteed to be - * visible to all observers, irrespective of whether they're taking part - * in coherency or not. This is necessary for the hotplug code to work - * reliably. - */ -static void __cpuinit write_pen_release(u64 val) -{ - void *start = (void *)&secondary_holding_pen_release; - unsigned long size = sizeof(secondary_holding_pen_release); - - secondary_holding_pen_release = val; - __flush_dcache_area(start, size); -} - /* * Boot a secondary CPU, and assign it the specified idle task. * This also gives us the initial stack to use for this CPU. */ static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned long timeout; + if (cpu_ops[cpu]->cpu_boot) + return cpu_ops[cpu]->cpu_boot(cpu); - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - - /* - * Update the pen release flag. - */ - write_pen_release(cpu_logical_map(cpu)); - - /* - * Send an event, causing the secondaries to read pen_release. - */ - sev(); - - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == INVALID_HWID) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); - - return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; + return -EOPNOTSUPP; } static DECLARE_COMPLETION(cpu_running); @@ -188,17 +142,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) preempt_disable(); trace_hardirqs_off(); - /* - * Let the primary processor know we're out of the - * pen, then head off into the C entry point - */ - write_pen_release(INVALID_HWID); - - /* - * Synchronise with the boot thread. - */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + if (cpu_ops[cpu]->cpu_postboot) + cpu_ops[cpu]->cpu_postboot(); /* * Enable local interrupts. diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index a8b76e4eccff..27f08367a6e7 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -16,14 +16,37 @@ * along with this program. If not, see . */ +#include #include #include #include #include #include +#include +#include + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; +static DEFINE_RAW_SPINLOCK(boot_lock); + +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not. This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ + void *start = (void *)&secondary_holding_pen_release; + unsigned long size = sizeof(secondary_holding_pen_release); + + secondary_holding_pen_release = val; + __flush_dcache_area(start, size); +} + static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) { @@ -60,8 +83,60 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) return 0; } +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ + unsigned long timeout; + + /* + * Set synchronisation state between this boot processor + * and the secondary one + */ + raw_spin_lock(&boot_lock); + + /* + * Update the pen release flag. + */ + write_pen_release(cpu_logical_map(cpu)); + + /* + * Send an event, causing the secondaries to read pen_release. + */ + sev(); + + timeout = jiffies + (1 * HZ); + while (time_before(jiffies, timeout)) { + if (secondary_holding_pen_release == INVALID_HWID) + break; + udelay(10); + } + + /* + * Now the secondary core is starting up let it run its + * calibrations, then wait for it to finish + */ + raw_spin_unlock(&boot_lock); + + return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; +} + +void smp_spin_table_cpu_postboot(void) +{ + /* + * Let the primary processor know we're out of the pen. + */ + write_pen_release(INVALID_HWID); + + /* + * Synchronise with the boot thread. + */ + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); +} + const struct cpu_operations smp_spin_table_ops = { .name = "spin-table", .cpu_init = smp_spin_table_cpu_init, .cpu_prepare = smp_spin_table_cpu_prepare, + .cpu_boot = smp_spin_table_cpu_boot, + .cpu_postboot = smp_spin_table_cpu_postboot, }; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b016..2c8a95b539ce 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -41,7 +41,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - *(.smp.pen.text) __exception_text_start = .; *(.exception.text) __exception_text_end = .; From 5ad709f5d9cb351caf1be770d03f420bd88bf248 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:17 +0100 Subject: [PATCH 14/80] arm64: read enable-method for CPU0 With the advent of CPU_HOTPLUG, the enable-method property for CPU0 may tells us something useful (i.e. how to hotplug it back on), so we must read it along with all the enable-method for all the other CPUs. Even on UP the enable-method may tell us useful information (e.g. if a core has some mechanism that might be usable for cpuidle), so we should always read it. This patch factors out the reading of the enable method, and ensures that CPU0's enable method is read regardless of whether the kernel is built with SMP support. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit e8765b265a69c83504afc6901d6e137b1811d1f0) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cpu_ops.h | 3 +- arch/arm64/kernel/cpu_ops.c | 54 +++++++++++++++++++++++++++++++- arch/arm64/kernel/setup.c | 2 ++ arch/arm64/kernel/smp.c | 18 +---------- 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 67bc4fd83798..1a98dbfc4a5f 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -44,6 +44,7 @@ struct cpu_operations { }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; -extern const struct cpu_operations * __init cpu_get_ops(const char *name); +extern int __init cpu_read_ops(struct device_node *dn, int cpu); +extern void __init cpu_read_bootcpu_ops(void); #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index e2652aadd9ea..aa0c9e78dbe1 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -17,6 +17,9 @@ */ #include +#include +#include +#include #include extern const struct cpu_operations smp_spin_table_ops; @@ -32,7 +35,7 @@ static const struct cpu_operations *supported_cpu_ops[] __initconst = { NULL, }; -const struct cpu_operations * __init cpu_get_ops(const char *name) +static const struct cpu_operations * __init cpu_get_ops(const char *name) { const struct cpu_operations **ops = supported_cpu_ops; @@ -45,3 +48,52 @@ const struct cpu_operations * __init cpu_get_ops(const char *name) return NULL; } + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. when + * spin-table is used for secondaries). Don't warn spuriously. + */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + return -ENOENT; + } + + cpu_ops[cpu] = cpu_get_ops(enable_method); + if (!cpu_ops[cpu]) { + pr_err("%s: invalid enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ + struct device_node *dn = NULL; + u64 mpidr = cpu_logical_map(0); + + while ((dn = of_find_node_by_type(dn, "cpu"))) { + u64 hwid; + const __be32 *prop; + + prop = of_get_property(dn, "reg", NULL); + if (!prop) + continue; + + hwid = of_read_number(prop, of_n_addr_cells(dn)); + if (hwid == mpidr) { + cpu_read_ops(dn, 0); + of_node_put(dn); + return; + } + } +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..3dc5c2f7c6b1 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,7 @@ void __init setup_arch(char **cmdline_p) psci_init(); cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); #endif diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6e9779c878ad..82eea4a18a85 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -188,7 +188,6 @@ static void (*smp_cross_call)(const struct cpumask *, unsigned int); */ void __init smp_init_cpus(void) { - const char *enable_method; struct device_node *dn = NULL; unsigned int i, cpu = 1; bool bootcpu_valid = false; @@ -259,23 +258,8 @@ void __init smp_init_cpus(void) if (cpu >= NR_CPUS) goto next; - /* - * We currently support only the "spin-table" enable-method. - */ - enable_method = of_get_property(dn, "enable-method", NULL); - if (!enable_method) { - pr_err("%s: missing enable-method property\n", - dn->full_name); + if (cpu_read_ops(dn, cpu) != 0) goto next; - } - - cpu_ops[cpu] = cpu_get_ops(enable_method); - - if (!cpu_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); - goto next; - } if (cpu_ops[cpu]->cpu_init(dn, cpu)) goto next; From d3bde2dcb4434ae9f905d9286e36feb59953f1e3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:18 +0100 Subject: [PATCH 15/80] arm64: add CPU_HOTPLUG infrastructure This patch adds the basic infrastructure necessary to support CPU_HOTPLUG on arm64, based on the arm implementation. Actual hotplug support will depend on an implementation's cpu_operations (e.g. PSCI). Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 9327e2c6bb8cb0131b38a07847cd58c78dc095e9) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 7 +++ arch/arm64/include/asm/cpu_ops.h | 9 +++ arch/arm64/include/asm/irq.h | 1 + arch/arm64/include/asm/smp.h | 5 ++ arch/arm64/kernel/cputable.c | 2 +- arch/arm64/kernel/irq.c | 61 ++++++++++++++++++++ arch/arm64/kernel/process.c | 7 +++ arch/arm64/kernel/smp.c | 96 ++++++++++++++++++++++++++++++++ 8 files changed, 187 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..022f20097930 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -150,6 +150,13 @@ config NR_CPUS depends on SMP default "4" +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + source kernel/Kconfig.preempt config HZ diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 1a98dbfc4a5f..c4cdb5e5b73d 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -34,6 +34,11 @@ struct device_node; * @cpu_boot: Boots a cpu into the kernel. * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary * synchronisation. Called from the cpu being booted. + * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific + * reason, which will cause the hot unplug to be aborted. Called + * from the cpu to be killed. + * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the + * cpu being killed. */ struct cpu_operations { const char *name; @@ -41,6 +46,10 @@ struct cpu_operations { int (*cpu_prepare)(unsigned int); int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); +#ifdef CONFIG_HOTPLUG_CPU + int (*cpu_disable)(unsigned int cpu); + void (*cpu_die)(unsigned int cpu); +#endif }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 0332fc077f6e..e1f7ecdde11f 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,6 +4,7 @@ #include extern void (*handle_arch_irq)(struct pt_regs *); +extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d64187ce69a2..a498f2cd2c2a 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -65,4 +65,9 @@ extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +extern int __cpu_disable(void); + +extern void __cpu_die(unsigned int cpu); +extern void cpu_die(void); + #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c index 63cfc4a43f4e..fd3993cb060f 100644 --- a/arch/arm64/kernel/cputable.c +++ b/arch/arm64/kernel/cputable.c @@ -22,7 +22,7 @@ extern unsigned long __cpu_setup(void); -struct cpu_info __initdata cpu_table[] = { +struct cpu_info cpu_table[] = { { .cpu_id_val = 0x000f0000, .cpu_id_mask = 0x000f0000, diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ecb3354292ed..473e5dbf8f39 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -81,3 +81,64 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; +} + +/* + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void migrate_irqs(void) +{ + unsigned int i; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_irq_desc(i, desc) { + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); + } + + local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 46f02c3b5015..4caf198ca44f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -102,6 +102,13 @@ void arch_cpu_idle(void) local_irq_enable(); } +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} +#endif + void machine_shutdown(void) { #ifdef CONFIG_SMP diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 82eea4a18a85..e0b7c0e45b87 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -166,6 +166,102 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_startup_entry(CPUHP_ONLINE); } +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu) +{ + /* + * If we don't have a cpu_die method, abort before we reach the point + * of no return. CPU0 may not have an cpu_ops, so test for it. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) + return -EOPNOTSUPP; + + /* + * We may need to abort a hot unplug for some other mechanism-specific + * reason. + */ + if (cpu_ops[cpu]->cpu_disable) + return cpu_ops[cpu]->cpu_disable(cpu); + + return 0; +} + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + int ret; + + ret = op_cpu_disable(cpu); + if (ret) + return ret; + + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); + + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Remove this CPU from the vm mask set of all processes. + */ + clear_tasks_mm_cpumask(cpu); + + return 0; +} + +static DECLARE_COMPLETION(cpu_died); + +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpu_die(unsigned int cpu) +{ + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_crit("CPU%u: cpu didn't die\n", cpu); + return; + } + pr_notice("CPU%u: shutdown\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + + idle_task_exit(); + + local_irq_disable(); + + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + + /* + * Actually shutdown the CPU. This must never fail. The specific hotplug + * mechanism must perform all required cache maintenance to ensure that + * no dirty lines are lost in the process of shutting down the CPU. + */ + cpu_ops[cpu]->cpu_die(cpu); + + BUG(); +} +#endif + void __init smp_cpus_done(unsigned int max_cpus) { unsigned long bogosum = loops_per_jiffy * num_online_cpus(); From 8cea5d72615ec55b8e31d8d4a066a38041e0dca2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Oct 2013 20:30:19 +0100 Subject: [PATCH 16/80] arm64: add PSCI CPU_OFF-based hotplug support This patch adds support for using PSCI CPU_OFF calls for CPU hotplug. With this code it is possible to hot unplug CPUs with "psci" as their boot-method, as long as there's an appropriate cpu_off function id specified in the psci node. Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 831ccf79b46e02f31cfd16c66df9d5600f635155) Signed-off-by: Mark Brown --- arch/arm64/kernel/psci.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index fb56b6158344..4f97db3d7363 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -256,11 +256,41 @@ static int cpu_psci_cpu_boot(unsigned int cpu) return err; } +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; + return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ + int ret; + /* + * There are no known implementations of PSCI actually using the + * power state field, pass a sensible default for now. + */ + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + ret = psci_ops.cpu_off(state); + + pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret); +} +#endif + const struct cpu_operations cpu_psci_ops = { .name = "psci", .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = cpu_psci_cpu_disable, + .cpu_die = cpu_psci_cpu_die, +#endif }; #endif From 914866357990d96231a6bb8d2cad870cf21bd44a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 31 Oct 2013 16:37:26 +0000 Subject: [PATCH 17/80] arm64: Slightly improve the warning on CPU0 enable-method Commit e8765b265a69 (arm64: read enable-method for CPU0) introduced checks for the enable method on CPU0 (to be later used with CPU suspend). However, if the kernel is compiled for UP and a DT file is used with a method like 'spin-table', Linux complains about 'invalid enable method'. This patch turns it into an 'unsupported enable method' warning. Signed-off-by: Catalin Marinas (cherry picked from commit 264666e62848c19ba9252c80e895ffec9ad1d391) Signed-off-by: Mark Brown --- arch/arm64/kernel/cpu_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index aa0c9e78dbe1..04efea8fe4bc 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -68,8 +68,8 @@ int __init cpu_read_ops(struct device_node *dn, int cpu) cpu_ops[cpu] = cpu_get_ops(enable_method); if (!cpu_ops[cpu]) { - pr_err("%s: invalid enable-method property: %s\n", - dn->full_name, enable_method); + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); return -EOPNOTSUPP; } From 6e2bb9193fc2161ae2a5cfd11590aa9c0ae8a80d Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Thu, 15 Aug 2013 14:01:40 +0100 Subject: [PATCH 18/80] of: move of_get_cpu_node implementation to DT core library This patch moves the generalized implementation of of_get_cpu_node from PowerPC to DT core library, thereby adding support for retrieving cpu node for a given logical cpu index on any architecture. The CPU subsystem can now use this function to assign of_node in the cpu device while registering CPUs. It is recommended to use these helper function only in pre-SMP/early initialisation stages to retrieve CPU device node pointers in logical ordering. Once the cpu devices are registered, it can be retrieved easily from cpu device of_node which avoids unnecessary parsing and matching. Cc: Benjamin Herrenschmidt Cc: Grant Likely Acked-by: Rob Herring Signed-off-by: Sudeep KarkadaNagesha (cherry picked from commit 183912d352a242a276a7877852f107459a13aff9) Signed-off-by: Mark Brown --- arch/powerpc/include/asm/prom.h | 3 -- arch/powerpc/kernel/prom.c | 43 +-------------- drivers/of/base.c | 95 +++++++++++++++++++++++++++++++++ include/linux/cpu.h | 1 + include/linux/of.h | 7 +++ 5 files changed, 105 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index bc2da154f68b..ac204e022922 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -43,9 +43,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, extern void kdump_move_device_tree(void); -/* CPU OF node matching */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); - /* cache lookup */ struct device_node *of_find_next_cache_node(struct device_node *np); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..00610a34fb9a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -827,49 +827,10 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* Find the device node for a given logical cpu number, also returns the cpu - * local thread number (index in ibm,interrupt-server#s) if relevant and - * asked for (non NULL) - */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { - int hardid; - struct device_node *np; - - hardid = get_hard_smp_processor_id(cpu); - - for_each_node_by_type(np, "cpu") { - const u32 *intserv; - unsigned int plen, t; - - /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist - * fallback to "reg" property and assume no threads - */ - intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", - &plen); - if (intserv == NULL) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg == NULL) - continue; - if (*reg == hardid) { - if (thread) - *thread = 0; - return np; - } - } else { - plen /= sizeof(u32); - for (t = 0; t < plen; t++) { - if (hardid == intserv[t]) { - if (thread) - *thread = t; - return np; - } - } - } - } - return NULL; + return (int)phys_id == get_hard_smp_processor_id(cpu); } -EXPORT_SYMBOL(of_get_cpu_node); #if defined(CONFIG_DEBUG_FS) && defined(DEBUG) static struct debugfs_blob_wrapper flat_dt_blob; diff --git a/drivers/of/base.c b/drivers/of/base.c index a6f584a7f4a1..9b6cd2d0e34f 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -18,6 +18,7 @@ * 2 of the License, or (at your option) any later version. */ #include +#include #include #include #include @@ -230,6 +231,100 @@ const void *of_get_property(const struct device_node *np, const char *name, } EXPORT_SYMBOL(of_get_property); +/* + * arch_match_cpu_phys_id - Match the given logical CPU and physical id + * + * @cpu: logical cpu index of a core/thread + * @phys_id: physical identifier of a core/thread + * + * CPU logical to physical index mapping is architecture specific. + * However this __weak function provides a default match of physical + * id to logical cpu index. phys_id provided here is usually values read + * from the device tree which must match the hardware internal registers. + * + * Returns true if the physical identifier and the logical cpu index + * correspond to the same core/thread, false otherwise. + */ +bool __weak arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return (u32)phys_id == cpu; +} + +/** + * Checks if the given "prop_name" property holds the physical id of the + * core/thread corresponding to the logical cpu 'cpu'. If 'thread' is not + * NULL, local thread number within the core is returned in it. + */ +static bool __of_find_n_match_cpu_property(struct device_node *cpun, + const char *prop_name, int cpu, unsigned int *thread) +{ + const __be32 *cell; + int ac, prop_len, tid; + u64 hwid; + + ac = of_n_addr_cells(cpun); + cell = of_get_property(cpun, prop_name, &prop_len); + if (!cell) + return false; + prop_len /= sizeof(*cell); + for (tid = 0; tid < prop_len; tid++) { + hwid = of_read_number(cell, ac); + if (arch_match_cpu_phys_id(cpu, hwid)) { + if (thread) + *thread = tid; + return true; + } + cell += ac; + } + return false; +} + +/** + * of_get_cpu_node - Get device node associated with the given logical CPU + * + * @cpu: CPU number(logical index) for which device node is required + * @thread: if not NULL, local thread number within the physical core is + * returned + * + * The main purpose of this function is to retrieve the device node for the + * given logical CPU index. It should be used to initialize the of_node in + * cpu device. Once of_node in cpu device is populated, all the further + * references can use that instead. + * + * CPU logical to physical index mapping is architecture specific and is built + * before booting secondary cores. This function uses arch_match_cpu_phys_id + * which can be overridden by architecture specific implementation. + * + * Returns a node pointer for the logical cpu if found, else NULL. + */ +struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +{ + struct device_node *cpun, *cpus; + + cpus = of_find_node_by_path("/cpus"); + if (!cpus) { + pr_warn("Missing cpus node, bailing out\n"); + return NULL; + } + + for_each_child_of_node(cpus, cpun) { + if (of_node_cmp(cpun->type, "cpu")) + continue; + /* Check for non-standard "ibm,ppc-interrupt-server#s" property + * for thread ids on PowerPC. If it doesn't exist fallback to + * standard "reg" property. + */ + if (IS_ENABLED(CONFIG_PPC) && + __of_find_n_match_cpu_property(cpun, + "ibm,ppc-interrupt-server#s", cpu, thread)) + return cpun; + if (__of_find_n_match_cpu_property(cpun, "reg", cpu, thread)) + return cpun; + } + return NULL; +} +EXPORT_SYMBOL(of_get_cpu_node); + /** Checks if the given "compat" string matches one of the strings in * the device's "compatible" property */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9f3c7e81270a..322e0afc8634 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -29,6 +29,7 @@ struct cpu { extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); extern bool cpu_is_hotpluggable(unsigned cpu); +extern bool arch_match_cpu_phys_id(int cpu, u64 phys_id); extern int cpu_add_dev_attr(struct device_attribute *attr); extern void cpu_remove_dev_attr(struct device_attribute *attr); diff --git a/include/linux/of.h b/include/linux/of.h index 1fd08ca23106..c0bb2f188048 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -266,6 +266,7 @@ extern int of_device_is_available(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); +extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); #define for_each_property_of_node(dn, pp) \ for (pp = dn->properties; pp != NULL; pp = pp->next) @@ -459,6 +460,12 @@ static inline const void *of_get_property(const struct device_node *node, return NULL; } +static inline struct device_node *of_get_cpu_node(int cpu, + unsigned int *thread) +{ + return NULL; +} + static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { From 2166811c3289da53b42cd44228cc6c72acc57fd4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Dec 2013 22:16:19 +0000 Subject: [PATCH 19/80] arm64: topology: Implement basic CPU topology support Add basic CPU topology support to arm64, based on the existing pre-v8 code and some work done by Mark Hambleton. This patch does not implement any topology discovery support since that should be based on information from firmware, it merely implements the scaffolding for integration of topology support in the architecture. The goal is to separate the architecture hookup for providing topology information from the DT parsing in order to ease review and avoid blocking the architecture code (which will be built on by other work) with the DT code review by providing something something simple and basic. A following patch will implement support for parsing the DT topology bindings for ARM, similar patches will be needed for ACPI. Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 8 ++ arch/arm64/include/asm/topology.h | 42 ++++++++++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/smp.c | 12 +++ arch/arm64/kernel/topology.c | 124 ++++++++++++++++++++++++++++++ 5 files changed, 187 insertions(+) create mode 100644 arch/arm64/include/asm/topology.h create mode 100644 arch/arm64/kernel/topology.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..43c9336eb679 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -144,6 +144,14 @@ config SMP If you don't know what to do here, say N. +config ARM_CPU_TOPOLOGY + bool "Support CPU topology definition" + depends on SMP + default y + help + Support CPU topology definition, based on configuration + provided by the firmware. + config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h new file mode 100644 index 000000000000..611edefaeaf1 --- /dev/null +++ b/arch/arm64/include/asm/topology.h @@ -0,0 +1,42 @@ +#ifndef _ASM_ARM_TOPOLOGY_H +#define _ASM_ARM_TOPOLOGY_H + +#ifdef CONFIG_ARM_CPU_TOPOLOGY + +#include + +struct cputopo_arm { + int thread_id; + int core_id; + int socket_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +extern struct cputopo_arm cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) + +#define mc_capable() (cpu_topology[0].socket_id != -1) +#define smt_capable() (cpu_topology[0].thread_id != -1) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask); + +#else + +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { } +static inline int cluster_to_logical_mask(unsigned int socket_id, + cpumask_t *cluster_mask) { return -EINVAL; } + +#endif + +#include + +#endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d4..4e4c80d11509 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5d54e3717bf8..84c00cf02ae2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -158,6 +158,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void __cpuinit smp_store_cpu_info(unsigned int cpuid) +{ + store_cpu_topology(cpuid); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -206,6 +211,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_irq_enable(); local_fiq_enable(); + smp_store_cpu_info(cpu); + /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online @@ -386,6 +393,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int cpu, err; unsigned int ncores = num_possible_cpus(); + init_cpu_topology(); + + smp_store_cpu_info(smp_processor_id()); + + /* * are we trying to boot more cores than exist? */ diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 000000000000..aae9d4d72328 --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,124 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013 Linaro Limited. + * Written by: Vincent Guittot + * + * based on arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * cpu topology table + */ +struct cputopo_arm cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + +void store_cpu_topology(unsigned int cpuid) +{ + struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; + + /* DT should have been parsed by the time we get here */ + if (cpuid_topo->core_id == -1) + pr_info("CPU%u: No topology information configured\n", cpuid); + else + update_siblings_masks(cpuid); +} + + +/* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) { + if (socket_id == topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + } + + return -EINVAL; +} + +/* + * init_cpu_topology is called at boot when only one cpu is running + * which prevent simultaneous write access to cpu_topology array + */ +void __init init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask and power*/ + for_each_possible_cpu(cpu) { + struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpu_topo->socket_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + } + smp_wmb(); +} From 66f06c94717c696fee2fa3ba139e8ffe226758ec Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Dec 2013 22:19:11 +0000 Subject: [PATCH 20/80] arm64: topology: Tell the scheduler about the relative power of cores In non-heterogeneous systems like big.LITTLE systems the scheduler will be able to make better use of the available cores if we provide power numbers to it indicating their relative performance. Do this by parsing the CPU nodes in the DT. The power numbers are the same as for ARMv7 since it seems that the expected differential between the big and little cores is very similar on both ARMv7 and ARMv8. These numbers are just an initial and basic approximation for use with the current scheduler, it is likely that both experience with silicon and ongoing work on improving the scheduler will lead to further tuning. Signed-off-by: Mark Brown --- arch/arm64/kernel/topology.c | 164 +++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index aae9d4d72328..c88970b1b863 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,163 @@ #include #include +/* + * cpu power scale management + */ + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. + */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +#ifdef CONFIG_OF +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +static const struct cpu_efficiency table_efficiency[] = { + { "arm,cortex-a57", 3891 }, + { "arm,cortex-a53", 2048 }, + { NULL, }, +}; + +static unsigned long *__cpu_capacity; +#define cpu_capacity(cpu) __cpu_capacity[cpu] + +static unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). + */ +static void __init parse_dt_topology(void) +{ + const struct cpu_efficiency *cpu_eff; + struct device_node *cn = NULL; + unsigned long min_capacity = (unsigned long)(-1); + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int alloc_size, cpu; + + alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity); + __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT); + + for_each_possible_cpu(cpu) { + const u32 *rate; + int len; + + /* Too early to use cpu->of_node */ + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_err("Missing device node for CPU %d\n", cpu); + continue; + } + + /* check if the cpu is marked as "disabled", if so ignore */ + if (!of_device_is_available(cn)) + continue; + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) { + pr_warn("%s: Unknown CPU type\n", cn->full_name); + continue; + } + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s: Missing clock-frequency property\n", + cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity(cpu) = capacity; + } + + /* If min and max capacities are equal we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + return; + else if (4 * max_capacity < (3 * (max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; + +} + +/* + * Look for a customed capacity of a CPU in the cpu_topo_data table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. + */ +static void update_cpu_power(unsigned int cpu) +{ + if (!cpu_capacity(cpu)) + return; + + set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + + pr_info("CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +#else +static inline void parse_dt_topology(void) {} +static inline void update_cpu_power(unsigned int cpuid) {} +#endif + /* * cpu topology table */ @@ -71,6 +229,8 @@ void store_cpu_topology(unsigned int cpuid) pr_info("CPU%u: No topology information configured\n", cpuid); else update_siblings_masks(cpuid); + + update_cpu_power(cpuid); } @@ -119,6 +279,10 @@ void __init init_cpu_topology(void) cpu_topo->socket_id = -1; cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); + + set_power_scale(cpu, SCHED_POWER_SCALE); } smp_wmb(); + + parse_dt_topology(); } From e101473817ddc5e10919e4e22c3b421a350ad742 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 Dec 2013 12:45:29 +0000 Subject: [PATCH 21/80] arm64: topology: Add support for topology DT bindings Add support for parsing the explicit topology bindings to discover the topology of the system. Since it is not currently clear how to map multi-level clusters for the scheduler all leaf clusters are presented to the scheduler at the same level. This should be enough to provide good support for current systems. Signed-off-by: Mark Brown --- arch/arm64/kernel/topology.c | 130 +++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index c88970b1b863..83d6919d4e0a 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -79,6 +79,121 @@ static unsigned long *__cpu_capacity; #define cpu_capacity(cpu) __cpu_capacity[cpu] static unsigned long middle_capacity = 1; +static int cluster_id; + +static int __init get_cpu_for_node(struct device_node *node) +{ + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpu", 0); + if (!cpu_node) { + pr_crit("%s: Unable to parse CPU phandle\n", node->full_name); + return -1; + } + + for_each_possible_cpu(cpu) { + if (of_get_cpu_node(cpu, NULL) == cpu_node) + return cpu; + } + + pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + return -1; +} + +static void __init parse_core(struct device_node *core, int core_id) +{ + char name[10]; + bool leaf = true; + int i, cpu; + struct device_node *t; + + i = 0; + do { + snprintf(name, sizeof(name), "thread%d", i); + t = of_get_child_by_name(core, name); + if (t) { + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu) { + pr_info("CPU%d: socket %d core %d thread %d\n", + cpu, cluster_id, core_id, i); + cpu_topology[cpu].socket_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else { + pr_err("%s: Can't get CPU for thread\n", + t->full_name); + } + } + i++; + } while (t); + + cpu = get_cpu_for_node(core); + if (cpu >= 0) { + if (!leaf) { + pr_err("%s: Core has both threads and CPU\n", + core->full_name); + return; + } + + pr_info("CPU%d: socket %d core %d\n", + cpu, cluster_id, core_id); + cpu_topology[cpu].socket_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + } else if (leaf) { + pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + } +} + +static void __init parse_cluster(struct device_node *cluster) +{ + char name[10]; + bool leaf = true; + bool has_cores = false; + struct device_node *c; + int core_id = 0; + int i; + + /* + * First check for child clusters; we currently ignore any + * information about the nesting of clusters and present the + * scheduler with a flat list of them. + */ + i = 0; + do { + snprintf(name, sizeof(name), "cluster%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + parse_cluster(c); + leaf = false; + } + i++; + } while (c); + + /* Now check for cores */ + i = 0; + do { + snprintf(name, sizeof(name), "core%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + has_cores = true; + + if (leaf) + parse_core(c, core_id++); + else + pr_err("%s: Non-leaf cluster with core %s\n", + cluster->full_name, name); + } + i++; + } while (c); + + if (leaf && !has_cores) + pr_warn("%s: empty cluster\n", cluster->full_name); + + if (leaf) + cluster_id++; +} /* * Iterate all CPUs' descriptor in DT and compute the efficiency @@ -100,6 +215,21 @@ static void __init parse_dt_topology(void) alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity); __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT); + cn = of_find_node_by_path("/cpus"); + if (!cn) { + pr_err("No CPU information found in DT\n"); + return; + } + + /* + * If topology is provided as a cpu-map it is essentially a + * root cluster. + */ + cn = of_find_node_by_name(cn, "cpu-map"); + if (!cn) + return; + parse_cluster(cn); + for_each_possible_cpu(cpu) { const u32 *rate; int len; From 26b545351664d9ec3a7f9aff4100a05558c8db84 Mon Sep 17 00:00:00 2001 From: Mark Hambleton Date: Mon, 2 Dec 2013 18:10:44 +0000 Subject: [PATCH 22/80] arm64: cmpxchg: update macros to prevent warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the value we are going to return is referenced in order to avoid warnings from newer GCCs such as: arch/arm64/include/asm/cmpxchg.h:162:3: warning: value computed is not used [-Wunused-value] ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ ^ net/netfilter/nf_conntrack_core.c:674:2: note: in expansion of macro ‘cmpxchg’ cmpxchg(&nf_conntrack_hash_rnd, 0, rand); [Modified to use the current underlying implementation as current mainline for both cmpxchg() and cmpxchg_local() does -- broonie] [In review upstream -- broonie] Signed-off-by: Mark Hambleton Signed-off-by: Mark Brown --- arch/arm64/include/asm/cmpxchg.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 8a8ce0e73a38..68d7c932a9c3 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -158,17 +158,23 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, return ret; } -#define cmpxchg(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) -#define cmpxchg_local(ptr,o,n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) From daba76170151aa64ba29d46c9426b4d0b204601d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2013 12:47:34 +0100 Subject: [PATCH 23/80] ALSA: hda - Fix silent output on ASUS W7J laptop commit 6ddf0fd1c462a418a3cbb8b0653820dc48ffbd98 upstream. The recent kernels got regressions on ASUS W7J with ALC660 codec where no sound comes out. After a long debugging session, we found out that setting the pin control on the unused NID 0x10 is mandatory for the outputs. And, it was found out that another magic of NID 0x0f that is required for other ASUS laptops isn't needed on this machine. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=66081 Reported-and-tested-by: Andrey Lipaev Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 661afe7d85e4..8b7c9e86b7d8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3896,6 +3896,7 @@ enum { ALC861_FIXUP_AMP_VREF_0F, ALC861_FIXUP_NO_JACK_DETECT, ALC861_FIXUP_ASUS_A6RP, + ALC660_FIXUP_ASUS_W7J, }; /* On some laptops, VREF of pin 0x0f is abused for controlling the main amp */ @@ -3945,10 +3946,21 @@ static const struct hda_fixup alc861_fixups[] = { .v.func = alc861_fixup_asus_amp_vref_0f, .chained = true, .chain_id = ALC861_FIXUP_NO_JACK_DETECT, + }, + [ALC660_FIXUP_ASUS_W7J] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* ASUS W7J needs a magic pin setup on unused NID 0x10 + * for enabling outputs + */ + {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24}, + { } + }, } }; static const struct snd_pci_quirk alc861_fixup_tbl[] = { + SND_PCI_QUIRK(0x1043, 0x1253, "ASUS W7J", ALC660_FIXUP_ASUS_W7J), SND_PCI_QUIRK(0x1043, 0x1393, "ASUS A6Rp", ALC861_FIXUP_ASUS_A6RP), SND_PCI_QUIRK_VENDOR(0x1043, "ASUS laptop", ALC861_FIXUP_AMP_VREF_0F), SND_PCI_QUIRK(0x1462, 0x7254, "HP DX2200", ALC861_FIXUP_NO_JACK_DETECT), From e0979ec7e098007b943b6e779093eb62b4551884 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Dec 2013 15:27:19 +0100 Subject: [PATCH 24/80] ALSA: hda - Another fixup for ASUS laptop with ALC660 codec commit e7ca237bfcf6a288702cb95e94ab94f642ccad88 upstream. ASUS Z35HL laptop also needs the very same fix as the previous one that was applied to ASUS W7J. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=66231 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8b7c9e86b7d8..db5168555a14 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3961,6 +3961,7 @@ static const struct hda_fixup alc861_fixups[] = { static const struct snd_pci_quirk alc861_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1253, "ASUS W7J", ALC660_FIXUP_ASUS_W7J), + SND_PCI_QUIRK(0x1043, 0x1263, "ASUS Z35HL", ALC660_FIXUP_ASUS_W7J), SND_PCI_QUIRK(0x1043, 0x1393, "ASUS A6Rp", ALC861_FIXUP_ASUS_A6RP), SND_PCI_QUIRK_VENDOR(0x1043, "ASUS laptop", ALC861_FIXUP_AMP_VREF_0F), SND_PCI_QUIRK(0x1462, 0x7254, "HP DX2200", ALC861_FIXUP_NO_JACK_DETECT), From c4fcb3cd61aecde82df7d9856e3d2201e908ca65 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Mon, 2 Dec 2013 18:06:20 +0800 Subject: [PATCH 25/80] ALSA: hda - Fix headset mic input after muted internal mic (Dell/Realtek) commit d59915d0655c5864b514f21daaeac98c047875dc upstream. By trial and error, I found this patch could work around an issue where the headset mic would stop working if you switch between the internal mic and the headset mic, and the internal mic was muted. It still takes a second or two before the headset mic actually starts working, but still better than nothing. Information update from Kailang: The verb was ADC digital mute(bit 6 default 1). Switch internal mic and headset mic will run alc_headset_mode_default. The coef index 0x11 will set to 0x0041. Because headset mode was fixed type. It doesn't need to run alc_determine_headset_type. So, the value still keep 0x0041. ADC was muted. BugLink: https://bugs.launchpad.net/bugs/1256840 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index db5168555a14..a7605db321a4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3041,6 +3041,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) alc_write_coef_idx(codec, 0x18, 0x7388); break; case 0x10ec0668: + alc_write_coef_idx(codec, 0x11, 0x0001); alc_write_coef_idx(codec, 0x15, 0x0d60); alc_write_coef_idx(codec, 0xc3, 0x0000); break; @@ -3063,6 +3064,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) alc_write_coef_idx(codec, 0x18, 0x7388); break; case 0x10ec0668: + alc_write_coef_idx(codec, 0x11, 0x0001); alc_write_coef_idx(codec, 0x15, 0x0d50); alc_write_coef_idx(codec, 0xc3, 0x0000); break; From 64871fb0be804a971bcdc5db337101d2537167b4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Dec 2013 13:59:45 +0100 Subject: [PATCH 26/80] ALSA: hda - Fix silent output on MacBook Air 2,1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0756f09c4946fe2d9ce2ebcb6f2e3c58830d22a3 upstream. MacBook Air 2,1 has a fairly different pin assignment from its brother MBA 1,1, and yet another quirks are needed for pin 0x18 and 0x19, similarly like what iMac 9,1 requires, in order to make the sound working on it. Reported-and-tested-by: Bruno Prémont Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 39 ++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a7605db321a4..e755f5109d84 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1763,6 +1763,7 @@ enum { ALC889_FIXUP_DAC_ROUTE, ALC889_FIXUP_MBP_VREF, ALC889_FIXUP_IMAC91_VREF, + ALC889_FIXUP_MBA21_VREF, ALC882_FIXUP_INV_DMIC, ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, @@ -1866,17 +1867,13 @@ static void alc889_fixup_mbp_vref(struct hda_codec *codec, } } -/* Set VREF on speaker pins on imac91 */ -static void alc889_fixup_imac91_vref(struct hda_codec *codec, - const struct hda_fixup *fix, int action) +static void alc889_fixup_mac_pins(struct hda_codec *codec, + const hda_nid_t *nids, int num_nids) { struct alc_spec *spec = codec->spec; - static hda_nid_t nids[2] = { 0x18, 0x1a }; int i; - if (action != HDA_FIXUP_ACT_INIT) - return; - for (i = 0; i < ARRAY_SIZE(nids); i++) { + for (i = 0; i < num_nids; i++) { unsigned int val; val = snd_hda_codec_get_pin_target(codec, nids[i]); val |= AC_PINCTL_VREF_50; @@ -1885,6 +1882,26 @@ static void alc889_fixup_imac91_vref(struct hda_codec *codec, spec->gen.keep_vref_in_automute = 1; } +/* Set VREF on speaker pins on imac91 */ +static void alc889_fixup_imac91_vref(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static hda_nid_t nids[2] = { 0x18, 0x1a }; + + if (action == HDA_FIXUP_ACT_INIT) + alc889_fixup_mac_pins(codec, nids, ARRAY_SIZE(nids)); +} + +/* Set VREF on speaker pins on mba21 */ +static void alc889_fixup_mba21_vref(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static hda_nid_t nids[2] = { 0x18, 0x19 }; + + if (action == HDA_FIXUP_ACT_INIT) + alc889_fixup_mac_pins(codec, nids, ARRAY_SIZE(nids)); +} + /* Don't take HP output as primary * Strangely, the speaker output doesn't work on Vaio Z and some Vaio * all-in-one desktop PCs (for example VGC-LN51JGB) through DAC 0x05 @@ -2079,6 +2096,12 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC882_FIXUP_GPIO1, }, + [ALC889_FIXUP_MBA21_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc889_fixup_mba21_vref, + .chained = true, + .chain_id = ALC889_FIXUP_MBP_VREF, + }, [ALC882_FIXUP_INV_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic_0x12, @@ -2143,7 +2166,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x3000, "iMac", ALC889_FIXUP_MBP_VREF), SND_PCI_QUIRK(0x106b, 0x3200, "iMac 7,1 Aluminum", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x106b, 0x3400, "MacBookAir 1,1", ALC889_FIXUP_MBP_VREF), - SND_PCI_QUIRK(0x106b, 0x3500, "MacBookAir 2,1", ALC889_FIXUP_MBP_VREF), + SND_PCI_QUIRK(0x106b, 0x3500, "MacBookAir 2,1", ALC889_FIXUP_MBA21_VREF), SND_PCI_QUIRK(0x106b, 0x3600, "Macbook 3,1", ALC889_FIXUP_MBP_VREF), SND_PCI_QUIRK(0x106b, 0x3800, "MacbookPro 4,1", ALC889_FIXUP_MBP_VREF), SND_PCI_QUIRK(0x106b, 0x3e00, "iMac 24 Aluminum", ALC885_FIXUP_MACPRO_GPIO), From 87a226e9df5efd1c7d26756fab9e9f29a267ac47 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Fri, 29 Nov 2013 15:10:20 +0800 Subject: [PATCH 27/80] ALSA: hda - Add mono speaker quirk for Dell Inspiron 5439 commit eb82594b75b0cf54c667189e061934b7c49b5d42 upstream. This machine also has mono output if run through DAC node 0x03. BugLink: https://bugs.launchpad.net/bugs/1256212 Tested-by: David Chen Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e755f5109d84..937079a5265a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3657,6 +3657,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0614, "Dell Inspiron 3135", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), SND_PCI_QUIRK(0x103c, 0x18e6, "HP", ALC269_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x1973, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), From da94360f55be9d57c866f4d6b0861d9ecd1a1aeb Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 19 Nov 2013 17:12:47 +0100 Subject: [PATCH 28/80] crypto: s390 - Fix aes-xts parameter corruption commit 9dda2769af4f3f3093434648c409bb351120d9e8 upstream. Some s390 crypto algorithms incorrectly use the crypto_tfm structure to store private data. As the tfm can be shared among multiple threads, this can result in data corruption. This patch fixes aes-xts by moving the xts and pcc parameter blocks from the tfm onto the stack (48 + 96 bytes). Signed-off-by: Gerald Schaefer Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/s390/crypto/aes_s390.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 2e4b5be31a1b..94e20dd2729f 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -55,8 +55,7 @@ struct pcc_param { struct s390_xts_ctx { u8 key[32]; - u8 xts_param[16]; - struct pcc_param pcc; + u8 pcc_key[32]; long enc; long dec; int key_len; @@ -591,7 +590,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, xts_ctx->enc = KM_XTS_128_ENCRYPT; xts_ctx->dec = KM_XTS_128_DECRYPT; memcpy(xts_ctx->key + 16, in_key, 16); - memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16); + memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16); break; case 48: xts_ctx->enc = 0; @@ -602,7 +601,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, xts_ctx->enc = KM_XTS_256_ENCRYPT; xts_ctx->dec = KM_XTS_256_DECRYPT; memcpy(xts_ctx->key, in_key, 32); - memcpy(xts_ctx->pcc.key, in_key + 32, 32); + memcpy(xts_ctx->pcc_key, in_key + 32, 32); break; default: *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; @@ -621,29 +620,33 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, unsigned int nbytes = walk->nbytes; unsigned int n; u8 *in, *out; - void *param; + struct pcc_param pcc_param; + struct { + u8 key[32]; + u8 init[16]; + } xts_param; if (!nbytes) goto out; - memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block)); - memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit)); - memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts)); - memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak)); - param = xts_ctx->pcc.key + offset; - ret = crypt_s390_pcc(func, param); + memset(pcc_param.block, 0, sizeof(pcc_param.block)); + memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); + memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); + memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.key, xts_ctx->pcc_key, 32); + ret = crypt_s390_pcc(func, &pcc_param.key[offset]); if (ret < 0) return -EIO; - memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16); - param = xts_ctx->key + offset; + memcpy(xts_param.key, xts_ctx->key, 32); + memcpy(xts_param.init, pcc_param.xts, 16); do { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = crypt_s390_km(func, param, out, in, n); + ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n); if (ret < 0 || ret != n) return -EIO; From a426ea27b26cca933cba53e189d6478bd04eddd1 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 12 Nov 2013 11:46:10 -0600 Subject: [PATCH 29/80] crypto: scatterwalk - Set the chain pointer indication bit commit 41da8b5adba77e22584f8b45f9641504fa885308 upstream. The scatterwalk_crypto_chain function invokes the scatterwalk_sg_chain function to chain two scatterlists, but the chain pointer indication bit is not set. When the resulting scatterlist is used, for example, by sg_nents to count the number of scatterlist entries, a segfault occurs because sg_nents does not follow the chain pointer to the chained scatterlist. Update scatterwalk_sg_chain to set the chain pointer indication bit as is done by the sg_chain function. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/scatterwalk.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 3744d2a642df..09ef1a0c5499 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -36,6 +36,7 @@ static inline void scatterwalk_sg_chain(struct scatterlist *sg1, int num, { sg_set_page(&sg1[num - 1], (void *)sg2, 0, 0); sg1[num - 1].page_link &= ~0x02; + sg1[num - 1].page_link |= 0x01; } static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg) From bf2e52300b9e5a329c5f76ba8d77e33ade976b1c Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Thu, 28 Nov 2013 15:11:15 +0200 Subject: [PATCH 30/80] crypto: ccm - Fix handling of zero plaintext when computing mac commit 5638cabf3e4883f38dfb246c30980cebf694fbda upstream. There are cases when cryptlen can be zero in crypto_ccm_auth(): -encryptiom: input scatterlist length is zero (no plaintext) -decryption: input scatterlist contains only the mac plus the condition of having different source and destination buffers (or else scatterlist length = max(plaintext_len, ciphertext_len)). These are not handled correctly, leading to crashes like: root@p4080ds:~/crypto# insmod tcrypt.ko mode=45 ------------[ cut here ]------------ kernel BUG at crypto/scatterwalk.c:37! Oops: Exception in kernel mode, sig: 5 [#1] SMP NR_CPUS=8 P4080 DS Modules linked in: tcrypt(+) crc32c xts xcbc vmac pcbc ecb gcm ghash_generic gf128mul ccm ctr seqiv CPU: 3 PID: 1082 Comm: cryptomgr_test Not tainted 3.11.0 #14 task: ee12c5b0 ti: eecd0000 task.ti: eecd0000 NIP: c0204d98 LR: f9225848 CTR: c0204d80 REGS: eecd1b70 TRAP: 0700 Not tainted (3.11.0) MSR: 00029002 CR: 22044022 XER: 20000000 GPR00: f9225c94 eecd1c20 ee12c5b0 eecd1c28 ee879400 ee879400 00000000 ee607464 GPR08: 00000001 00000001 00000000 006b0000 c0204d80 00000000 00000002 c0698e20 GPR16: ee987000 ee895000 fffffff4 ee879500 00000100 eecd1d58 00000001 00000000 GPR24: ee879400 00000020 00000000 00000000 ee5b2800 ee607430 00000004 ee607460 NIP [c0204d98] scatterwalk_start+0x18/0x30 LR [f9225848] get_data_to_compute+0x28/0x2f0 [ccm] Call Trace: [eecd1c20] [f9225974] get_data_to_compute+0x154/0x2f0 [ccm] (unreliable) [eecd1c70] [f9225c94] crypto_ccm_auth+0x184/0x1d0 [ccm] [eecd1cb0] [f9225d40] crypto_ccm_encrypt+0x60/0x2d0 [ccm] [eecd1cf0] [c020d77c] __test_aead+0x3ec/0xe20 [eecd1e20] [c020f35c] test_aead+0x6c/0xe0 [eecd1e40] [c020f420] alg_test_aead+0x50/0xd0 [eecd1e60] [c020e5e4] alg_test+0x114/0x2e0 [eecd1ee0] [c020bd1c] cryptomgr_test+0x4c/0x60 [eecd1ef0] [c0047058] kthread+0xa8/0xb0 [eecd1f40] [c000eb0c] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 0f080000 81290024 552807fe 0f080000 5529003a 4bffffb4 90830000 39400000 39000001 8124000c 2f890000 7d28579e <0f090000> 81240008 91230004 4e800020 ---[ end trace 6d652dfcd1be37bd ]--- Cc: Jussi Kivilinna Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ccm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 499c91717d93..ed009b77e67d 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -271,7 +271,8 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain, } /* compute plaintext into mac */ - get_data_to_compute(cipher, pctx, plain, cryptlen); + if (cryptlen) + get_data_to_compute(cipher, pctx, plain, cryptlen); out: return err; From 1c651d3627a2d3ba10f2f116303ca620e17b14d5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 12 Nov 2013 11:46:04 -0600 Subject: [PATCH 31/80] crypto: authenc - Find proper IV address in ablkcipher callback commit fc019c7122dfcd69c50142b57a735539aec5da95 upstream. When performing an asynchronous ablkcipher operation the authenc completion callback routine is invoked, but it does not locate and use the proper IV. The callback routine, crypto_authenc_encrypt_done, is updated to use the same method of calculating the address of the IV as is done in crypto_authenc_encrypt function which sets up the callback. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/authenc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index ffce19de05cf..528b00bc4769 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -368,9 +368,10 @@ static void crypto_authenc_encrypt_done(struct crypto_async_request *req, if (!err) { struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); - struct ablkcipher_request *abreq = aead_request_ctx(areq); - u8 *iv = (u8 *)(abreq + 1) + - crypto_ablkcipher_reqsize(ctx->enc); + struct authenc_request_ctx *areq_ctx = aead_request_ctx(areq); + struct ablkcipher_request *abreq = (void *)(areq_ctx->tail + + ctx->reqoff); + u8 *iv = (u8 *)abreq - crypto_ablkcipher_ivsize(ctx->enc); err = crypto_authenc_genicv(areq, iv, 0); } From d47787bdf7cd4dfccfaffdab00b5b593d6270a6a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Nov 2013 21:43:40 +0000 Subject: [PATCH 32/80] ARM: fix booting low-vectors machines commit d8aa712c30148ba26fd89a5dc14de95d4c375184 upstream. Commit f6f91b0d9fd9 (ARM: allow kuser helpers to be removed from the vector page) required two pages for the vectors code. Although the code setting up the initial page tables was updated, the code which allocates page tables for new processes wasn't, neither was the code which tears down the mappings. Fix this. Fixes: f6f91b0d9fd9 ("ARM: allow kuser helpers to be removed from the vector page") Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/pgtable.h | 2 +- arch/arm/mm/mmap.c | 2 +- arch/arm/mm/pgd.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 9bcd262a9008..5aac06fcc97e 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -58,7 +58,7 @@ extern void __pgd_error(const char *file, int line, pgd_t); * mapping to be mapped at. This is particularly important for * non-high vector CPUs. */ -#define FIRST_USER_ADDRESS PAGE_SIZE +#define FIRST_USER_ADDRESS (PAGE_SIZE * 2) /* * Use TASK_SIZE as the ceiling argument for free_pgtables() and diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 10062ceadd1c..f0ef2f7d4ad7 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -146,7 +146,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; - info.low_limit = PAGE_SIZE; + info.low_limit = FIRST_USER_ADDRESS; info.high_limit = mm->mmap_base; info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; info.align_offset = pgoff << PAGE_SHIFT; diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 0acb089d0f70..1046b373d1ae 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -87,7 +87,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) init_pud = pud_offset(init_pgd, 0); init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); - set_pte_ext(new_pte, *init_pte, 0); + set_pte_ext(new_pte + 0, init_pte[0], 0); + set_pte_ext(new_pte + 1, init_pte[1], 0); pte_unmap(init_pte); pte_unmap(new_pte); } From d877bdfe0c784f4b1d9e0d0ba151d0f4e6a2d09b Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Nov 2013 21:55:41 +0000 Subject: [PATCH 33/80] ARM: footbridge: fix VGA initialisation commit 43659222e7a0113912ed02f6b2231550b3e471ac upstream. It's no good setting vga_base after the VGA console has been initialised, because if we do that we get this: Unable to handle kernel paging request at virtual address 000b8000 pgd = c0004000 [000b8000] *pgd=07ffc831, *pte=00000000, *ppte=00000000 0Internal error: Oops: 5017 [#1] ARM Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.0+ #49 task: c03e2974 ti: c03d8000 task.ti: c03d8000 PC is at vgacon_startup+0x258/0x39c LR is at request_resource+0x10/0x1c pc : [] lr : [] psr: 60000053 sp : c03d9f68 ip : 000b8000 fp : c03d9f8c r10: 000055aa r9 : 4401a103 r8 : ffffaa55 r7 : c03e357c r6 : c051b460 r5 : 000000ff r4 : 000c0000 r3 : 000b8000 r2 : c03e0514 r1 : 00000000 r0 : c0304971 Flags: nZCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment kernel which is an access to the 0xb8000 without the PCI offset required to make it work. Fixes: cc22b4c18540 ("ARM: set vga memory base at run-time") Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-footbridge/common.c | 3 +++ arch/arm/mach-footbridge/dc21285.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index a42b369bc439..95d0b6e6438b 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -15,6 +15,7 @@ #include #include #include +#include