From 7bb39313cd6239e7eb95198950a02b4ad2a08316 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Dec 2020 17:04:19 -0800 Subject: [PATCH 1/3] x86/mce: Make mce_timed_out() identify holdout CPUs The "Timeout: Not all CPUs entered broadcast exception handler" message will appear from time to time given enough systems, but this message does not identify which CPUs failed to enter the broadcast exception handler. This information would be valuable if available, for example, in order to correlate with other hardware-oriented error messages. Add a cpumask of CPUs which maintains which CPUs have entered this handler, and print out which ones failed to enter in the event of a timeout. [ bp: Massage. ] Reported-by: Jonathan Lemon Signed-off-by: Paul E. McKenney Signed-off-by: Borislav Petkov Tested-by: Tony Luck Link: https://lkml.kernel.org/r/20210106174102.GA23874@paulmck-ThinkPad-P72 --- arch/x86/kernel/cpu/mce/core.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 13d3f1cbda17..6c81d0998e0a 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -877,6 +877,12 @@ static atomic_t mce_executing; */ static atomic_t mce_callin; +/* + * Track which CPUs entered the MCA broadcast synchronization and which not in + * order to print holdouts. + */ +static cpumask_t mce_missing_cpus = CPU_MASK_ALL; + /* * Check if a timeout waiting for other CPUs happened. */ @@ -894,8 +900,12 @@ static int mce_timed_out(u64 *t, const char *msg) if (!mca_cfg.monarch_timeout) goto out; if ((s64)*t < SPINUNIT) { - if (mca_cfg.tolerant <= 1) + if (mca_cfg.tolerant <= 1) { + if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus)) + pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n", + cpumask_pr_args(&mce_missing_cpus)); mce_panic(msg, NULL, NULL); + } cpu_missing = 1; return 1; } @@ -1006,6 +1016,7 @@ static int mce_start(int *no_way_out) * is updated before mce_callin. */ order = atomic_inc_return(&mce_callin); + cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus); /* * Wait for everyone. @@ -1114,6 +1125,7 @@ static int mce_end(int order) reset: atomic_set(&global_nwo, 0); atomic_set(&mce_callin, 0); + cpumask_setall(&mce_missing_cpus); barrier(); /* @@ -2712,6 +2724,7 @@ static void mce_reset(void) atomic_set(&mce_executing, 0); atomic_set(&mce_callin, 0); atomic_set(&global_nwo, 0); + cpumask_setall(&mce_missing_cpus); } static int fake_panic_get(void *data, u64 *val) From 4f432e8bb15b352da72525144da025a46695968f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Jan 2021 13:23:34 +0100 Subject: [PATCH 2/3] x86/mce: Get rid of mcheck_intel_therm_init() Move the APIC_LVTTHMR read which needs to happen on the BSP, to intel_init_thermal(). One less boot dependency. No functional changes. Signed-off-by: Borislav Petkov Tested-by: Srinivas Pandruvada Link: https://lkml.kernel.org/r/20210201142704.12495-2-bp@alien8.de --- arch/x86/include/asm/mce.h | 6 ------ arch/x86/kernel/cpu/mce/core.c | 1 - arch/x86/kernel/cpu/mce/therm_throt.c | 15 ++++----------- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 56cdeaac76a0..def9aa5e1fa4 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -304,12 +304,6 @@ extern int (*platform_thermal_package_notify)(__u64 msr_val); * callback has rate control */ extern bool (*platform_thermal_package_rate_control)(void); -#ifdef CONFIG_X86_THERMAL_VECTOR -extern void mcheck_intel_therm_init(void); -#else -static inline void mcheck_intel_therm_init(void) { } -#endif - /* * Used by APEI to report memory error via /dev/mcelog */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6c81d0998e0a..0cb065e6786f 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2189,7 +2189,6 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { - mcheck_intel_therm_init(); mce_register_decode_chain(&early_nb); mce_register_decode_chain(&mce_uc_nb); mce_register_decode_chain(&mce_default_nb); diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index a7cd2d203ced..5b15d7cef1d1 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -633,17 +633,6 @@ static int intel_thermal_supported(struct cpuinfo_x86 *c) return 1; } -void __init mcheck_intel_therm_init(void) -{ - /* - * This function is only called on boot CPU. Save the init thermal - * LVT value on BSP and use that value to restore APs' thermal LVT - * entry BIOS programmed later - */ - if (intel_thermal_supported(&boot_cpu_data)) - lvtthmr_init = apic_read(APIC_LVTTHMR); -} - void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -653,6 +642,10 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if (!intel_thermal_supported(c)) return; + /* On the BSP? */ + if (c == &boot_cpu_data) + lvtthmr_init = apic_read(APIC_LVTTHMR); + /* * First check if its enabled already, in which case there might * be some SMM goo which handles it, so we can't even put a handler From 9223d0dccb8f8523754122f68316dd1a4f39f7f8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Jan 2021 13:29:05 +0100 Subject: [PATCH 3/3] thermal: Move therm_throt there from x86/mce This functionality has nothing to do with MCE, move it to the thermal framework and untangle it from MCE. Requested-by: Peter Zijlstra Signed-off-by: Borislav Petkov Reviewed-by: Srinivas Pandruvada Tested-by: Srinivas Pandruvada Link: https://lkml.kernel.org/r/20210202121003.GD18075@zn.tnic --- arch/x86/Kconfig | 4 --- arch/x86/include/asm/mce.h | 16 ---------- arch/x86/include/asm/thermal.h | 13 ++++++++ arch/x86/kernel/cpu/intel.c | 3 ++ arch/x86/kernel/cpu/mce/Makefile | 2 -- arch/x86/kernel/cpu/mce/intel.c | 1 - arch/x86/kernel/irq.c | 21 ++++++++++++ drivers/thermal/intel/Kconfig | 4 +++ drivers/thermal/intel/Makefile | 1 + .../thermal/intel}/therm_throt.c | 32 ++++++------------- drivers/thermal/intel/thermal_interrupt.h | 15 +++++++++ drivers/thermal/intel/x86_pkg_temp_thermal.c | 4 ++- 12 files changed, 69 insertions(+), 47 deletions(-) create mode 100644 arch/x86/include/asm/thermal.h rename {arch/x86/kernel/cpu/mce => drivers/thermal/intel}/therm_throt.c (97%) create mode 100644 drivers/thermal/intel/thermal_interrupt.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7b6dd10b162a..54df3151cdb6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1157,10 +1157,6 @@ config X86_MCE_INJECT If you don't know what a machine check is and you don't do kernel QA it is safe to say n. -config X86_THERMAL_VECTOR - def_bool y - depends on X86_MCE_INTEL - source "arch/x86/events/Kconfig" config X86_LEGACY_VM86 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index def9aa5e1fa4..ddfb3cad8dff 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -288,22 +288,6 @@ extern void (*mce_threshold_vector)(void); /* Deferred error interrupt handler */ extern void (*deferred_error_int_vector)(void); -/* - * Thermal handler - */ - -void intel_init_thermal(struct cpuinfo_x86 *c); - -/* Interrupt Handler for core thermal thresholds */ -extern int (*platform_thermal_notify)(__u64 msr_val); - -/* Interrupt Handler for package thermal thresholds */ -extern int (*platform_thermal_package_notify)(__u64 msr_val); - -/* Callback support of rate control, return true, if - * callback has rate control */ -extern bool (*platform_thermal_package_rate_control)(void); - /* * Used by APEI to report memory error via /dev/mcelog */ diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h new file mode 100644 index 000000000000..ddbdefd5b94f --- /dev/null +++ b/arch/x86/include/asm/thermal.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_THERMAL_H +#define _ASM_X86_THERMAL_H + +#ifdef CONFIG_X86_THERMAL_VECTOR +void intel_init_thermal(struct cpuinfo_x86 *c); +bool x86_thermal_enabled(void); +void intel_thermal_interrupt(void); +#else +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +#endif + +#endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 59a1e3ce3f14..71221af87cb1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,6 +24,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -719,6 +720,8 @@ static void init_intel(struct cpuinfo_x86 *c) tsx_disable(); split_lock_init(); + + intel_init_thermal(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/mce/Makefile b/arch/x86/kernel/cpu/mce/Makefile index 9f020c994154..015856abdbb1 100644 --- a/arch/x86/kernel/cpu/mce/Makefile +++ b/arch/x86/kernel/cpu/mce/Makefile @@ -9,8 +9,6 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o mce-inject-y := inject.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o -obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o - obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_X86_MCELOG_LEGACY) += dev-mcelog.o diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index c2476fe0682e..e309476743b7 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -531,7 +531,6 @@ static void intel_imc_init(struct cpuinfo_x86 *c) void mce_intel_feature_init(struct cpuinfo_x86 *c) { - intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); intel_ppin_init(c); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c5dd50369e2f..d4ad344e80bf 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -21,6 +21,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -374,3 +375,23 @@ void fixup_irqs(void) } } #endif + +#ifdef CONFIG_X86_THERMAL_VECTOR +static void smp_thermal_vector(void) +{ + if (x86_thermal_enabled()) + intel_thermal_interrupt(); + else + pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", + smp_processor_id()); +} + +DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) +{ + trace_thermal_apic_entry(THERMAL_APIC_VECTOR); + inc_irq_stat(irq_thermal_count); + smp_thermal_vector(); + trace_thermal_apic_exit(THERMAL_APIC_VECTOR); + ack_APIC_irq(); +} +#endif diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index 8025b21f43fa..ce4f59213c7a 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -8,6 +8,10 @@ config INTEL_POWERCLAMP enforce idle time which results in more package C-state residency. The user interface is exposed via generic thermal framework. +config X86_THERMAL_VECTOR + def_bool y + depends on X86 && CPU_SUP_INTEL && X86_LOCAL_APIC + config X86_PKG_TEMP_THERMAL tristate "X86 package temperature thermal driver" depends on X86_THERMAL_VECTOR diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile index 0d9736ced5d4..ff2ad30ef397 100644 --- a/drivers/thermal/intel/Makefile +++ b/drivers/thermal/intel/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_INTEL_QUARK_DTS_THERMAL) += intel_quark_dts_thermal.o obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/ obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o +obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/drivers/thermal/intel/therm_throt.c similarity index 97% rename from arch/x86/kernel/cpu/mce/therm_throt.c rename to drivers/thermal/intel/therm_throt.c index 5b15d7cef1d1..f8e882592ba5 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -26,13 +26,13 @@ #include #include +#include #include #include -#include +#include #include -#include -#include "internal.h" +#include "thermal_interrupt.h" /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) @@ -570,7 +570,7 @@ static void notify_thresholds(__u64 msr_val) } /* Thermal transition interrupt handler */ -static void intel_thermal_interrupt(void) +void intel_thermal_interrupt(void) { __u64 msr_val; @@ -606,23 +606,6 @@ static void intel_thermal_interrupt(void) } } -static void unexpected_thermal_interrupt(void) -{ - pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", - smp_processor_id()); -} - -static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; - -DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) -{ - trace_thermal_apic_entry(THERMAL_APIC_VECTOR); - inc_irq_stat(irq_thermal_count); - smp_thermal_vector(); - trace_thermal_apic_exit(THERMAL_APIC_VECTOR); - ack_APIC_irq(); -} - /* Thermal monitoring depends on APIC, ACPI and clock modulation */ static int intel_thermal_supported(struct cpuinfo_x86 *c) { @@ -633,6 +616,11 @@ static int intel_thermal_supported(struct cpuinfo_x86 *c) return 1; } +bool x86_thermal_enabled(void) +{ + return atomic_read(&therm_throt_en); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -719,8 +707,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | PACKAGE_THERM_INT_HIGH_ENABLE), h); } - smp_thermal_vector = intel_thermal_interrupt; - rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h new file mode 100644 index 000000000000..53f427bb58dc --- /dev/null +++ b/drivers/thermal/intel/thermal_interrupt.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_THERMAL_INTERRUPT_H +#define _INTEL_THERMAL_INTERRUPT_H + +/* Interrupt Handler for package thermal thresholds */ +extern int (*platform_thermal_package_notify)(__u64 msr_val); + +/* Interrupt Handler for core thermal thresholds */ +extern int (*platform_thermal_notify)(__u64 msr_val); + +/* Callback support of rate control, return true, if + * callback has rate control */ +extern bool (*platform_thermal_package_rate_control)(void); + +#endif /* _INTEL_THERMAL_INTERRUPT_H */ diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index b81c33202f41..295742e83960 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -17,8 +17,10 @@ #include #include #include + #include -#include + +#include "thermal_interrupt.h" /* * Rate control delay: Idea is to introduce denounce effect